/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10233d2500723e5594f3e7c70896ffeeef32b9c950ywan
11233d2500723e5594f3e7c70896ffeeef32b9c950ywan
12233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "variance.h"
13233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "filter.h"
14233d2500723e5594f3e7c70896ffeeef32b9c950ywan
15233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/* Returns the sum of squares of the 256 consecutive coefficients starting at
 * src_ptr. Each product is formed in int and accumulated into an unsigned
 * total, so the result wraps modulo 2^32 rather than overflowing.
 */
unsigned int vp8_get_mb_ss_c
(
    const short *src_ptr
)
{
    unsigned int total = 0;
    int k;

    for (k = 0; k < 256; k++)
    {
        const int coeff = src_ptr[k];

        total += coeff * coeff;
    }

    return total;
}
32233d2500723e5594f3e7c70896ffeeef32b9c950ywan
33233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/* Accumulates the difference statistics between a w x h block of source
 * pixels and the co-located block of reference pixels.
 *
 *   src_ptr / source_stride : first source pixel and distance between rows
 *   ref_ptr / recon_stride  : first reference pixel and distance between rows
 *   w, h                    : block width and height in pixels
 *   sse                     : receives the sum of squared differences
 *   sum                     : receives the (signed) sum of differences
 *
 * Both outputs are written even when w or h is not positive (as zero).
 */
static void variance(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    int  w,
    int  h,
    unsigned int *sse,
    int *sum)
{
    unsigned int sq_total = 0;
    int diff_total = 0;
    int row, col;

    for (row = 0; row < h; row++, src_ptr += source_stride, ref_ptr += recon_stride)
    {
        for (col = 0; col < w; col++)
        {
            const int delta = src_ptr[col] - ref_ptr[col];

            diff_total += delta;
            sq_total   += delta * delta;
        }
    }

    *sum = diff_total;
    *sse = sq_total;
}
63233d2500723e5594f3e7c70896ffeeef32b9c950ywan
64233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/* Variance of a 16x16 block of src_ptr against ref_ptr.
 *
 * Stores the sum of squared differences in *sse and returns
 * sse - (sum * sum) / 256, where sum is the sum of pixel differences.
 */
unsigned int vp8_variance16x16_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    unsigned int mean_term;
    int diff_sum;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &sq_err, &diff_sum);
    *sse = sq_err;

    /* The square is evaluated in unsigned arithmetic; >> 8 divides by the
     * 256 pixels in the block. */
    mean_term = ((unsigned int)diff_sum * diff_sum) >> 8;

    return (sq_err - mean_term);
}
80233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/* Variance of an 8-wide, 16-high block of src_ptr against ref_ptr.
 *
 * Stores the sum of squared differences in *sse and returns
 * sse - (sum * sum) / 128, where sum is the sum of pixel differences.
 */
unsigned int vp8_variance8x16_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    unsigned int mean_term;
    int diff_sum;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &sq_err, &diff_sum);
    *sse = sq_err;

    /* The square is evaluated in unsigned arithmetic; >> 7 divides by the
     * 128 pixels in the block. */
    mean_term = ((unsigned int)diff_sum * diff_sum) >> 7;

    return (sq_err - mean_term);
}
96233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/* Variance of a 16-wide, 8-high block of src_ptr against ref_ptr.
 *
 * Stores the sum of squared differences in *sse and returns
 * sse - (sum * sum) / 128, where sum is the sum of pixel differences.
 */
unsigned int vp8_variance16x8_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    unsigned int mean_term;
    int diff_sum;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &sq_err, &diff_sum);
    *sse = sq_err;

    /* The square is evaluated in unsigned arithmetic; >> 7 divides by the
     * 128 pixels in the block. */
    mean_term = ((unsigned int)diff_sum * diff_sum) >> 7;

    return (sq_err - mean_term);
}
112233d2500723e5594f3e7c70896ffeeef32b9c950ywan
113233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/* Variance of an 8x8 block of src_ptr against ref_ptr.
 *
 * Stores the sum of squared differences in *sse and returns
 * sse - (sum * sum) / 64, where sum is the sum of pixel differences.
 */
unsigned int vp8_variance8x8_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    unsigned int mean_term;
    int diff_sum;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &sq_err, &diff_sum);
    *sse = sq_err;

    /* The square is evaluated in unsigned arithmetic; >> 6 divides by the
     * 64 pixels in the block. */
    mean_term = ((unsigned int)diff_sum * diff_sum) >> 6;

    return (sq_err - mean_term);
}
129233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/* Variance of a 4x4 block of src_ptr against ref_ptr.
 *
 * Stores the sum of squared differences in *sse and returns
 * sse - (sum * sum) / 16, where sum is the sum of pixel differences.
 */
unsigned int vp8_variance4x4_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    unsigned int mean_term;
    int diff_sum;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &sq_err, &diff_sum);
    *sse = sq_err;

    /* The square is evaluated in unsigned arithmetic; >> 4 divides by the
     * 16 pixels in the block. */
    mean_term = ((unsigned int)diff_sum * diff_sum) >> 4;

    return (sq_err - mean_term);
}
145233d2500723e5594f3e7c70896ffeeef32b9c950ywan
146233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/* Sum of squared differences over a 16x16 block of src_ptr against ref_ptr.
 *
 * The same value is stored in *sse and returned; no mean correction is
 * applied (the sum of differences computed by variance() is discarded).
 */
unsigned int vp8_mse16x16_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    int unused_sum;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &sq_err, &unused_sum);

    *sse = sq_err;
    return sq_err;
}
161233d2500723e5594f3e7c70896ffeeef32b9c950ywan
162233d2500723e5594f3e7c70896ffeeef32b9c950ywan
163233d2500723e5594f3e7c70896ffeeef32b9c950ywan/****************************************************************************
164233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
165233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  ROUTINE       : filter_block2d_bil_first_pass
166233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
167233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.
168233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  UINT32 src_pixels_per_line : Stride of input block.
169233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  UINT32 pixel_step        : Offset between filter input samples (see notes).
170233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  UINT32 output_height     : Input block height.
171233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  UINT32 output_width      : Input block width.
172233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  INT32  *vp8_filter          : Array of 2 bi-linear filter taps.
173233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
174233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  OUTPUTS       : INT32 *output_ptr        : Pointer to filtered block.
175233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
176233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  RETURNS       : void
177233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
178233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
179233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  either horizontal or vertical direction to produce the
180233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  filtered output block. Used to implement first-pass
181233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  of 2-D separable filter.
182233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
183233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
184233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
185233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  pixel_step defines whether the filter is applied
186233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  horizontally (pixel_step=1) or vertically (pixel_step=stride).
187233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  It defines the offset required to move from one input
188233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  to the next.
189233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
190233d2500723e5594f3e7c70896ffeeef32b9c950ywan ****************************************************************************/
191233d2500723e5594f3e7c70896ffeeef32b9c950ywanstatic void var_filter_block2d_bil_first_pass
192233d2500723e5594f3e7c70896ffeeef32b9c950ywan(
193233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const unsigned char *src_ptr,
194233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned short *output_ptr,
195233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned int src_pixels_per_line,
196233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int pixel_step,
197233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned int output_height,
198233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned int output_width,
199233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const short *vp8_filter
200233d2500723e5594f3e7c70896ffeeef32b9c950ywan)
201233d2500723e5594f3e7c70896ffeeef32b9c950ywan{
202233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned int i, j;
203233d2500723e5594f3e7c70896ffeeef32b9c950ywan
204233d2500723e5594f3e7c70896ffeeef32b9c950ywan    for (i = 0; i < output_height; i++)
205233d2500723e5594f3e7c70896ffeeef32b9c950ywan    {
206233d2500723e5594f3e7c70896ffeeef32b9c950ywan        for (j = 0; j < output_width; j++)
207233d2500723e5594f3e7c70896ffeeef32b9c950ywan        {
208233d2500723e5594f3e7c70896ffeeef32b9c950ywan            /* Apply bilinear filter */
209233d2500723e5594f3e7c70896ffeeef32b9c950ywan            output_ptr[j] = (((int)src_ptr[0]          * vp8_filter[0]) +
210233d2500723e5594f3e7c70896ffeeef32b9c950ywan                             ((int)src_ptr[pixel_step] * vp8_filter[1]) +
211233d2500723e5594f3e7c70896ffeeef32b9c950ywan                             (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
212233d2500723e5594f3e7c70896ffeeef32b9c950ywan            src_ptr++;
213233d2500723e5594f3e7c70896ffeeef32b9c950ywan        }
214233d2500723e5594f3e7c70896ffeeef32b9c950ywan
215233d2500723e5594f3e7c70896ffeeef32b9c950ywan        /* Next row... */
216233d2500723e5594f3e7c70896ffeeef32b9c950ywan        src_ptr    += src_pixels_per_line - output_width;
217233d2500723e5594f3e7c70896ffeeef32b9c950ywan        output_ptr += output_width;
218233d2500723e5594f3e7c70896ffeeef32b9c950ywan    }
219233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
220233d2500723e5594f3e7c70896ffeeef32b9c950ywan
221233d2500723e5594f3e7c70896ffeeef32b9c950ywan/****************************************************************************
222233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
223233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  ROUTINE       : filter_block2d_bil_second_pass
224233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
225233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  INPUTS        : INT32  *src_ptr          : Pointer to source block.
226233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  UINT32 src_pixels_per_line : Stride of input block.
227233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  UINT32 pixel_step        : Offset between filter input samples (see notes).
228233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  UINT32 output_height     : Input block height.
229233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  UINT32 output_width      : Input block width.
230233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  INT32  *vp8_filter          : Array of 2 bi-linear filter taps.
231233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
232233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  OUTPUTS       : UINT16 *output_ptr       : Pointer to filtered block.
233233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
234233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  RETURNS       : void
235233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
236233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
237233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  either horizontal or vertical direction to produce the
238233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  filtered output block. Used to implement second-pass
239233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  of 2-D separable filter.
240233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
241233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
242233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
243233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  pixel_step defines whether the filter is applied
244233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  horizontally (pixel_step=1) or vertically (pixel_step=stride).
245233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  It defines the offset required to move from one input
246233d2500723e5594f3e7c70896ffeeef32b9c950ywan *                  to the next.
247233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
248233d2500723e5594f3e7c70896ffeeef32b9c950ywan ****************************************************************************/
249233d2500723e5594f3e7c70896ffeeef32b9c950ywanstatic void var_filter_block2d_bil_second_pass
250233d2500723e5594f3e7c70896ffeeef32b9c950ywan(
251233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const unsigned short *src_ptr,
252233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned char  *output_ptr,
253233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned int  src_pixels_per_line,
254233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned int  pixel_step,
255233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned int  output_height,
256233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned int  output_width,
257233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const short *vp8_filter
258233d2500723e5594f3e7c70896ffeeef32b9c950ywan)
259233d2500723e5594f3e7c70896ffeeef32b9c950ywan{
260233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned int  i, j;
261233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int  Temp;
262233d2500723e5594f3e7c70896ffeeef32b9c950ywan
263233d2500723e5594f3e7c70896ffeeef32b9c950ywan    for (i = 0; i < output_height; i++)
264233d2500723e5594f3e7c70896ffeeef32b9c950ywan    {
265233d2500723e5594f3e7c70896ffeeef32b9c950ywan        for (j = 0; j < output_width; j++)
266233d2500723e5594f3e7c70896ffeeef32b9c950ywan        {
267233d2500723e5594f3e7c70896ffeeef32b9c950ywan            /* Apply filter */
268233d2500723e5594f3e7c70896ffeeef32b9c950ywan            Temp = ((int)src_ptr[0]          * vp8_filter[0]) +
269233d2500723e5594f3e7c70896ffeeef32b9c950ywan                   ((int)src_ptr[pixel_step] * vp8_filter[1]) +
270233d2500723e5594f3e7c70896ffeeef32b9c950ywan                   (VP8_FILTER_WEIGHT / 2);
271233d2500723e5594f3e7c70896ffeeef32b9c950ywan            output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
272233d2500723e5594f3e7c70896ffeeef32b9c950ywan            src_ptr++;
273233d2500723e5594f3e7c70896ffeeef32b9c950ywan        }
274233d2500723e5594f3e7c70896ffeeef32b9c950ywan
275233d2500723e5594f3e7c70896ffeeef32b9c950ywan        /* Next row... */
276233d2500723e5594f3e7c70896ffeeef32b9c950ywan        src_ptr    += src_pixels_per_line - output_width;
277233d2500723e5594f3e7c70896ffeeef32b9c950ywan        output_ptr += output_width;
278233d2500723e5594f3e7c70896ffeeef32b9c950ywan    }
279233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
280233d2500723e5594f3e7c70896ffeeef32b9c950ywan
281233d2500723e5594f3e7c70896ffeeef32b9c950ywan
282233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_sub_pixel_variance4x4_c
283233d2500723e5594f3e7c70896ffeeef32b9c950ywan(
284233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const unsigned char  *src_ptr,
285233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int  src_pixels_per_line,
286233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int  xoffset,
287233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int  yoffset,
288233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const unsigned char *dst_ptr,
289233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int dst_pixels_per_line,
290233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned int *sse
291233d2500723e5594f3e7c70896ffeeef32b9c950ywan)
292233d2500723e5594f3e7c70896ffeeef32b9c950ywan{
293233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned char  temp2[20*16];
294233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const short *HFilter, *VFilter;
295233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned short FData3[5*4]; /* Temp data bufffer used in filtering */
296233d2500723e5594f3e7c70896ffeeef32b9c950ywan
297233d2500723e5594f3e7c70896ffeeef32b9c950ywan    HFilter = vp8_bilinear_filters[xoffset];
298233d2500723e5594f3e7c70896ffeeef32b9c950ywan    VFilter = vp8_bilinear_filters[yoffset];
299233d2500723e5594f3e7c70896ffeeef32b9c950ywan
300233d2500723e5594f3e7c70896ffeeef32b9c950ywan    /* First filter 1d Horizontal */
301233d2500723e5594f3e7c70896ffeeef32b9c950ywan    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter);
302233d2500723e5594f3e7c70896ffeeef32b9c950ywan
303233d2500723e5594f3e7c70896ffeeef32b9c950ywan    /* Now filter Verticaly */
304233d2500723e5594f3e7c70896ffeeef32b9c950ywan    var_filter_block2d_bil_second_pass(FData3, temp2, 4,  4,  4,  4, VFilter);
305233d2500723e5594f3e7c70896ffeeef32b9c950ywan
306233d2500723e5594f3e7c70896ffeeef32b9c950ywan    return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
307233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
308233d2500723e5594f3e7c70896ffeeef32b9c950ywan
309233d2500723e5594f3e7c70896ffeeef32b9c950ywan
310233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_sub_pixel_variance8x8_c
311233d2500723e5594f3e7c70896ffeeef32b9c950ywan(
312233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const unsigned char  *src_ptr,
313233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int  src_pixels_per_line,
314233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int  xoffset,
315233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int  yoffset,
316233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const unsigned char *dst_ptr,
317233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int dst_pixels_per_line,
318233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned int *sse
319233d2500723e5594f3e7c70896ffeeef32b9c950ywan)
320233d2500723e5594f3e7c70896ffeeef32b9c950ywan{
321233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned short FData3[9*8]; /* Temp data bufffer used in filtering */
322233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned char  temp2[20*16];
323233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const short *HFilter, *VFilter;
324233d2500723e5594f3e7c70896ffeeef32b9c950ywan
325233d2500723e5594f3e7c70896ffeeef32b9c950ywan    HFilter = vp8_bilinear_filters[xoffset];
326233d2500723e5594f3e7c70896ffeeef32b9c950ywan    VFilter = vp8_bilinear_filters[yoffset];
327233d2500723e5594f3e7c70896ffeeef32b9c950ywan
328233d2500723e5594f3e7c70896ffeeef32b9c950ywan    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
329233d2500723e5594f3e7c70896ffeeef32b9c950ywan    var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
330233d2500723e5594f3e7c70896ffeeef32b9c950ywan
331233d2500723e5594f3e7c70896ffeeef32b9c950ywan    return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
332233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
333233d2500723e5594f3e7c70896ffeeef32b9c950ywan
334233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_sub_pixel_variance16x16_c
335233d2500723e5594f3e7c70896ffeeef32b9c950ywan(
336233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const unsigned char  *src_ptr,
337233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int  src_pixels_per_line,
338233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int  xoffset,
339233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int  yoffset,
340233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const unsigned char *dst_ptr,
341233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int dst_pixels_per_line,
342233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned int *sse
343233d2500723e5594f3e7c70896ffeeef32b9c950ywan)
344233d2500723e5594f3e7c70896ffeeef32b9c950ywan{
345233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned short FData3[17*16];   /* Temp data bufffer used in filtering */
346233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned char  temp2[20*16];
347233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const short *HFilter, *VFilter;
348233d2500723e5594f3e7c70896ffeeef32b9c950ywan
349233d2500723e5594f3e7c70896ffeeef32b9c950ywan    HFilter = vp8_bilinear_filters[xoffset];
350233d2500723e5594f3e7c70896ffeeef32b9c950ywan    VFilter = vp8_bilinear_filters[yoffset];
351233d2500723e5594f3e7c70896ffeeef32b9c950ywan
352233d2500723e5594f3e7c70896ffeeef32b9c950ywan    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
353233d2500723e5594f3e7c70896ffeeef32b9c950ywan    var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);
354233d2500723e5594f3e7c70896ffeeef32b9c950ywan
355233d2500723e5594f3e7c70896ffeeef32b9c950ywan    return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
356233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
357233d2500723e5594f3e7c70896ffeeef32b9c950ywan
358233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/* 16x16 sub-pixel variance with a fixed horizontal offset of 4 and no
 * vertical offset; forwards to vp8_sub_pixel_variance16x16_c.
 * NOTE(review): offset 4 is presumably the half-pixel entry of
 * vp8_bilinear_filters - confirm against filter.h.
 */
unsigned int vp8_variance_halfpixvar16x16_h_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    const int x_off = 4;
    const int y_off = 0;

    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, x_off, y_off,
                                         ref_ptr, recon_stride, sse);
}
369233d2500723e5594f3e7c70896ffeeef32b9c950ywan
370233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/* 16x16 sub-pixel variance with no horizontal offset and a fixed vertical
 * offset of 4; forwards to vp8_sub_pixel_variance16x16_c.
 * NOTE(review): offset 4 is presumably the half-pixel entry of
 * vp8_bilinear_filters - confirm against filter.h.
 */
unsigned int vp8_variance_halfpixvar16x16_v_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    const int x_off = 0;
    const int y_off = 4;

    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, x_off, y_off,
                                         ref_ptr, recon_stride, sse);
}
381233d2500723e5594f3e7c70896ffeeef32b9c950ywan
382233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/* 16x16 sub-pixel variance with fixed horizontal and vertical offsets of 4;
 * forwards to vp8_sub_pixel_variance16x16_c.
 * NOTE(review): offset 4 is presumably the half-pixel entry of
 * vp8_bilinear_filters - confirm against filter.h.
 */
unsigned int vp8_variance_halfpixvar16x16_hv_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    const int x_off = 4;
    const int y_off = 4;

    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, x_off, y_off,
                                         ref_ptr, recon_stride, sse);
}
393233d2500723e5594f3e7c70896ffeeef32b9c950ywan
394233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/* Sum of squared differences between a bilinearly filtered 16x16 source
 * block and dst_ptr.
 *
 * Runs vp8_sub_pixel_variance16x16_c for its side effect of filling *sse,
 * ignores the variance it returns, and returns the value left in *sse.
 */
unsigned int vp8_sub_pixel_mse16x16_c
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    (void)vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                        xoffset, yoffset,
                                        dst_ptr, dst_pixels_per_line, sse);

    return *sse;
}
409233d2500723e5594f3e7c70896ffeeef32b9c950ywan
410233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_sub_pixel_variance16x8_c
411233d2500723e5594f3e7c70896ffeeef32b9c950ywan(
412233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const unsigned char  *src_ptr,
413233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int  src_pixels_per_line,
414233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int  xoffset,
415233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int  yoffset,
416233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const unsigned char *dst_ptr,
417233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int dst_pixels_per_line,
418233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned int *sse
419233d2500723e5594f3e7c70896ffeeef32b9c950ywan)
420233d2500723e5594f3e7c70896ffeeef32b9c950ywan{
421233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned short FData3[16*9];    /* Temp data bufffer used in filtering */
422233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned char  temp2[20*16];
423233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const short *HFilter, *VFilter;
424233d2500723e5594f3e7c70896ffeeef32b9c950ywan
425233d2500723e5594f3e7c70896ffeeef32b9c950ywan    HFilter = vp8_bilinear_filters[xoffset];
426233d2500723e5594f3e7c70896ffeeef32b9c950ywan    VFilter = vp8_bilinear_filters[yoffset];
427233d2500723e5594f3e7c70896ffeeef32b9c950ywan
428233d2500723e5594f3e7c70896ffeeef32b9c950ywan    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
429233d2500723e5594f3e7c70896ffeeef32b9c950ywan    var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);
430233d2500723e5594f3e7c70896ffeeef32b9c950ywan
431233d2500723e5594f3e7c70896ffeeef32b9c950ywan    return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
432233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
433233d2500723e5594f3e7c70896ffeeef32b9c950ywan
434233d2500723e5594f3e7c70896ffeeef32b9c950ywanunsigned int vp8_sub_pixel_variance8x16_c
435233d2500723e5594f3e7c70896ffeeef32b9c950ywan(
436233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const unsigned char  *src_ptr,
437233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int  src_pixels_per_line,
438233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int  xoffset,
439233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int  yoffset,
440233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const unsigned char *dst_ptr,
441233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int dst_pixels_per_line,
442233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned int *sse
443233d2500723e5594f3e7c70896ffeeef32b9c950ywan)
444233d2500723e5594f3e7c70896ffeeef32b9c950ywan{
445233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned short FData3[9*16];    /* Temp data bufffer used in filtering */
446233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned char  temp2[20*16];
447233d2500723e5594f3e7c70896ffeeef32b9c950ywan    const short *HFilter, *VFilter;
448233d2500723e5594f3e7c70896ffeeef32b9c950ywan
449233d2500723e5594f3e7c70896ffeeef32b9c950ywan
450233d2500723e5594f3e7c70896ffeeef32b9c950ywan    HFilter = vp8_bilinear_filters[xoffset];
451233d2500723e5594f3e7c70896ffeeef32b9c950ywan    VFilter = vp8_bilinear_filters[yoffset];
452233d2500723e5594f3e7c70896ffeeef32b9c950ywan
453233d2500723e5594f3e7c70896ffeeef32b9c950ywan
454233d2500723e5594f3e7c70896ffeeef32b9c950ywan    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter);
455233d2500723e5594f3e7c70896ffeeef32b9c950ywan    var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);
456233d2500723e5594f3e7c70896ffeeef32b9c950ywan
457233d2500723e5594f3e7c70896ffeeef32b9c950ywan    return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
458233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
459