1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11
12#include "filter.h"
13
14DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
15{
16    { 128,   0 },
17    { 112,  16 },
18    {  96,  32 },
19    {  80,  48 },
20    {  64,  64 },
21    {  48,  80 },
22    {  32,  96 },
23    {  16, 112 }
24};
25
26DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
27{
28
29    { 0,  0,  128,    0,   0,  0 },         /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
30    { 0, -6,  123,   12,  -1,  0 },
31    { 2, -11, 108,   36,  -8,  1 },         /* New 1/4 pel 6 tap filter */
32    { 0, -9,   93,   50,  -6,  0 },
33    { 3, -16,  77,   77, -16,  3 },         /* New 1/2 pel 6 tap filter */
34    { 0, -6,   50,   93,  -9,  0 },
35    { 1, -8,   36,  108, -11,  2 },         /* New 1/4 pel 6 tap filter */
36    { 0, -1,   12,  123,  -6,  0 },
37};
38
39static void filter_block2d_first_pass
40(
41    unsigned char *src_ptr,
42    int *output_ptr,
43    unsigned int src_pixels_per_line,
44    unsigned int pixel_step,
45    unsigned int output_height,
46    unsigned int output_width,
47    const short *vp8_filter
48)
49{
50    unsigned int i, j;
51    int  Temp;
52
53    for (i = 0; i < output_height; i++)
54    {
55        for (j = 0; j < output_width; j++)
56        {
57            Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
58                   ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
59                   ((int)src_ptr[0]                 * vp8_filter[2]) +
60                   ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
61                   ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
62                   ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
63                   (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
64
65            /* Normalize back to 0-255 */
66            Temp = Temp >> VP8_FILTER_SHIFT;
67
68            if (Temp < 0)
69                Temp = 0;
70            else if (Temp > 255)
71                Temp = 255;
72
73            output_ptr[j] = Temp;
74            src_ptr++;
75        }
76
77        /* Next row... */
78        src_ptr    += src_pixels_per_line - output_width;
79        output_ptr += output_width;
80    }
81}
82
83static void filter_block2d_second_pass
84(
85    int *src_ptr,
86    unsigned char *output_ptr,
87    int output_pitch,
88    unsigned int src_pixels_per_line,
89    unsigned int pixel_step,
90    unsigned int output_height,
91    unsigned int output_width,
92    const short *vp8_filter
93)
94{
95    unsigned int i, j;
96    int  Temp;
97
98    for (i = 0; i < output_height; i++)
99    {
100        for (j = 0; j < output_width; j++)
101        {
102            /* Apply filter */
103            Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
104                   ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
105                   ((int)src_ptr[0]                 * vp8_filter[2]) +
106                   ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
107                   ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
108                   ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
109                   (VP8_FILTER_WEIGHT >> 1);   /* Rounding */
110
111            /* Normalize back to 0-255 */
112            Temp = Temp >> VP8_FILTER_SHIFT;
113
114            if (Temp < 0)
115                Temp = 0;
116            else if (Temp > 255)
117                Temp = 255;
118
119            output_ptr[j] = (unsigned char)Temp;
120            src_ptr++;
121        }
122
123        /* Start next row */
124        src_ptr    += src_pixels_per_line - output_width;
125        output_ptr += output_pitch;
126    }
127}
128
129
130static void filter_block2d
131(
132    unsigned char  *src_ptr,
133    unsigned char  *output_ptr,
134    unsigned int src_pixels_per_line,
135    int output_pitch,
136    const short  *HFilter,
137    const short  *VFilter
138)
139{
140    int FData[9*4]; /* Temp data buffer used in filtering */
141
142    /* First filter 1-D horizontally... */
143    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
144
145    /* then filter verticaly... */
146    filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
147}
148
149
150void vp8_sixtap_predict4x4_c
151(
152    unsigned char  *src_ptr,
153    int   src_pixels_per_line,
154    int  xoffset,
155    int  yoffset,
156    unsigned char *dst_ptr,
157    int dst_pitch
158)
159{
160    const short  *HFilter;
161    const short  *VFilter;
162
163    HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
164    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
165
166    filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
167}
168void vp8_sixtap_predict8x8_c
169(
170    unsigned char  *src_ptr,
171    int  src_pixels_per_line,
172    int  xoffset,
173    int  yoffset,
174    unsigned char *dst_ptr,
175    int  dst_pitch
176)
177{
178    const short  *HFilter;
179    const short  *VFilter;
180    int FData[13*16];   /* Temp data buffer used in filtering */
181
182    HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
183    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
184
185    /* First filter 1-D horizontally... */
186    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
187
188
189    /* then filter verticaly... */
190    filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
191
192}
193
194void vp8_sixtap_predict8x4_c
195(
196    unsigned char  *src_ptr,
197    int  src_pixels_per_line,
198    int  xoffset,
199    int  yoffset,
200    unsigned char *dst_ptr,
201    int  dst_pitch
202)
203{
204    const short  *HFilter;
205    const short  *VFilter;
206    int FData[13*16];   /* Temp data buffer used in filtering */
207
208    HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
209    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
210
211    /* First filter 1-D horizontally... */
212    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
213
214
215    /* then filter verticaly... */
216    filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
217
218}
219
220void vp8_sixtap_predict16x16_c
221(
222    unsigned char  *src_ptr,
223    int  src_pixels_per_line,
224    int  xoffset,
225    int  yoffset,
226    unsigned char *dst_ptr,
227    int  dst_pitch
228)
229{
230    const short  *HFilter;
231    const short  *VFilter;
232    int FData[21*24];   /* Temp data buffer used in filtering */
233
234
235    HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
236    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
237
238    /* First filter 1-D horizontally... */
239    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
240
241    /* then filter verticaly... */
242    filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
243
244}
245
246
247/****************************************************************************
248 *
249 *  ROUTINE       : filter_block2d_bil_first_pass
250 *
251 *  INPUTS        : UINT8  *src_ptr    : Pointer to source block.
252 *                  UINT32  src_stride : Stride of source block.
253 *                  UINT32  height     : Block height.
254 *                  UINT32  width      : Block width.
255 *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
256 *
257 *  OUTPUTS       : INT32  *dst_ptr    : Pointer to filtered block.
258 *
259 *  RETURNS       : void
260 *
261 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
262 *                  in the horizontal direction to produce the filtered output
263 *                  block. Used to implement first-pass of 2-D separable filter.
264 *
265 *  SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
266 *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
267 *
268 ****************************************************************************/
269static void filter_block2d_bil_first_pass
270(
271    unsigned char  *src_ptr,
272    unsigned short *dst_ptr,
273    unsigned int    src_stride,
274    unsigned int    height,
275    unsigned int    width,
276    const short    *vp8_filter
277)
278{
279    unsigned int i, j;
280
281    for (i = 0; i < height; i++)
282    {
283        for (j = 0; j < width; j++)
284        {
285            /* Apply bilinear filter */
286            dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
287                          ((int)src_ptr[1] * vp8_filter[1]) +
288                          (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
289            src_ptr++;
290        }
291
292        /* Next row... */
293        src_ptr += src_stride - width;
294        dst_ptr += width;
295    }
296}
297
298/****************************************************************************
299 *
300 *  ROUTINE       : filter_block2d_bil_second_pass
301 *
302 *  INPUTS        : INT32  *src_ptr    : Pointer to source block.
303 *                  UINT32  dst_pitch  : Destination block pitch.
304 *                  UINT32  height     : Block height.
305 *                  UINT32  width      : Block width.
306 *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
307 *
308 *  OUTPUTS       : UINT16 *dst_ptr    : Pointer to filtered block.
309 *
310 *  RETURNS       : void
311 *
312 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
313 *                  in the vertical direction to produce the filtered output
314 *                  block. Used to implement second-pass of 2-D separable filter.
315 *
316 *  SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
317 *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
318 *
319 ****************************************************************************/
320static void filter_block2d_bil_second_pass
321(
322    unsigned short *src_ptr,
323    unsigned char  *dst_ptr,
324    int             dst_pitch,
325    unsigned int    height,
326    unsigned int    width,
327    const short    *vp8_filter
328)
329{
330    unsigned int  i, j;
331    int  Temp;
332
333    for (i = 0; i < height; i++)
334    {
335        for (j = 0; j < width; j++)
336        {
337            /* Apply filter */
338            Temp = ((int)src_ptr[0]     * vp8_filter[0]) +
339                   ((int)src_ptr[width] * vp8_filter[1]) +
340                   (VP8_FILTER_WEIGHT / 2);
341            dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
342            src_ptr++;
343        }
344
345        /* Next row... */
346        dst_ptr += dst_pitch;
347    }
348}
349
350
351/****************************************************************************
352 *
353 *  ROUTINE       : filter_block2d_bil
354 *
355 *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.
356 *                  UINT32  src_pitch        : Stride of source block.
357 *                  UINT32  dst_pitch        : Stride of destination block.
358 *                  INT32  *HFilter          : Array of 2 horizontal filter taps.
359 *                  INT32  *VFilter          : Array of 2 vertical filter taps.
360 *                  INT32  Width             : Block width
361 *                  INT32  Height            : Block height
362 *
363 *  OUTPUTS       : UINT16 *dst_ptr       : Pointer to filtered block.
364 *
365 *  RETURNS       : void
366 *
367 *  FUNCTION      : 2-D filters an input block by applying a 2-tap
368 *                  bi-linear filter horizontally followed by a 2-tap
369 *                  bi-linear filter vertically on the result.
370 *
371 *  SPECIAL NOTES : The largest block size can be handled here is 16x16
372 *
373 ****************************************************************************/
374static void filter_block2d_bil
375(
376    unsigned char *src_ptr,
377    unsigned char *dst_ptr,
378    unsigned int   src_pitch,
379    unsigned int   dst_pitch,
380    const short   *HFilter,
381    const short   *VFilter,
382    int            Width,
383    int            Height
384)
385{
386
387    unsigned short FData[17*16];    /* Temp data buffer used in filtering */
388
389    /* First filter 1-D horizontally... */
390    filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
391
392    /* then 1-D vertically... */
393    filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
394}
395
396
397void vp8_bilinear_predict4x4_c
398(
399    unsigned char  *src_ptr,
400    int   src_pixels_per_line,
401    int  xoffset,
402    int  yoffset,
403    unsigned char *dst_ptr,
404    int dst_pitch
405)
406{
407    const short *HFilter;
408    const short *VFilter;
409
410    HFilter = vp8_bilinear_filters[xoffset];
411    VFilter = vp8_bilinear_filters[yoffset];
412#if 0
413    {
414        int i;
415        unsigned char temp1[16];
416        unsigned char temp2[16];
417
418        bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
419        filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
420
421        for (i = 0; i < 16; i++)
422        {
423            if (temp1[i] != temp2[i])
424            {
425                bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
426                filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
427            }
428        }
429    }
430#endif
431    filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
432
433}
434
435void vp8_bilinear_predict8x8_c
436(
437    unsigned char  *src_ptr,
438    int  src_pixels_per_line,
439    int  xoffset,
440    int  yoffset,
441    unsigned char *dst_ptr,
442    int  dst_pitch
443)
444{
445    const short *HFilter;
446    const short *VFilter;
447
448    HFilter = vp8_bilinear_filters[xoffset];
449    VFilter = vp8_bilinear_filters[yoffset];
450
451    filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
452
453}
454
455void vp8_bilinear_predict8x4_c
456(
457    unsigned char  *src_ptr,
458    int  src_pixels_per_line,
459    int  xoffset,
460    int  yoffset,
461    unsigned char *dst_ptr,
462    int  dst_pitch
463)
464{
465    const short *HFilter;
466    const short *VFilter;
467
468    HFilter = vp8_bilinear_filters[xoffset];
469    VFilter = vp8_bilinear_filters[yoffset];
470
471    filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
472
473}
474
475void vp8_bilinear_predict16x16_c
476(
477    unsigned char  *src_ptr,
478    int  src_pixels_per_line,
479    int  xoffset,
480    int  yoffset,
481    unsigned char *dst_ptr,
482    int  dst_pitch
483)
484{
485    const short *HFilter;
486    const short *VFilter;
487
488    HFilter = vp8_bilinear_filters[xoffset];
489    VFilter = vp8_bilinear_filters[yoffset];
490
491    filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
492}
493