1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11
12#include "filter.h"
13#include "./vp8_rtcd.h"
14
/*
 * Two-tap bilinear filter coefficients, one row per offset index (0..7).
 * The bilinear predictors in this file pick a row with xoffset / yoffset.
 * Both taps of every row add up to 128, and the weight moves from the first
 * tap to the second in steps of 16 as the index grows; row 0 puts all of the
 * weight on the first tap.
 * NOTE(review): 128 is presumably VP8_FILTER_WEIGHT from filter.h - confirm.
 */
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
{
    { 128,   0 },
    { 112,  16 },
    {  96,  32 },
    {  80,  48 },
    {  64,  64 },
    {  48,  80 },
    {  32,  96 },
    {  16, 112 }
};
26
/*
 * Six-tap filter coefficients, one row per offset index (0..7).
 * The six-tap predictors in this file pick a row with xoffset / yoffset.
 * The taps of every row add up to 128. Row 0 puts all of the weight on the
 * third tap, which is the tap the filter passes apply to the current sample.
 * Rows 5, 6 and 7 are rows 3, 2 and 1 with the tap order reversed.
 * NOTE(review): 128 is presumably VP8_FILTER_WEIGHT from filter.h - confirm.
 */
DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
{

    { 0,  0,  128,    0,   0,  0 },         /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
    { 0, -6,  123,   12,  -1,  0 },
    { 2, -11, 108,   36,  -8,  1 },         /* New 1/4 pel 6 tap filter */
    { 0, -9,   93,   50,  -6,  0 },
    { 3, -16,  77,   77, -16,  3 },         /* New 1/2 pel 6 tap filter */
    { 0, -6,   50,   93,  -9,  0 },
    { 1, -8,   36,  108, -11,  2 },         /* New 1/4 pel 6 tap filter */
    { 0, -1,   12,  123,  -6,  0 },
};
39
40static void filter_block2d_first_pass
41(
42    unsigned char *src_ptr,
43    int *output_ptr,
44    unsigned int src_pixels_per_line,
45    unsigned int pixel_step,
46    unsigned int output_height,
47    unsigned int output_width,
48    const short *vp8_filter
49)
50{
51    unsigned int i, j;
52    int  Temp;
53
54    for (i = 0; i < output_height; i++)
55    {
56        for (j = 0; j < output_width; j++)
57        {
58            Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
59                   ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
60                   ((int)src_ptr[0]                 * vp8_filter[2]) +
61                   ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
62                   ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
63                   ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
64                   (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
65
66            /* Normalize back to 0-255 */
67            Temp = Temp >> VP8_FILTER_SHIFT;
68
69            if (Temp < 0)
70                Temp = 0;
71            else if (Temp > 255)
72                Temp = 255;
73
74            output_ptr[j] = Temp;
75            src_ptr++;
76        }
77
78        /* Next row... */
79        src_ptr    += src_pixels_per_line - output_width;
80        output_ptr += output_width;
81    }
82}
83
84static void filter_block2d_second_pass
85(
86    int *src_ptr,
87    unsigned char *output_ptr,
88    int output_pitch,
89    unsigned int src_pixels_per_line,
90    unsigned int pixel_step,
91    unsigned int output_height,
92    unsigned int output_width,
93    const short *vp8_filter
94)
95{
96    unsigned int i, j;
97    int  Temp;
98
99    for (i = 0; i < output_height; i++)
100    {
101        for (j = 0; j < output_width; j++)
102        {
103            /* Apply filter */
104            Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
105                   ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
106                   ((int)src_ptr[0]                 * vp8_filter[2]) +
107                   ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
108                   ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
109                   ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
110                   (VP8_FILTER_WEIGHT >> 1);   /* Rounding */
111
112            /* Normalize back to 0-255 */
113            Temp = Temp >> VP8_FILTER_SHIFT;
114
115            if (Temp < 0)
116                Temp = 0;
117            else if (Temp > 255)
118                Temp = 255;
119
120            output_ptr[j] = (unsigned char)Temp;
121            src_ptr++;
122        }
123
124        /* Start next row */
125        src_ptr    += src_pixels_per_line - output_width;
126        output_ptr += output_pitch;
127    }
128}
129
130
/*
 * 4x4 two-dimensional 6-tap filter.
 *
 * The horizontal pass starts two rows above src_ptr and produces 9 rows of 4
 * samples, which gives the vertical pass the 2 rows above and 3 rows below
 * the block that its taps reach. The vertical pass then writes the 4x4
 * result to output_ptr.
 *
 *   src_ptr             : top-left sample of the source block.
 *   output_ptr          : destination block.
 *   src_pixels_per_line : distance between the starts of two source rows.
 *   output_pitch        : distance between the starts of two output rows.
 *   HFilter / VFilter   : six horizontal / vertical filter taps.
 */
static void filter_block2d
(
    unsigned char  *src_ptr,
    unsigned char  *output_ptr,
    unsigned int src_pixels_per_line,
    int output_pitch,
    const short  *HFilter,
    const short  *VFilter
)
{
    int FData[9*4]; /* intermediate rows: 9 rows of 4 samples */
    unsigned char *first_row = src_ptr - (2 * src_pixels_per_line);

    /* Horizontal pass over the block plus the extra rows above and below. */
    filter_block2d_first_pass(first_row, FData, src_pixels_per_line, 1, 9, 4, HFilter);

    /* Vertical pass, starting at the third intermediate row. */
    filter_block2d_second_pass(&FData[2 * 4], output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
}
149
150
151void vp8_sixtap_predict4x4_c
152(
153    unsigned char  *src_ptr,
154    int   src_pixels_per_line,
155    int  xoffset,
156    int  yoffset,
157    unsigned char *dst_ptr,
158    int dst_pitch
159)
160{
161    const short  *HFilter;
162    const short  *VFilter;
163
164    HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
165    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
166
167    filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
168}
169void vp8_sixtap_predict8x8_c
170(
171    unsigned char  *src_ptr,
172    int  src_pixels_per_line,
173    int  xoffset,
174    int  yoffset,
175    unsigned char *dst_ptr,
176    int  dst_pitch
177)
178{
179    const short  *HFilter;
180    const short  *VFilter;
181    int FData[13*16];   /* Temp data buffer used in filtering */
182
183    HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
184    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
185
186    /* First filter 1-D horizontally... */
187    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
188
189
190    /* then filter verticaly... */
191    filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
192
193}
194
195void vp8_sixtap_predict8x4_c
196(
197    unsigned char  *src_ptr,
198    int  src_pixels_per_line,
199    int  xoffset,
200    int  yoffset,
201    unsigned char *dst_ptr,
202    int  dst_pitch
203)
204{
205    const short  *HFilter;
206    const short  *VFilter;
207    int FData[13*16];   /* Temp data buffer used in filtering */
208
209    HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
210    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
211
212    /* First filter 1-D horizontally... */
213    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
214
215
216    /* then filter verticaly... */
217    filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
218
219}
220
221void vp8_sixtap_predict16x16_c
222(
223    unsigned char  *src_ptr,
224    int  src_pixels_per_line,
225    int  xoffset,
226    int  yoffset,
227    unsigned char *dst_ptr,
228    int  dst_pitch
229)
230{
231    const short  *HFilter;
232    const short  *VFilter;
233    int FData[21*24];   /* Temp data buffer used in filtering */
234
235
236    HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
237    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
238
239    /* First filter 1-D horizontally... */
240    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
241
242    /* then filter verticaly... */
243    filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
244
245}
246
247
248/****************************************************************************
249 *
250 *  ROUTINE       : filter_block2d_bil_first_pass
251 *
252 *  INPUTS        : UINT8  *src_ptr    : Pointer to source block.
253 *                  UINT32  src_stride : Stride of source block.
254 *                  UINT32  height     : Block height.
255 *                  UINT32  width      : Block width.
256 *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
257 *
258 *  OUTPUTS       : INT32  *dst_ptr    : Pointer to filtered block.
259 *
260 *  RETURNS       : void
261 *
262 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
263 *                  in the horizontal direction to produce the filtered output
264 *                  block. Used to implement first-pass of 2-D separable filter.
265 *
266 *  SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
267 *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
268 *
269 ****************************************************************************/
270static void filter_block2d_bil_first_pass
271(
272    unsigned char  *src_ptr,
273    unsigned short *dst_ptr,
274    unsigned int    src_stride,
275    unsigned int    height,
276    unsigned int    width,
277    const short    *vp8_filter
278)
279{
280    unsigned int i, j;
281
282    for (i = 0; i < height; i++)
283    {
284        for (j = 0; j < width; j++)
285        {
286            /* Apply bilinear filter */
287            dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
288                          ((int)src_ptr[1] * vp8_filter[1]) +
289                          (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
290            src_ptr++;
291        }
292
293        /* Next row... */
294        src_ptr += src_stride - width;
295        dst_ptr += width;
296    }
297}
298
299/****************************************************************************
300 *
301 *  ROUTINE       : filter_block2d_bil_second_pass
302 *
303 *  INPUTS        : INT32  *src_ptr    : Pointer to source block.
304 *                  UINT32  dst_pitch  : Destination block pitch.
305 *                  UINT32  height     : Block height.
306 *                  UINT32  width      : Block width.
307 *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
308 *
309 *  OUTPUTS       : UINT16 *dst_ptr    : Pointer to filtered block.
310 *
311 *  RETURNS       : void
312 *
313 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
314 *                  in the vertical direction to produce the filtered output
315 *                  block. Used to implement second-pass of 2-D separable filter.
316 *
317 *  SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
318 *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
319 *
320 ****************************************************************************/
321static void filter_block2d_bil_second_pass
322(
323    unsigned short *src_ptr,
324    unsigned char  *dst_ptr,
325    int             dst_pitch,
326    unsigned int    height,
327    unsigned int    width,
328    const short    *vp8_filter
329)
330{
331    unsigned int  i, j;
332    int  Temp;
333
334    for (i = 0; i < height; i++)
335    {
336        for (j = 0; j < width; j++)
337        {
338            /* Apply filter */
339            Temp = ((int)src_ptr[0]     * vp8_filter[0]) +
340                   ((int)src_ptr[width] * vp8_filter[1]) +
341                   (VP8_FILTER_WEIGHT / 2);
342            dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
343            src_ptr++;
344        }
345
346        /* Next row... */
347        dst_ptr += dst_pitch;
348    }
349}
350
351
/****************************************************************************
 *
 *  ROUTINE       : filter_block2d_bil
 *
 *  INPUTS        : unsigned char *src_ptr   : Pointer to source block.
 *                  unsigned int   src_pitch : Stride of source block.
 *                  unsigned int   dst_pitch : Stride of destination block.
 *                  const short   *HFilter   : Array of 2 horizontal filter taps.
 *                  const short   *VFilter   : Array of 2 vertical filter taps.
 *                  int            Width     : Block width
 *                  int            Height    : Block height
 *
 *  OUTPUTS       : unsigned char *dst_ptr   : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : 2-D filters an input block by applying a 2-tap
 *                  bi-linear filter horizontally followed by a 2-tap
 *                  bi-linear filter vertically on the result.
 *
 *  SPECIAL NOTES : The horizontal pass produces Height + 1 rows because the
 *                  vertical pass also reads the row below each output row.
 *                  The temporary buffer holds 17 rows of 16 samples, so the
 *                  largest block size that can be handled here is 16x16;
 *                  Width and Height are not checked against that limit.
 *
 ****************************************************************************/
static void filter_block2d_bil
(
    unsigned char *src_ptr,
    unsigned char *dst_ptr,
    unsigned int   src_pitch,
    unsigned int   dst_pitch,
    const short   *HFilter,
    const short   *VFilter,
    int            Width,
    int            Height
)
{
    /* Output of the horizontal pass, consumed by the vertical pass. */
    unsigned short FData[17*16];
    const int rows_needed = Height + 1;

    /* Horizontal pass, including the one extra row below the block. */
    filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, rows_needed, Width, HFilter);

    /* Vertical pass into the destination. */
    filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
}
396
397
398void vp8_bilinear_predict4x4_c
399(
400    unsigned char  *src_ptr,
401    int   src_pixels_per_line,
402    int  xoffset,
403    int  yoffset,
404    unsigned char *dst_ptr,
405    int dst_pitch
406)
407{
408    const short *HFilter;
409    const short *VFilter;
410
411    HFilter = vp8_bilinear_filters[xoffset];
412    VFilter = vp8_bilinear_filters[yoffset];
413#if 0
414    {
415        int i;
416        unsigned char temp1[16];
417        unsigned char temp2[16];
418
419        bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
420        filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
421
422        for (i = 0; i < 16; i++)
423        {
424            if (temp1[i] != temp2[i])
425            {
426                bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
427                filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
428            }
429        }
430    }
431#endif
432    filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
433
434}
435
436void vp8_bilinear_predict8x8_c
437(
438    unsigned char  *src_ptr,
439    int  src_pixels_per_line,
440    int  xoffset,
441    int  yoffset,
442    unsigned char *dst_ptr,
443    int  dst_pitch
444)
445{
446    const short *HFilter;
447    const short *VFilter;
448
449    HFilter = vp8_bilinear_filters[xoffset];
450    VFilter = vp8_bilinear_filters[yoffset];
451
452    filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
453
454}
455
456void vp8_bilinear_predict8x4_c
457(
458    unsigned char  *src_ptr,
459    int  src_pixels_per_line,
460    int  xoffset,
461    int  yoffset,
462    unsigned char *dst_ptr,
463    int  dst_pitch
464)
465{
466    const short *HFilter;
467    const short *VFilter;
468
469    HFilter = vp8_bilinear_filters[xoffset];
470    VFilter = vp8_bilinear_filters[yoffset];
471
472    filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
473
474}
475
476void vp8_bilinear_predict16x16_c
477(
478    unsigned char  *src_ptr,
479    int  src_pixels_per_line,
480    int  xoffset,
481    int  yoffset,
482    unsigned char *dst_ptr,
483    int  dst_pitch
484)
485{
486    const short *HFilter;
487    const short *VFilter;
488
489    HFilter = vp8_bilinear_filters[xoffset];
490    VFilter = vp8_bilinear_filters[yoffset];
491
492    filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
493}
494