1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11
12#include <stdlib.h>
13
14#define BLOCK_HEIGHT_WIDTH 4
15#define VP8_FILTER_WEIGHT 128
16#define VP8_FILTER_SHIFT  7
17
18
19static const int bilinear_filters[8][2] =
20{
21    { 128,   0 },
22    { 112,  16 },
23    {  96,  32 },
24    {  80,  48 },
25    {  64,  64 },
26    {  48,  80 },
27    {  32,  96 },
28    {  16, 112 }
29};
30
31
32static const short sub_pel_filters[8][6] =
33{
34
35    { 0,  0,  128,    0,   0,  0 },         /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
36    { 0, -6,  123,   12,  -1,  0 },
37    { 2, -11, 108,   36,  -8,  1 },         /* New 1/4 pel 6 tap filter */
38    { 0, -9,   93,   50,  -6,  0 },
39    { 3, -16,  77,   77, -16,  3 },         /* New 1/2 pel 6 tap filter */
40    { 0, -6,   50,   93,  -9,  0 },
41    { 1, -8,   36,  108, -11,  2 },         /* New 1/4 pel 6 tap filter */
42    { 0, -1,   12,  123,  -6,  0 },
43
44
45
46};
47
48void vp8_filter_block2d_first_pass
49(
50    unsigned char *src_ptr,
51    int *output_ptr,
52    unsigned int src_pixels_per_line,
53    unsigned int pixel_step,
54    unsigned int output_height,
55    unsigned int output_width,
56    const short *vp8_filter
57)
58{
59    unsigned int i, j;
60    int  Temp;
61
62    for (i = 0; i < output_height; i++)
63    {
64        for (j = 0; j < output_width; j++)
65        {
66            Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
67                   ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
68                   ((int)src_ptr[0]                 * vp8_filter[2]) +
69                   ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
70                   ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
71                   ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
72                   (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
73
74            /* Normalize back to 0-255 */
75            Temp = Temp >> VP8_FILTER_SHIFT;
76
77            if (Temp < 0)
78                Temp = 0;
79            else if (Temp > 255)
80                Temp = 255;
81
82            output_ptr[j] = Temp;
83            src_ptr++;
84        }
85
86        /* Next row... */
87        src_ptr    += src_pixels_per_line - output_width;
88        output_ptr += output_width;
89    }
90}
91
92void vp8_filter_block2d_second_pass
93(
94    int *src_ptr,
95    unsigned char *output_ptr,
96    int output_pitch,
97    unsigned int src_pixels_per_line,
98    unsigned int pixel_step,
99    unsigned int output_height,
100    unsigned int output_width,
101    const short *vp8_filter
102)
103{
104    unsigned int i, j;
105    int  Temp;
106
107    for (i = 0; i < output_height; i++)
108    {
109        for (j = 0; j < output_width; j++)
110        {
111            /* Apply filter */
112            Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
113                   ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
114                   ((int)src_ptr[0]                 * vp8_filter[2]) +
115                   ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
116                   ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
117                   ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
118                   (VP8_FILTER_WEIGHT >> 1);   /* Rounding */
119
120            /* Normalize back to 0-255 */
121            Temp = Temp >> VP8_FILTER_SHIFT;
122
123            if (Temp < 0)
124                Temp = 0;
125            else if (Temp > 255)
126                Temp = 255;
127
128            output_ptr[j] = (unsigned char)Temp;
129            src_ptr++;
130        }
131
132        /* Start next row */
133        src_ptr    += src_pixels_per_line - output_width;
134        output_ptr += output_pitch;
135    }
136}
137
138
139void vp8_filter_block2d
140(
141    unsigned char  *src_ptr,
142    unsigned char  *output_ptr,
143    unsigned int src_pixels_per_line,
144    int output_pitch,
145    const short  *HFilter,
146    const short  *VFilter
147)
148{
149    int FData[9*4]; /* Temp data bufffer used in filtering */
150
151    /* First filter 1-D horizontally... */
152    vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
153
154    /* then filter verticaly... */
155    vp8_filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
156}
157
158
159void vp8_block_variation_c
160(
161    unsigned char  *src_ptr,
162    int   src_pixels_per_line,
163    int *HVar,
164    int *VVar
165)
166{
167    int i, j;
168    unsigned char *Ptr = src_ptr;
169
170    for (i = 0; i < 4; i++)
171    {
172        for (j = 0; j < 4; j++)
173        {
174            *HVar += abs((int)Ptr[j] - (int)Ptr[j+1]);
175            *VVar += abs((int)Ptr[j] - (int)Ptr[j+src_pixels_per_line]);
176        }
177
178        Ptr += src_pixels_per_line;
179    }
180}
181
182
183
184
185void vp8_sixtap_predict_c
186(
187    unsigned char  *src_ptr,
188    int   src_pixels_per_line,
189    int  xoffset,
190    int  yoffset,
191    unsigned char *dst_ptr,
192    int dst_pitch
193)
194{
195    const short  *HFilter;
196    const short  *VFilter;
197
198    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
199    VFilter = sub_pel_filters[yoffset];   /* 6 tap */
200
201    vp8_filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
202}
203void vp8_sixtap_predict8x8_c
204(
205    unsigned char  *src_ptr,
206    int  src_pixels_per_line,
207    int  xoffset,
208    int  yoffset,
209    unsigned char *dst_ptr,
210    int  dst_pitch
211)
212{
213    const short  *HFilter;
214    const short  *VFilter;
215    int FData[13*16];   /* Temp data bufffer used in filtering */
216
217    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
218    VFilter = sub_pel_filters[yoffset];   /* 6 tap */
219
220    /* First filter 1-D horizontally... */
221    vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
222
223
224    /* then filter verticaly... */
225    vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
226
227}
228
229void vp8_sixtap_predict8x4_c
230(
231    unsigned char  *src_ptr,
232    int  src_pixels_per_line,
233    int  xoffset,
234    int  yoffset,
235    unsigned char *dst_ptr,
236    int  dst_pitch
237)
238{
239    const short  *HFilter;
240    const short  *VFilter;
241    int FData[13*16];   /* Temp data bufffer used in filtering */
242
243    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
244    VFilter = sub_pel_filters[yoffset];   /* 6 tap */
245
246    /* First filter 1-D horizontally... */
247    vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
248
249
250    /* then filter verticaly... */
251    vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
252
253}
254
255void vp8_sixtap_predict16x16_c
256(
257    unsigned char  *src_ptr,
258    int  src_pixels_per_line,
259    int  xoffset,
260    int  yoffset,
261    unsigned char *dst_ptr,
262    int  dst_pitch
263)
264{
265    const short  *HFilter;
266    const short  *VFilter;
267    int FData[21*24];   /* Temp data bufffer used in filtering */
268
269
270    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
271    VFilter = sub_pel_filters[yoffset];   /* 6 tap */
272
273    /* First filter 1-D horizontally... */
274    vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
275
276    /* then filter verticaly... */
277    vp8_filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
278
279}
280
281
282/****************************************************************************
283 *
284 *  ROUTINE       : filter_block2d_bil_first_pass
285 *
286 *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.
287 *                  UINT32 src_pixels_per_line : Stride of input block.
288 *                  UINT32 pixel_step        : Offset between filter input samples (see notes).
289 *                  UINT32 output_height     : Input block height.
290 *                  UINT32 output_width      : Input block width.
291 *                  INT32  *vp8_filter          : Array of 2 bi-linear filter taps.
292 *
293 *  OUTPUTS       : INT32 *output_ptr        : Pointer to filtered block.
294 *
295 *  RETURNS       : void
296 *
297 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
298 *                  either horizontal or vertical direction to produce the
299 *                  filtered output block. Used to implement first-pass
300 *                  of 2-D separable filter.
301 *
302 *  SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
303 *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
304 *                  pixel_step defines whether the filter is applied
305 *                  horizontally (pixel_step=1) or vertically (pixel_step=stride).
306 *                  It defines the offset required to move from one input
307 *                  to the next.
308 *
309 ****************************************************************************/
310void vp8_filter_block2d_bil_first_pass
311(
312    unsigned char *src_ptr,
313    unsigned short *output_ptr,
314    unsigned int src_pixels_per_line,
315    int pixel_step,
316    unsigned int output_height,
317    unsigned int output_width,
318    const int *vp8_filter
319)
320{
321    unsigned int i, j;
322
323    for (i = 0; i < output_height; i++)
324    {
325        for (j = 0; j < output_width; j++)
326        {
327            /* Apply bilinear filter */
328            output_ptr[j] = (((int)src_ptr[0]          * vp8_filter[0]) +
329                             ((int)src_ptr[pixel_step] * vp8_filter[1]) +
330                             (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
331            src_ptr++;
332        }
333
334        /* Next row... */
335        src_ptr    += src_pixels_per_line - output_width;
336        output_ptr += output_width;
337    }
338}
339
340/****************************************************************************
341 *
342 *  ROUTINE       : filter_block2d_bil_second_pass
343 *
344 *  INPUTS        : INT32  *src_ptr          : Pointer to source block.
345 *                  UINT32 src_pixels_per_line : Stride of input block.
346 *                  UINT32 pixel_step        : Offset between filter input samples (see notes).
347 *                  UINT32 output_height     : Input block height.
348 *                  UINT32 output_width      : Input block width.
349 *                  INT32  *vp8_filter          : Array of 2 bi-linear filter taps.
350 *
351 *  OUTPUTS       : UINT16 *output_ptr       : Pointer to filtered block.
352 *
353 *  RETURNS       : void
354 *
355 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
356 *                  either horizontal or vertical direction to produce the
357 *                  filtered output block. Used to implement second-pass
358 *                  of 2-D separable filter.
359 *
360 *  SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
361 *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
362 *                  pixel_step defines whether the filter is applied
363 *                  horizontally (pixel_step=1) or vertically (pixel_step=stride).
364 *                  It defines the offset required to move from one input
365 *                  to the next.
366 *
367 ****************************************************************************/
368void vp8_filter_block2d_bil_second_pass
369(
370    unsigned short *src_ptr,
371    unsigned char  *output_ptr,
372    int output_pitch,
373    unsigned int  src_pixels_per_line,
374    unsigned int  pixel_step,
375    unsigned int  output_height,
376    unsigned int  output_width,
377    const int *vp8_filter
378)
379{
380    unsigned int  i, j;
381    int  Temp;
382
383    for (i = 0; i < output_height; i++)
384    {
385        for (j = 0; j < output_width; j++)
386        {
387            /* Apply filter */
388            Temp = ((int)src_ptr[0]         * vp8_filter[0]) +
389                   ((int)src_ptr[pixel_step] * vp8_filter[1]) +
390                   (VP8_FILTER_WEIGHT / 2);
391            output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
392            src_ptr++;
393        }
394
395        /* Next row... */
396        src_ptr    += src_pixels_per_line - output_width;
397        output_ptr += output_pitch;
398    }
399}
400
401
402/****************************************************************************
403 *
404 *  ROUTINE       : filter_block2d_bil
405 *
406 *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.
407 *                  UINT32 src_pixels_per_line : Stride of input block.
408 *                  INT32  *HFilter         : Array of 2 horizontal filter taps.
409 *                  INT32  *VFilter         : Array of 2 vertical filter taps.
410 *
411 *  OUTPUTS       : UINT16 *output_ptr       : Pointer to filtered block.
412 *
413 *  RETURNS       : void
414 *
415 *  FUNCTION      : 2-D filters an input block by applying a 2-tap
416 *                  bi-linear filter horizontally followed by a 2-tap
417 *                  bi-linear filter vertically on the result.
418 *
419 *  SPECIAL NOTES : The largest block size can be handled here is 16x16
420 *
421 ****************************************************************************/
422void vp8_filter_block2d_bil
423(
424    unsigned char *src_ptr,
425    unsigned char *output_ptr,
426    unsigned int   src_pixels_per_line,
427    unsigned int   dst_pitch,
428    const int      *HFilter,
429    const int      *VFilter,
430    int            Width,
431    int            Height
432)
433{
434
435    unsigned short FData[17*16];    /* Temp data bufffer used in filtering */
436
437    /* First filter 1-D horizontally... */
438    vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, Height + 1, Width, HFilter);
439
440    /* then 1-D vertically... */
441    vp8_filter_block2d_bil_second_pass(FData, output_ptr, dst_pitch, Width, Width, Height, Width, VFilter);
442}
443
444
445void vp8_bilinear_predict4x4_c
446(
447    unsigned char  *src_ptr,
448    int   src_pixels_per_line,
449    int  xoffset,
450    int  yoffset,
451    unsigned char *dst_ptr,
452    int dst_pitch
453)
454{
455    const int  *HFilter;
456    const int  *VFilter;
457
458    HFilter = bilinear_filters[xoffset];
459    VFilter = bilinear_filters[yoffset];
460#if 0
461    {
462        int i;
463        unsigned char temp1[16];
464        unsigned char temp2[16];
465
466        bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
467        vp8_filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
468
469        for (i = 0; i < 16; i++)
470        {
471            if (temp1[i] != temp2[i])
472            {
473                bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
474                vp8_filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
475            }
476        }
477    }
478#endif
479    vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
480
481}
482
483void vp8_bilinear_predict8x8_c
484(
485    unsigned char  *src_ptr,
486    int  src_pixels_per_line,
487    int  xoffset,
488    int  yoffset,
489    unsigned char *dst_ptr,
490    int  dst_pitch
491)
492{
493    const int  *HFilter;
494    const int  *VFilter;
495
496    HFilter = bilinear_filters[xoffset];
497    VFilter = bilinear_filters[yoffset];
498
499    vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
500
501}
502
503void vp8_bilinear_predict8x4_c
504(
505    unsigned char  *src_ptr,
506    int  src_pixels_per_line,
507    int  xoffset,
508    int  yoffset,
509    unsigned char *dst_ptr,
510    int  dst_pitch
511)
512{
513    const int  *HFilter;
514    const int  *VFilter;
515
516    HFilter = bilinear_filters[xoffset];
517    VFilter = bilinear_filters[yoffset];
518
519    vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
520
521}
522
523void vp8_bilinear_predict16x16_c
524(
525    unsigned char  *src_ptr,
526    int  src_pixels_per_line,
527    int  xoffset,
528    int  yoffset,
529    unsigned char *dst_ptr,
530    int  dst_pitch
531)
532{
533    const int  *HFilter;
534    const int  *VFilter;
535
536    HFilter = bilinear_filters[xoffset];
537    VFilter = bilinear_filters[yoffset];
538
539    vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
540}
541