190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber/*
2f71323e297a928af368937089d3ed71239786f86Andreas Huber *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber *
4f71323e297a928af368937089d3ed71239786f86Andreas Huber *  Use of this source code is governed by a BSD-style license
5f71323e297a928af368937089d3ed71239786f86Andreas Huber *  that can be found in the LICENSE file in the root of the source
6f71323e297a928af368937089d3ed71239786f86Andreas Huber *  tree. An additional intellectual property rights grant can be found
7f71323e297a928af368937089d3ed71239786f86Andreas Huber *  in the file PATENTS.  All contributing project authors may
8f71323e297a928af368937089d3ed71239786f86Andreas Huber *  be found in the AUTHORS file in the root of the source tree.
990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber */
1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
117bc9febe8749e98a3812a0dc4380ceae75c29450Johann#include <assert.h>
12da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "./vp8_rtcd.h"
137bc9febe8749e98a3812a0dc4380ceae75c29450Johann#include "vp8/common/filter.h"
1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
157bc9febe8749e98a3812a0dc4380ceae75c29450JohannDECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = {
167bc9febe8749e98a3812a0dc4380ceae75c29450Johann  { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
177bc9febe8749e98a3812a0dc4380ceae75c29450Johann  { 64, 64 }, { 48, 80 },  { 32, 96 }, { 16, 112 }
1890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber};
1990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
207bc9febe8749e98a3812a0dc4380ceae75c29450JohannDECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = {
217bc9febe8749e98a3812a0dc4380ceae75c29450Johann
227bc9febe8749e98a3812a0dc4380ceae75c29450Johann  { 0, 0, 128, 0, 0,
237bc9febe8749e98a3812a0dc4380ceae75c29450Johann    0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
247bc9febe8749e98a3812a0dc4380ceae75c29450Johann  { 0, -6, 123, 12, -1, 0 },
257bc9febe8749e98a3812a0dc4380ceae75c29450Johann  { 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
267bc9febe8749e98a3812a0dc4380ceae75c29450Johann  { 0, -9, 93, 50, -6, 0 },
277bc9febe8749e98a3812a0dc4380ceae75c29450Johann  { 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
287bc9febe8749e98a3812a0dc4380ceae75c29450Johann  { 0, -6, 50, 93, -9, 0 },
297bc9febe8749e98a3812a0dc4380ceae75c29450Johann  { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
307bc9febe8749e98a3812a0dc4380ceae75c29450Johann  { 0, -1, 12, 123, -6, 0 },
3190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber};
3290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
337bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic void filter_block2d_first_pass(unsigned char *src_ptr, int *output_ptr,
347bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                      unsigned int src_pixels_per_line,
357bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                      unsigned int pixel_step,
367bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                      unsigned int output_height,
377bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                      unsigned int output_width,
387bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                      const short *vp8_filter) {
397bc9febe8749e98a3812a0dc4380ceae75c29450Johann  unsigned int i, j;
407bc9febe8749e98a3812a0dc4380ceae75c29450Johann  int Temp;
417bc9febe8749e98a3812a0dc4380ceae75c29450Johann
427bc9febe8749e98a3812a0dc4380ceae75c29450Johann  for (i = 0; i < output_height; ++i) {
437bc9febe8749e98a3812a0dc4380ceae75c29450Johann    for (j = 0; j < output_width; ++j) {
447bc9febe8749e98a3812a0dc4380ceae75c29450Johann      Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
457bc9febe8749e98a3812a0dc4380ceae75c29450Johann             ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
467bc9febe8749e98a3812a0dc4380ceae75c29450Johann             ((int)src_ptr[0] * vp8_filter[2]) +
477bc9febe8749e98a3812a0dc4380ceae75c29450Johann             ((int)src_ptr[pixel_step] * vp8_filter[3]) +
487bc9febe8749e98a3812a0dc4380ceae75c29450Johann             ((int)src_ptr[2 * pixel_step] * vp8_filter[4]) +
497bc9febe8749e98a3812a0dc4380ceae75c29450Johann             ((int)src_ptr[3 * pixel_step] * vp8_filter[5]) +
507bc9febe8749e98a3812a0dc4380ceae75c29450Johann             (VP8_FILTER_WEIGHT >> 1); /* Rounding */
517bc9febe8749e98a3812a0dc4380ceae75c29450Johann
527bc9febe8749e98a3812a0dc4380ceae75c29450Johann      /* Normalize back to 0-255 */
537bc9febe8749e98a3812a0dc4380ceae75c29450Johann      Temp = Temp >> VP8_FILTER_SHIFT;
547bc9febe8749e98a3812a0dc4380ceae75c29450Johann
557bc9febe8749e98a3812a0dc4380ceae75c29450Johann      if (Temp < 0) {
567bc9febe8749e98a3812a0dc4380ceae75c29450Johann        Temp = 0;
577bc9febe8749e98a3812a0dc4380ceae75c29450Johann      } else if (Temp > 255) {
587bc9febe8749e98a3812a0dc4380ceae75c29450Johann        Temp = 255;
597bc9febe8749e98a3812a0dc4380ceae75c29450Johann      }
607bc9febe8749e98a3812a0dc4380ceae75c29450Johann
617bc9febe8749e98a3812a0dc4380ceae75c29450Johann      output_ptr[j] = Temp;
627bc9febe8749e98a3812a0dc4380ceae75c29450Johann      src_ptr++;
6390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    }
647bc9febe8749e98a3812a0dc4380ceae75c29450Johann
657bc9febe8749e98a3812a0dc4380ceae75c29450Johann    /* Next row... */
667bc9febe8749e98a3812a0dc4380ceae75c29450Johann    src_ptr += src_pixels_per_line - output_width;
677bc9febe8749e98a3812a0dc4380ceae75c29450Johann    output_ptr += output_width;
687bc9febe8749e98a3812a0dc4380ceae75c29450Johann  }
6990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
7090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
717bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic void filter_block2d_second_pass(int *src_ptr, unsigned char *output_ptr,
727bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                       int output_pitch,
737bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                       unsigned int src_pixels_per_line,
747bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                       unsigned int pixel_step,
757bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                       unsigned int output_height,
767bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                       unsigned int output_width,
777bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                       const short *vp8_filter) {
787bc9febe8749e98a3812a0dc4380ceae75c29450Johann  unsigned int i, j;
797bc9febe8749e98a3812a0dc4380ceae75c29450Johann  int Temp;
807bc9febe8749e98a3812a0dc4380ceae75c29450Johann
817bc9febe8749e98a3812a0dc4380ceae75c29450Johann  for (i = 0; i < output_height; ++i) {
827bc9febe8749e98a3812a0dc4380ceae75c29450Johann    for (j = 0; j < output_width; ++j) {
837bc9febe8749e98a3812a0dc4380ceae75c29450Johann      /* Apply filter */
847bc9febe8749e98a3812a0dc4380ceae75c29450Johann      Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
857bc9febe8749e98a3812a0dc4380ceae75c29450Johann             ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
867bc9febe8749e98a3812a0dc4380ceae75c29450Johann             ((int)src_ptr[0] * vp8_filter[2]) +
877bc9febe8749e98a3812a0dc4380ceae75c29450Johann             ((int)src_ptr[pixel_step] * vp8_filter[3]) +
887bc9febe8749e98a3812a0dc4380ceae75c29450Johann             ((int)src_ptr[2 * pixel_step] * vp8_filter[4]) +
897bc9febe8749e98a3812a0dc4380ceae75c29450Johann             ((int)src_ptr[3 * pixel_step] * vp8_filter[5]) +
907bc9febe8749e98a3812a0dc4380ceae75c29450Johann             (VP8_FILTER_WEIGHT >> 1); /* Rounding */
917bc9febe8749e98a3812a0dc4380ceae75c29450Johann
927bc9febe8749e98a3812a0dc4380ceae75c29450Johann      /* Normalize back to 0-255 */
937bc9febe8749e98a3812a0dc4380ceae75c29450Johann      Temp = Temp >> VP8_FILTER_SHIFT;
947bc9febe8749e98a3812a0dc4380ceae75c29450Johann
957bc9febe8749e98a3812a0dc4380ceae75c29450Johann      if (Temp < 0) {
967bc9febe8749e98a3812a0dc4380ceae75c29450Johann        Temp = 0;
977bc9febe8749e98a3812a0dc4380ceae75c29450Johann      } else if (Temp > 255) {
987bc9febe8749e98a3812a0dc4380ceae75c29450Johann        Temp = 255;
997bc9febe8749e98a3812a0dc4380ceae75c29450Johann      }
1007bc9febe8749e98a3812a0dc4380ceae75c29450Johann
1017bc9febe8749e98a3812a0dc4380ceae75c29450Johann      output_ptr[j] = (unsigned char)Temp;
1027bc9febe8749e98a3812a0dc4380ceae75c29450Johann      src_ptr++;
10390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    }
10490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1057bc9febe8749e98a3812a0dc4380ceae75c29450Johann    /* Start next row */
1067bc9febe8749e98a3812a0dc4380ceae75c29450Johann    src_ptr += src_pixels_per_line - output_width;
1077bc9febe8749e98a3812a0dc4380ceae75c29450Johann    output_ptr += output_pitch;
1087bc9febe8749e98a3812a0dc4380ceae75c29450Johann  }
1097bc9febe8749e98a3812a0dc4380ceae75c29450Johann}
11090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/* Six-tap 2-D filter for a 4x4 block.
 *
 * The first pass filters 9 rows of 4 samples, starting two rows above
 * src_ptr, into a scratch buffer. The second pass then filters that buffer
 * vertically, starting at its third row, to produce the 4x4 output.
 */
static void filter_block2d(unsigned char *src_ptr, unsigned char *output_ptr,
                           unsigned int src_pixels_per_line, int output_pitch,
                           const short *HFilter, const short *VFilter) {
  int FData[9 * 4]; /* 9 rows x 4 columns of first-pass results */
  unsigned char *first_row = src_ptr - (2 * src_pixels_per_line);
  int *centre_row = &FData[2 * 4]; /* skip the two rows above the block */

  /* Filter horizontally first... */
  filter_block2d_first_pass(first_row, FData, src_pixels_per_line, 1, 9, 4,
                            HFilter);

  /* ...then vertically. */
  filter_block2d_second_pass(centre_row, output_ptr, output_pitch, 4, 4, 4, 4,
                             VFilter);
}
12490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1257bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vp8_sixtap_predict4x4_c(unsigned char *src_ptr, int src_pixels_per_line,
1267bc9febe8749e98a3812a0dc4380ceae75c29450Johann                             int xoffset, int yoffset, unsigned char *dst_ptr,
1277bc9febe8749e98a3812a0dc4380ceae75c29450Johann                             int dst_pitch) {
1287bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const short *HFilter;
1297bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const short *VFilter;
13090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1317bc9febe8749e98a3812a0dc4380ceae75c29450Johann  HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
1327bc9febe8749e98a3812a0dc4380ceae75c29450Johann  VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
13390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1347bc9febe8749e98a3812a0dc4380ceae75c29450Johann  filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,
1357bc9febe8749e98a3812a0dc4380ceae75c29450Johann                 VFilter);
13690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
1377bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vp8_sixtap_predict8x8_c(unsigned char *src_ptr, int src_pixels_per_line,
1387bc9febe8749e98a3812a0dc4380ceae75c29450Johann                             int xoffset, int yoffset, unsigned char *dst_ptr,
1397bc9febe8749e98a3812a0dc4380ceae75c29450Johann                             int dst_pitch) {
1407bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const short *HFilter;
1417bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const short *VFilter;
1427bc9febe8749e98a3812a0dc4380ceae75c29450Johann  int FData[13 * 16]; /* Temp data buffer used in filtering */
1437bc9febe8749e98a3812a0dc4380ceae75c29450Johann
1447bc9febe8749e98a3812a0dc4380ceae75c29450Johann  HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
1457bc9febe8749e98a3812a0dc4380ceae75c29450Johann  VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
1467bc9febe8749e98a3812a0dc4380ceae75c29450Johann
1477bc9febe8749e98a3812a0dc4380ceae75c29450Johann  /* First filter 1-D horizontally... */
1487bc9febe8749e98a3812a0dc4380ceae75c29450Johann  filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData,
1497bc9febe8749e98a3812a0dc4380ceae75c29450Johann                            src_pixels_per_line, 1, 13, 8, HFilter);
1507bc9febe8749e98a3812a0dc4380ceae75c29450Johann
1517bc9febe8749e98a3812a0dc4380ceae75c29450Johann  /* then filter verticaly... */
1527bc9febe8749e98a3812a0dc4380ceae75c29450Johann  filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8,
1537bc9febe8749e98a3812a0dc4380ceae75c29450Johann                             VFilter);
15490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
15590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1567bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vp8_sixtap_predict8x4_c(unsigned char *src_ptr, int src_pixels_per_line,
1577bc9febe8749e98a3812a0dc4380ceae75c29450Johann                             int xoffset, int yoffset, unsigned char *dst_ptr,
1587bc9febe8749e98a3812a0dc4380ceae75c29450Johann                             int dst_pitch) {
1597bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const short *HFilter;
1607bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const short *VFilter;
1617bc9febe8749e98a3812a0dc4380ceae75c29450Johann  int FData[13 * 16]; /* Temp data buffer used in filtering */
16290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1637bc9febe8749e98a3812a0dc4380ceae75c29450Johann  HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
1647bc9febe8749e98a3812a0dc4380ceae75c29450Johann  VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
16590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1667bc9febe8749e98a3812a0dc4380ceae75c29450Johann  /* First filter 1-D horizontally... */
1677bc9febe8749e98a3812a0dc4380ceae75c29450Johann  filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData,
1687bc9febe8749e98a3812a0dc4380ceae75c29450Johann                            src_pixels_per_line, 1, 9, 8, HFilter);
16990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1707bc9febe8749e98a3812a0dc4380ceae75c29450Johann  /* then filter verticaly... */
1717bc9febe8749e98a3812a0dc4380ceae75c29450Johann  filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8,
1727bc9febe8749e98a3812a0dc4380ceae75c29450Johann                             VFilter);
17390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
17490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1757bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vp8_sixtap_predict16x16_c(unsigned char *src_ptr, int src_pixels_per_line,
1767bc9febe8749e98a3812a0dc4380ceae75c29450Johann                               int xoffset, int yoffset, unsigned char *dst_ptr,
1777bc9febe8749e98a3812a0dc4380ceae75c29450Johann                               int dst_pitch) {
1787bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const short *HFilter;
1797bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const short *VFilter;
1807bc9febe8749e98a3812a0dc4380ceae75c29450Johann  int FData[21 * 24]; /* Temp data buffer used in filtering */
18190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1827bc9febe8749e98a3812a0dc4380ceae75c29450Johann  HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
1837bc9febe8749e98a3812a0dc4380ceae75c29450Johann  VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
18490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1857bc9febe8749e98a3812a0dc4380ceae75c29450Johann  /* First filter 1-D horizontally... */
1867bc9febe8749e98a3812a0dc4380ceae75c29450Johann  filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData,
1877bc9febe8749e98a3812a0dc4380ceae75c29450Johann                            src_pixels_per_line, 1, 21, 16, HFilter);
18890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1897bc9febe8749e98a3812a0dc4380ceae75c29450Johann  /* then filter verticaly... */
1907bc9febe8749e98a3812a0dc4380ceae75c29450Johann  filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16,
1917bc9febe8749e98a3812a0dc4380ceae75c29450Johann                             VFilter);
19290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
19390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/****************************************************************************
 *
 *  ROUTINE       : filter_block2d_bil_first_pass
 *
 *  INPUTS        : UINT8  *src_ptr    : Pointer to source block.
 *                  UINT32  src_stride : Stride of source block.
 *                  UINT32  height     : Block height.
 *                  UINT32  width      : Block width.
 *                  INT16  *vp8_filter : Array of 2 bi-linear filter taps.
 *
 *  OUTPUTS       : UINT16 *dst_ptr    : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
 *                  in the horizontal direction to produce the filtered output
 *                  block. Used to implement first-pass of 2-D separable filter.
 *
 *  SPECIAL NOTES : Produces 16-bit output that has already been rounded and
 *                  shifted down by VP8_FILTER_SHIFT, packed width per row.
 *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
 *
 ****************************************************************************/
2167bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic void filter_block2d_bil_first_pass(
2177bc9febe8749e98a3812a0dc4380ceae75c29450Johann    unsigned char *src_ptr, unsigned short *dst_ptr, unsigned int src_stride,
2187bc9febe8749e98a3812a0dc4380ceae75c29450Johann    unsigned int height, unsigned int width, const short *vp8_filter) {
2197bc9febe8749e98a3812a0dc4380ceae75c29450Johann  unsigned int i, j;
2207bc9febe8749e98a3812a0dc4380ceae75c29450Johann
2217bc9febe8749e98a3812a0dc4380ceae75c29450Johann  for (i = 0; i < height; ++i) {
2227bc9febe8749e98a3812a0dc4380ceae75c29450Johann    for (j = 0; j < width; ++j) {
2237bc9febe8749e98a3812a0dc4380ceae75c29450Johann      /* Apply bilinear filter */
2247bc9febe8749e98a3812a0dc4380ceae75c29450Johann      dst_ptr[j] =
2257bc9febe8749e98a3812a0dc4380ceae75c29450Johann          (((int)src_ptr[0] * vp8_filter[0]) +
2267bc9febe8749e98a3812a0dc4380ceae75c29450Johann           ((int)src_ptr[1] * vp8_filter[1]) + (VP8_FILTER_WEIGHT / 2)) >>
2277bc9febe8749e98a3812a0dc4380ceae75c29450Johann          VP8_FILTER_SHIFT;
2287bc9febe8749e98a3812a0dc4380ceae75c29450Johann      src_ptr++;
22990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    }
2307bc9febe8749e98a3812a0dc4380ceae75c29450Johann
2317bc9febe8749e98a3812a0dc4380ceae75c29450Johann    /* Next row... */
2327bc9febe8749e98a3812a0dc4380ceae75c29450Johann    src_ptr += src_stride - width;
2337bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dst_ptr += width;
2347bc9febe8749e98a3812a0dc4380ceae75c29450Johann  }
23590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
23690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/****************************************************************************
 *
 *  ROUTINE       : filter_block2d_bil_second_pass
 *
 *  INPUTS        : UINT16 *src_ptr    : Pointer to source block.
 *                  INT32   dst_pitch  : Destination block pitch.
 *                  UINT32  height     : Block height.
 *                  UINT32  width      : Block width.
 *                  INT16  *vp8_filter : Array of 2 bi-linear filter taps.
 *
 *  OUTPUTS       : UINT8  *dst_ptr    : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
 *                  in the vertical direction to produce the filtered output
 *                  block. Used to implement second-pass of 2-D separable
 *                  filter.
 *
 *  SPECIAL NOTES : Requires 16-bit input as produced by
 *                  filter_block2d_bil_first_pass, packed width per row and
 *                  containing height + 1 rows.
 *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
 *
 ****************************************************************************/
2617bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic void filter_block2d_bil_second_pass(unsigned short *src_ptr,
2627bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                           unsigned char *dst_ptr,
2637bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                           int dst_pitch, unsigned int height,
2647bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                           unsigned int width,
2657bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                           const short *vp8_filter) {
2667bc9febe8749e98a3812a0dc4380ceae75c29450Johann  unsigned int i, j;
2677bc9febe8749e98a3812a0dc4380ceae75c29450Johann  int Temp;
2687bc9febe8749e98a3812a0dc4380ceae75c29450Johann
2697bc9febe8749e98a3812a0dc4380ceae75c29450Johann  for (i = 0; i < height; ++i) {
2707bc9febe8749e98a3812a0dc4380ceae75c29450Johann    for (j = 0; j < width; ++j) {
2717bc9febe8749e98a3812a0dc4380ceae75c29450Johann      /* Apply filter */
2727bc9febe8749e98a3812a0dc4380ceae75c29450Johann      Temp = ((int)src_ptr[0] * vp8_filter[0]) +
2737bc9febe8749e98a3812a0dc4380ceae75c29450Johann             ((int)src_ptr[width] * vp8_filter[1]) + (VP8_FILTER_WEIGHT / 2);
2747bc9febe8749e98a3812a0dc4380ceae75c29450Johann      dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
2757bc9febe8749e98a3812a0dc4380ceae75c29450Johann      src_ptr++;
27690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    }
27790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
2787bc9febe8749e98a3812a0dc4380ceae75c29450Johann    /* Next row... */
2797bc9febe8749e98a3812a0dc4380ceae75c29450Johann    dst_ptr += dst_pitch;
2807bc9febe8749e98a3812a0dc4380ceae75c29450Johann  }
2817bc9febe8749e98a3812a0dc4380ceae75c29450Johann}
28290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/****************************************************************************
 *
 *  ROUTINE       : filter_block2d_bil
 *
 *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.
 *                  UINT32  src_pitch        : Stride of source block.
 *                  UINT32  dst_pitch        : Stride of destination block.
 *                  INT16  *HFilter          : Array of 2 horizontal filter
 *                                             taps.
 *                  INT16  *VFilter          : Array of 2 vertical filter taps.
 *                  INT32  Width             : Block width
 *                  INT32  Height            : Block height
 *
 *  OUTPUTS       : UINT8  *dst_ptr          : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : 2-D filters an input block by applying a 2-tap
 *                  bi-linear filter horizontally followed by a 2-tap
 *                  bi-linear filter vertically on the result.
 *
 *  SPECIAL NOTES : The largest block size that can be handled here is 16x16,
 *                  because the intermediate buffer holds 17 rows of 16
 *                  samples.
 *
 ****************************************************************************/
static void filter_block2d_bil(unsigned char *src_ptr, unsigned char *dst_ptr,
                               unsigned int src_pitch, unsigned int dst_pitch,
                               const short *HFilter, const short *VFilter,
                               int Width, int Height) {
  /* Intermediate samples between the two passes: up to 17 rows of 16. */
  unsigned short FData[17 * 16];

  /* Horizontal pass. One extra row is produced because the vertical pass
   * blends every row with the row below it. */
  filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width,
                                HFilter);

  /* Vertical pass over the intermediate samples, writing the final block. */
  filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width,
                                 VFilter);
}
32190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3227bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vp8_bilinear_predict4x4_c(unsigned char *src_ptr, int src_pixels_per_line,
3237bc9febe8749e98a3812a0dc4380ceae75c29450Johann                               int xoffset, int yoffset, unsigned char *dst_ptr,
3247bc9febe8749e98a3812a0dc4380ceae75c29450Johann                               int dst_pitch) {
3257bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const short *HFilter;
3267bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const short *VFilter;
32790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3287bc9febe8749e98a3812a0dc4380ceae75c29450Johann  // This represents a copy and is not required to be handled by optimizations.
3297bc9febe8749e98a3812a0dc4380ceae75c29450Johann  assert((xoffset | yoffset) != 0);
33090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3317bc9febe8749e98a3812a0dc4380ceae75c29450Johann  HFilter = vp8_bilinear_filters[xoffset];
3327bc9febe8749e98a3812a0dc4380ceae75c29450Johann  VFilter = vp8_bilinear_filters[yoffset];
3337bc9febe8749e98a3812a0dc4380ceae75c29450Johann  filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,
3347bc9febe8749e98a3812a0dc4380ceae75c29450Johann                     VFilter, 4, 4);
33590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
33690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3377bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vp8_bilinear_predict8x8_c(unsigned char *src_ptr, int src_pixels_per_line,
3387bc9febe8749e98a3812a0dc4380ceae75c29450Johann                               int xoffset, int yoffset, unsigned char *dst_ptr,
3397bc9febe8749e98a3812a0dc4380ceae75c29450Johann                               int dst_pitch) {
3407bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const short *HFilter;
3417bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const short *VFilter;
34290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3437bc9febe8749e98a3812a0dc4380ceae75c29450Johann  assert((xoffset | yoffset) != 0);
34490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3457bc9febe8749e98a3812a0dc4380ceae75c29450Johann  HFilter = vp8_bilinear_filters[xoffset];
3467bc9febe8749e98a3812a0dc4380ceae75c29450Johann  VFilter = vp8_bilinear_filters[yoffset];
34790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3487bc9febe8749e98a3812a0dc4380ceae75c29450Johann  filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,
3497bc9febe8749e98a3812a0dc4380ceae75c29450Johann                     VFilter, 8, 8);
35090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
35190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3527bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vp8_bilinear_predict8x4_c(unsigned char *src_ptr, int src_pixels_per_line,
3537bc9febe8749e98a3812a0dc4380ceae75c29450Johann                               int xoffset, int yoffset, unsigned char *dst_ptr,
3547bc9febe8749e98a3812a0dc4380ceae75c29450Johann                               int dst_pitch) {
3557bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const short *HFilter;
3567bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const short *VFilter;
35790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3587bc9febe8749e98a3812a0dc4380ceae75c29450Johann  assert((xoffset | yoffset) != 0);
35990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3607bc9febe8749e98a3812a0dc4380ceae75c29450Johann  HFilter = vp8_bilinear_filters[xoffset];
3617bc9febe8749e98a3812a0dc4380ceae75c29450Johann  VFilter = vp8_bilinear_filters[yoffset];
36290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3637bc9febe8749e98a3812a0dc4380ceae75c29450Johann  filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,
3647bc9febe8749e98a3812a0dc4380ceae75c29450Johann                     VFilter, 8, 4);
36590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
36690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3677bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vp8_bilinear_predict16x16_c(unsigned char *src_ptr,
3687bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                 int src_pixels_per_line, int xoffset,
3697bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                 int yoffset, unsigned char *dst_ptr,
3707bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                 int dst_pitch) {
3717bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const short *HFilter;
3727bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const short *VFilter;
3737bc9febe8749e98a3812a0dc4380ceae75c29450Johann
3747bc9febe8749e98a3812a0dc4380ceae75c29450Johann  assert((xoffset | yoffset) != 0);
3757bc9febe8749e98a3812a0dc4380ceae75c29450Johann
3767bc9febe8749e98a3812a0dc4380ceae75c29450Johann  HFilter = vp8_bilinear_filters[xoffset];
3777bc9febe8749e98a3812a0dc4380ceae75c29450Johann  VFilter = vp8_bilinear_filters[yoffset];
3787bc9febe8749e98a3812a0dc4380ceae75c29450Johann
3797bc9febe8749e98a3812a0dc4380ceae75c29450Johann  filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,
3807bc9febe8749e98a3812a0dc4380ceae75c29450Johann                     VFilter, 16, 16);
38190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
382