1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12#include "filter.h" 13#include "./vp8_rtcd.h" 14 15DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = 16{ 17 { 128, 0 }, 18 { 112, 16 }, 19 { 96, 32 }, 20 { 80, 48 }, 21 { 64, 64 }, 22 { 48, 80 }, 23 { 32, 96 }, 24 { 16, 112 } 25}; 26 27DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = 28{ 29 30 { 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */ 31 { 0, -6, 123, 12, -1, 0 }, 32 { 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */ 33 { 0, -9, 93, 50, -6, 0 }, 34 { 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */ 35 { 0, -6, 50, 93, -9, 0 }, 36 { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */ 37 { 0, -1, 12, 123, -6, 0 }, 38}; 39 40static void filter_block2d_first_pass 41( 42 unsigned char *src_ptr, 43 int *output_ptr, 44 unsigned int src_pixels_per_line, 45 unsigned int pixel_step, 46 unsigned int output_height, 47 unsigned int output_width, 48 const short *vp8_filter 49) 50{ 51 unsigned int i, j; 52 int Temp; 53 54 for (i = 0; i < output_height; i++) 55 { 56 for (j = 0; j < output_width; j++) 57 { 58 Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + 59 ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + 60 ((int)src_ptr[0] * vp8_filter[2]) + 61 ((int)src_ptr[pixel_step] * vp8_filter[3]) + 62 ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + 63 ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + 64 (VP8_FILTER_WEIGHT >> 1); /* Rounding */ 65 66 /* Normalize back to 0-255 */ 67 Temp = Temp >> VP8_FILTER_SHIFT; 68 69 if (Temp < 0) 70 Temp = 0; 71 else if (Temp > 255) 72 Temp = 255; 73 74 output_ptr[j] = Temp; 75 src_ptr++; 76 } 77 78 /* Next row... */ 79 src_ptr += src_pixels_per_line - output_width; 80 output_ptr += output_width; 81 } 82} 83 84static void filter_block2d_second_pass 85( 86 int *src_ptr, 87 unsigned char *output_ptr, 88 int output_pitch, 89 unsigned int src_pixels_per_line, 90 unsigned int pixel_step, 91 unsigned int output_height, 92 unsigned int output_width, 93 const short *vp8_filter 94) 95{ 96 unsigned int i, j; 97 int Temp; 98 99 for (i = 0; i < output_height; i++) 100 { 101 for (j = 0; j < output_width; j++) 102 { 103 /* Apply filter */ 104 Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + 105 ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + 106 ((int)src_ptr[0] * vp8_filter[2]) + 107 ((int)src_ptr[pixel_step] * vp8_filter[3]) + 108 ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + 109 ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + 110 (VP8_FILTER_WEIGHT >> 1); /* Rounding */ 111 112 /* Normalize back to 0-255 */ 113 Temp = Temp >> VP8_FILTER_SHIFT; 114 115 if (Temp < 0) 116 Temp = 0; 117 else if (Temp > 255) 118 Temp = 255; 119 120 output_ptr[j] = (unsigned char)Temp; 121 src_ptr++; 122 } 123 124 /* Start next row */ 125 src_ptr += src_pixels_per_line - output_width; 126 output_ptr += output_pitch; 127 } 128} 129 130 131static void filter_block2d 132( 133 unsigned char *src_ptr, 134 unsigned char *output_ptr, 135 unsigned int src_pixels_per_line, 136 int output_pitch, 137 const short *HFilter, 138 const short *VFilter 139) 140{ 141 int FData[9*4]; /* Temp data buffer used in filtering */ 142 143 /* First filter 1-D horizontally... */ 144 filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter); 145 146 /* then filter verticaly... */ 147 filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter); 148} 149 150 151void vp8_sixtap_predict4x4_c 152( 153 unsigned char *src_ptr, 154 int src_pixels_per_line, 155 int xoffset, 156 int yoffset, 157 unsigned char *dst_ptr, 158 int dst_pitch 159) 160{ 161 const short *HFilter; 162 const short *VFilter; 163 164 HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ 165 VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ 166 167 filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter); 168} 169void vp8_sixtap_predict8x8_c 170( 171 unsigned char *src_ptr, 172 int src_pixels_per_line, 173 int xoffset, 174 int yoffset, 175 unsigned char *dst_ptr, 176 int dst_pitch 177) 178{ 179 const short *HFilter; 180 const short *VFilter; 181 int FData[13*16]; /* Temp data buffer used in filtering */ 182 183 HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ 184 VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ 185 186 /* First filter 1-D horizontally... */ 187 filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter); 188 189 190 /* then filter verticaly... */ 191 filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); 192 193} 194 195void vp8_sixtap_predict8x4_c 196( 197 unsigned char *src_ptr, 198 int src_pixels_per_line, 199 int xoffset, 200 int yoffset, 201 unsigned char *dst_ptr, 202 int dst_pitch 203) 204{ 205 const short *HFilter; 206 const short *VFilter; 207 int FData[13*16]; /* Temp data buffer used in filtering */ 208 209 HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ 210 VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ 211 212 /* First filter 1-D horizontally... */ 213 filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter); 214 215 216 /* then filter verticaly... */ 217 filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter); 218 219} 220 221void vp8_sixtap_predict16x16_c 222( 223 unsigned char *src_ptr, 224 int src_pixels_per_line, 225 int xoffset, 226 int yoffset, 227 unsigned char *dst_ptr, 228 int dst_pitch 229) 230{ 231 const short *HFilter; 232 const short *VFilter; 233 int FData[21*24]; /* Temp data buffer used in filtering */ 234 235 236 HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ 237 VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ 238 239 /* First filter 1-D horizontally... */ 240 filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter); 241 242 /* then filter verticaly... */ 243 filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter); 244 245} 246 247 248/**************************************************************************** 249 * 250 * ROUTINE : filter_block2d_bil_first_pass 251 * 252 * INPUTS : UINT8 *src_ptr : Pointer to source block. 253 * UINT32 src_stride : Stride of source block. 254 * UINT32 height : Block height. 255 * UINT32 width : Block width. 256 * INT32 *vp8_filter : Array of 2 bi-linear filter taps. 257 * 258 * OUTPUTS : INT32 *dst_ptr : Pointer to filtered block. 259 * 260 * RETURNS : void 261 * 262 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block 263 * in the horizontal direction to produce the filtered output 264 * block. Used to implement first-pass of 2-D separable filter. 265 * 266 * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. 267 * Two filter taps should sum to VP8_FILTER_WEIGHT. 268 * 269 ****************************************************************************/ 270static void filter_block2d_bil_first_pass 271( 272 unsigned char *src_ptr, 273 unsigned short *dst_ptr, 274 unsigned int src_stride, 275 unsigned int height, 276 unsigned int width, 277 const short *vp8_filter 278) 279{ 280 unsigned int i, j; 281 282 for (i = 0; i < height; i++) 283 { 284 for (j = 0; j < width; j++) 285 { 286 /* Apply bilinear filter */ 287 dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + 288 ((int)src_ptr[1] * vp8_filter[1]) + 289 (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; 290 src_ptr++; 291 } 292 293 /* Next row... */ 294 src_ptr += src_stride - width; 295 dst_ptr += width; 296 } 297} 298 299/**************************************************************************** 300 * 301 * ROUTINE : filter_block2d_bil_second_pass 302 * 303 * INPUTS : INT32 *src_ptr : Pointer to source block. 304 * UINT32 dst_pitch : Destination block pitch. 305 * UINT32 height : Block height. 306 * UINT32 width : Block width. 307 * INT32 *vp8_filter : Array of 2 bi-linear filter taps. 308 * 309 * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. 310 * 311 * RETURNS : void 312 * 313 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block 314 * in the vertical direction to produce the filtered output 315 * block. Used to implement second-pass of 2-D separable filter. 316 * 317 * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass. 318 * Two filter taps should sum to VP8_FILTER_WEIGHT. 319 * 320 ****************************************************************************/ 321static void filter_block2d_bil_second_pass 322( 323 unsigned short *src_ptr, 324 unsigned char *dst_ptr, 325 int dst_pitch, 326 unsigned int height, 327 unsigned int width, 328 const short *vp8_filter 329) 330{ 331 unsigned int i, j; 332 int Temp; 333 334 for (i = 0; i < height; i++) 335 { 336 for (j = 0; j < width; j++) 337 { 338 /* Apply filter */ 339 Temp = ((int)src_ptr[0] * vp8_filter[0]) + 340 ((int)src_ptr[width] * vp8_filter[1]) + 341 (VP8_FILTER_WEIGHT / 2); 342 dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); 343 src_ptr++; 344 } 345 346 /* Next row... */ 347 dst_ptr += dst_pitch; 348 } 349} 350 351 352/**************************************************************************** 353 * 354 * ROUTINE : filter_block2d_bil 355 * 356 * INPUTS : UINT8 *src_ptr : Pointer to source block. 357 * UINT32 src_pitch : Stride of source block. 358 * UINT32 dst_pitch : Stride of destination block. 359 * INT32 *HFilter : Array of 2 horizontal filter taps. 360 * INT32 *VFilter : Array of 2 vertical filter taps. 361 * INT32 Width : Block width 362 * INT32 Height : Block height 363 * 364 * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. 365 * 366 * RETURNS : void 367 * 368 * FUNCTION : 2-D filters an input block by applying a 2-tap 369 * bi-linear filter horizontally followed by a 2-tap 370 * bi-linear filter vertically on the result. 371 * 372 * SPECIAL NOTES : The largest block size can be handled here is 16x16 373 * 374 ****************************************************************************/ 375static void filter_block2d_bil 376( 377 unsigned char *src_ptr, 378 unsigned char *dst_ptr, 379 unsigned int src_pitch, 380 unsigned int dst_pitch, 381 const short *HFilter, 382 const short *VFilter, 383 int Width, 384 int Height 385) 386{ 387 388 unsigned short FData[17*16]; /* Temp data buffer used in filtering */ 389 390 /* First filter 1-D horizontally... */ 391 filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter); 392 393 /* then 1-D vertically... */ 394 filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter); 395} 396 397 398void vp8_bilinear_predict4x4_c 399( 400 unsigned char *src_ptr, 401 int src_pixels_per_line, 402 int xoffset, 403 int yoffset, 404 unsigned char *dst_ptr, 405 int dst_pitch 406) 407{ 408 const short *HFilter; 409 const short *VFilter; 410 411 HFilter = vp8_bilinear_filters[xoffset]; 412 VFilter = vp8_bilinear_filters[yoffset]; 413#if 0 414 { 415 int i; 416 unsigned char temp1[16]; 417 unsigned char temp2[16]; 418 419 bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); 420 filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); 421 422 for (i = 0; i < 16; i++) 423 { 424 if (temp1[i] != temp2[i]) 425 { 426 bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); 427 filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); 428 } 429 } 430 } 431#endif 432 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4); 433 434} 435 436void vp8_bilinear_predict8x8_c 437( 438 unsigned char *src_ptr, 439 int src_pixels_per_line, 440 int xoffset, 441 int yoffset, 442 unsigned char *dst_ptr, 443 int dst_pitch 444) 445{ 446 const short *HFilter; 447 const short *VFilter; 448 449 HFilter = vp8_bilinear_filters[xoffset]; 450 VFilter = vp8_bilinear_filters[yoffset]; 451 452 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8); 453 454} 455 456void vp8_bilinear_predict8x4_c 457( 458 unsigned char *src_ptr, 459 int src_pixels_per_line, 460 int xoffset, 461 int yoffset, 462 unsigned char *dst_ptr, 463 int dst_pitch 464) 465{ 466 const short *HFilter; 467 const short *VFilter; 468 469 HFilter = vp8_bilinear_filters[xoffset]; 470 VFilter = vp8_bilinear_filters[yoffset]; 471 472 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4); 473 474} 475 476void vp8_bilinear_predict16x16_c 477( 478 unsigned char *src_ptr, 479 int src_pixels_per_line, 480 int xoffset, 481 int yoffset, 482 unsigned char *dst_ptr, 483 int dst_pitch 484) 485{ 486 const short *HFilter; 487 const short *VFilter; 488 489 HFilter = vp8_bilinear_filters[xoffset]; 490 VFilter = vp8_bilinear_filters[yoffset]; 491 492 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16); 493} 494