1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12#include "filter.h" 13 14DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = 15{ 16 { 128, 0 }, 17 { 112, 16 }, 18 { 96, 32 }, 19 { 80, 48 }, 20 { 64, 64 }, 21 { 48, 80 }, 22 { 32, 96 }, 23 { 16, 112 } 24}; 25 26DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = 27{ 28 29 { 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */ 30 { 0, -6, 123, 12, -1, 0 }, 31 { 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */ 32 { 0, -9, 93, 50, -6, 0 }, 33 { 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */ 34 { 0, -6, 50, 93, -9, 0 }, 35 { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */ 36 { 0, -1, 12, 123, -6, 0 }, 37}; 38 39static void filter_block2d_first_pass 40( 41 unsigned char *src_ptr, 42 int *output_ptr, 43 unsigned int src_pixels_per_line, 44 unsigned int pixel_step, 45 unsigned int output_height, 46 unsigned int output_width, 47 const short *vp8_filter 48) 49{ 50 unsigned int i, j; 51 int Temp; 52 53 for (i = 0; i < output_height; i++) 54 { 55 for (j = 0; j < output_width; j++) 56 { 57 Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + 58 ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + 59 ((int)src_ptr[0] * vp8_filter[2]) + 60 ((int)src_ptr[pixel_step] * vp8_filter[3]) + 61 ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + 62 ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + 63 (VP8_FILTER_WEIGHT >> 1); /* Rounding */ 64 65 /* Normalize back to 0-255 */ 66 Temp = Temp >> VP8_FILTER_SHIFT; 67 68 if (Temp < 0) 69 Temp = 0; 70 else if (Temp > 255) 71 Temp = 255; 72 73 output_ptr[j] = Temp; 74 src_ptr++; 75 } 76 77 /* Next row... */ 78 src_ptr += src_pixels_per_line - output_width; 79 output_ptr += output_width; 80 } 81} 82 83static void filter_block2d_second_pass 84( 85 int *src_ptr, 86 unsigned char *output_ptr, 87 int output_pitch, 88 unsigned int src_pixels_per_line, 89 unsigned int pixel_step, 90 unsigned int output_height, 91 unsigned int output_width, 92 const short *vp8_filter 93) 94{ 95 unsigned int i, j; 96 int Temp; 97 98 for (i = 0; i < output_height; i++) 99 { 100 for (j = 0; j < output_width; j++) 101 { 102 /* Apply filter */ 103 Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + 104 ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + 105 ((int)src_ptr[0] * vp8_filter[2]) + 106 ((int)src_ptr[pixel_step] * vp8_filter[3]) + 107 ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + 108 ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + 109 (VP8_FILTER_WEIGHT >> 1); /* Rounding */ 110 111 /* Normalize back to 0-255 */ 112 Temp = Temp >> VP8_FILTER_SHIFT; 113 114 if (Temp < 0) 115 Temp = 0; 116 else if (Temp > 255) 117 Temp = 255; 118 119 output_ptr[j] = (unsigned char)Temp; 120 src_ptr++; 121 } 122 123 /* Start next row */ 124 src_ptr += src_pixels_per_line - output_width; 125 output_ptr += output_pitch; 126 } 127} 128 129 130static void filter_block2d 131( 132 unsigned char *src_ptr, 133 unsigned char *output_ptr, 134 unsigned int src_pixels_per_line, 135 int output_pitch, 136 const short *HFilter, 137 const short *VFilter 138) 139{ 140 int FData[9*4]; /* Temp data buffer used in filtering */ 141 142 /* First filter 1-D horizontally... */ 143 filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter); 144 145 /* then filter verticaly... */ 146 filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter); 147} 148 149 150void vp8_sixtap_predict4x4_c 151( 152 unsigned char *src_ptr, 153 int src_pixels_per_line, 154 int xoffset, 155 int yoffset, 156 unsigned char *dst_ptr, 157 int dst_pitch 158) 159{ 160 const short *HFilter; 161 const short *VFilter; 162 163 HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ 164 VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ 165 166 filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter); 167} 168void vp8_sixtap_predict8x8_c 169( 170 unsigned char *src_ptr, 171 int src_pixels_per_line, 172 int xoffset, 173 int yoffset, 174 unsigned char *dst_ptr, 175 int dst_pitch 176) 177{ 178 const short *HFilter; 179 const short *VFilter; 180 int FData[13*16]; /* Temp data buffer used in filtering */ 181 182 HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ 183 VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ 184 185 /* First filter 1-D horizontally... */ 186 filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter); 187 188 189 /* then filter verticaly... */ 190 filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); 191 192} 193 194void vp8_sixtap_predict8x4_c 195( 196 unsigned char *src_ptr, 197 int src_pixels_per_line, 198 int xoffset, 199 int yoffset, 200 unsigned char *dst_ptr, 201 int dst_pitch 202) 203{ 204 const short *HFilter; 205 const short *VFilter; 206 int FData[13*16]; /* Temp data buffer used in filtering */ 207 208 HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ 209 VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ 210 211 /* First filter 1-D horizontally... */ 212 filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter); 213 214 215 /* then filter verticaly... */ 216 filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter); 217 218} 219 220void vp8_sixtap_predict16x16_c 221( 222 unsigned char *src_ptr, 223 int src_pixels_per_line, 224 int xoffset, 225 int yoffset, 226 unsigned char *dst_ptr, 227 int dst_pitch 228) 229{ 230 const short *HFilter; 231 const short *VFilter; 232 int FData[21*24]; /* Temp data buffer used in filtering */ 233 234 235 HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ 236 VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ 237 238 /* First filter 1-D horizontally... */ 239 filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter); 240 241 /* then filter verticaly... */ 242 filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter); 243 244} 245 246 247/**************************************************************************** 248 * 249 * ROUTINE : filter_block2d_bil_first_pass 250 * 251 * INPUTS : UINT8 *src_ptr : Pointer to source block. 252 * UINT32 src_stride : Stride of source block. 253 * UINT32 height : Block height. 254 * UINT32 width : Block width. 255 * INT32 *vp8_filter : Array of 2 bi-linear filter taps. 256 * 257 * OUTPUTS : INT32 *dst_ptr : Pointer to filtered block. 258 * 259 * RETURNS : void 260 * 261 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block 262 * in the horizontal direction to produce the filtered output 263 * block. Used to implement first-pass of 2-D separable filter. 264 * 265 * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. 266 * Two filter taps should sum to VP8_FILTER_WEIGHT. 267 * 268 ****************************************************************************/ 269static void filter_block2d_bil_first_pass 270( 271 unsigned char *src_ptr, 272 unsigned short *dst_ptr, 273 unsigned int src_stride, 274 unsigned int height, 275 unsigned int width, 276 const short *vp8_filter 277) 278{ 279 unsigned int i, j; 280 281 for (i = 0; i < height; i++) 282 { 283 for (j = 0; j < width; j++) 284 { 285 /* Apply bilinear filter */ 286 dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + 287 ((int)src_ptr[1] * vp8_filter[1]) + 288 (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; 289 src_ptr++; 290 } 291 292 /* Next row... */ 293 src_ptr += src_stride - width; 294 dst_ptr += width; 295 } 296} 297 298/**************************************************************************** 299 * 300 * ROUTINE : filter_block2d_bil_second_pass 301 * 302 * INPUTS : INT32 *src_ptr : Pointer to source block. 303 * UINT32 dst_pitch : Destination block pitch. 304 * UINT32 height : Block height. 305 * UINT32 width : Block width. 306 * INT32 *vp8_filter : Array of 2 bi-linear filter taps. 307 * 308 * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. 309 * 310 * RETURNS : void 311 * 312 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block 313 * in the vertical direction to produce the filtered output 314 * block. Used to implement second-pass of 2-D separable filter. 315 * 316 * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass. 317 * Two filter taps should sum to VP8_FILTER_WEIGHT. 318 * 319 ****************************************************************************/ 320static void filter_block2d_bil_second_pass 321( 322 unsigned short *src_ptr, 323 unsigned char *dst_ptr, 324 int dst_pitch, 325 unsigned int height, 326 unsigned int width, 327 const short *vp8_filter 328) 329{ 330 unsigned int i, j; 331 int Temp; 332 333 for (i = 0; i < height; i++) 334 { 335 for (j = 0; j < width; j++) 336 { 337 /* Apply filter */ 338 Temp = ((int)src_ptr[0] * vp8_filter[0]) + 339 ((int)src_ptr[width] * vp8_filter[1]) + 340 (VP8_FILTER_WEIGHT / 2); 341 dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); 342 src_ptr++; 343 } 344 345 /* Next row... */ 346 dst_ptr += dst_pitch; 347 } 348} 349 350 351/**************************************************************************** 352 * 353 * ROUTINE : filter_block2d_bil 354 * 355 * INPUTS : UINT8 *src_ptr : Pointer to source block. 356 * UINT32 src_pitch : Stride of source block. 357 * UINT32 dst_pitch : Stride of destination block. 358 * INT32 *HFilter : Array of 2 horizontal filter taps. 359 * INT32 *VFilter : Array of 2 vertical filter taps. 360 * INT32 Width : Block width 361 * INT32 Height : Block height 362 * 363 * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. 364 * 365 * RETURNS : void 366 * 367 * FUNCTION : 2-D filters an input block by applying a 2-tap 368 * bi-linear filter horizontally followed by a 2-tap 369 * bi-linear filter vertically on the result. 370 * 371 * SPECIAL NOTES : The largest block size can be handled here is 16x16 372 * 373 ****************************************************************************/ 374static void filter_block2d_bil 375( 376 unsigned char *src_ptr, 377 unsigned char *dst_ptr, 378 unsigned int src_pitch, 379 unsigned int dst_pitch, 380 const short *HFilter, 381 const short *VFilter, 382 int Width, 383 int Height 384) 385{ 386 387 unsigned short FData[17*16]; /* Temp data buffer used in filtering */ 388 389 /* First filter 1-D horizontally... */ 390 filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter); 391 392 /* then 1-D vertically... */ 393 filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter); 394} 395 396 397void vp8_bilinear_predict4x4_c 398( 399 unsigned char *src_ptr, 400 int src_pixels_per_line, 401 int xoffset, 402 int yoffset, 403 unsigned char *dst_ptr, 404 int dst_pitch 405) 406{ 407 const short *HFilter; 408 const short *VFilter; 409 410 HFilter = vp8_bilinear_filters[xoffset]; 411 VFilter = vp8_bilinear_filters[yoffset]; 412#if 0 413 { 414 int i; 415 unsigned char temp1[16]; 416 unsigned char temp2[16]; 417 418 bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); 419 filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); 420 421 for (i = 0; i < 16; i++) 422 { 423 if (temp1[i] != temp2[i]) 424 { 425 bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); 426 filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); 427 } 428 } 429 } 430#endif 431 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4); 432 433} 434 435void vp8_bilinear_predict8x8_c 436( 437 unsigned char *src_ptr, 438 int src_pixels_per_line, 439 int xoffset, 440 int yoffset, 441 unsigned char *dst_ptr, 442 int dst_pitch 443) 444{ 445 const short *HFilter; 446 const short *VFilter; 447 448 HFilter = vp8_bilinear_filters[xoffset]; 449 VFilter = vp8_bilinear_filters[yoffset]; 450 451 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8); 452 453} 454 455void vp8_bilinear_predict8x4_c 456( 457 unsigned char *src_ptr, 458 int src_pixels_per_line, 459 int xoffset, 460 int yoffset, 461 unsigned char *dst_ptr, 462 int dst_pitch 463) 464{ 465 const short *HFilter; 466 const short *VFilter; 467 468 HFilter = vp8_bilinear_filters[xoffset]; 469 VFilter = vp8_bilinear_filters[yoffset]; 470 471 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4); 472 473} 474 475void vp8_bilinear_predict16x16_c 476( 477 unsigned char *src_ptr, 478 int src_pixels_per_line, 479 int xoffset, 480 int yoffset, 481 unsigned char *dst_ptr, 482 int dst_pitch 483) 484{ 485 const short *HFilter; 486 const short *VFilter; 487 488 HFilter = vp8_bilinear_filters[xoffset]; 489 VFilter = vp8_bilinear_filters[yoffset]; 490 491 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16); 492} 493