1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "./vp9_rtcd.h" 12 13#include "vpx_ports/mem.h" 14#include "vpx/vpx_integer.h" 15 16#include "vp9/common/vp9_common.h" 17#include "vp9/common/vp9_filter.h" 18 19#include "vp9/encoder/vp9_variance.h" 20 21void variance(const uint8_t *src_ptr, 22 int source_stride, 23 const uint8_t *ref_ptr, 24 int recon_stride, 25 int w, 26 int h, 27 unsigned int *sse, 28 int *sum) { 29 int i, j; 30 int diff; 31 32 *sum = 0; 33 *sse = 0; 34 35 for (i = 0; i < h; i++) { 36 for (j = 0; j < w; j++) { 37 diff = src_ptr[j] - ref_ptr[j]; 38 *sum += diff; 39 *sse += diff * diff; 40 } 41 42 src_ptr += source_stride; 43 ref_ptr += recon_stride; 44 } 45} 46 47/**************************************************************************** 48 * 49 * ROUTINE : filter_block2d_bil_first_pass 50 * 51 * INPUTS : uint8_t *src_ptr : Pointer to source block. 52 * uint32_t src_pixels_per_line : Stride of input block. 53 * uint32_t pixel_step : Offset between filter input 54 * samples (see notes). 55 * uint32_t output_height : Input block height. 56 * uint32_t output_width : Input block width. 57 * int32_t *vp9_filter : Array of 2 bi-linear filter 58 * taps. 59 * 60 * OUTPUTS : int32_t *output_ptr : Pointer to filtered block. 61 * 62 * RETURNS : void 63 * 64 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in 65 * either horizontal or vertical direction to produce the 66 * filtered output block. Used to implement first-pass 67 * of 2-D separable filter. 68 * 69 * SPECIAL NOTES : Produces int32_t output to retain precision for next pass. 70 * Two filter taps should sum to VP9_FILTER_WEIGHT. 71 * pixel_step defines whether the filter is applied 72 * horizontally (pixel_step=1) or vertically (pixel_step= 73 * stride). 74 * It defines the offset required to move from one input 75 * to the next. 76 * 77 ****************************************************************************/ 78static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, 79 uint16_t *output_ptr, 80 unsigned int src_pixels_per_line, 81 int pixel_step, 82 unsigned int output_height, 83 unsigned int output_width, 84 const int16_t *vp9_filter) { 85 unsigned int i, j; 86 87 for (i = 0; i < output_height; i++) { 88 for (j = 0; j < output_width; j++) { 89 output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] + 90 (int)src_ptr[pixel_step] * vp9_filter[1], 91 FILTER_BITS); 92 93 src_ptr++; 94 } 95 96 // Next row... 97 src_ptr += src_pixels_per_line - output_width; 98 output_ptr += output_width; 99 } 100} 101 102/**************************************************************************** 103 * 104 * ROUTINE : filter_block2d_bil_second_pass 105 * 106 * INPUTS : int32_t *src_ptr : Pointer to source block. 107 * uint32_t src_pixels_per_line : Stride of input block. 108 * uint32_t pixel_step : Offset between filter input 109 * samples (see notes). 110 * uint32_t output_height : Input block height. 111 * uint32_t output_width : Input block width. 112 * int32_t *vp9_filter : Array of 2 bi-linear filter 113 * taps. 114 * 115 * OUTPUTS : uint16_t *output_ptr : Pointer to filtered block. 116 * 117 * RETURNS : void 118 * 119 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in 120 * either horizontal or vertical direction to produce the 121 * filtered output block. Used to implement second-pass 122 * of 2-D separable filter. 123 * 124 * SPECIAL NOTES : Requires 32-bit input as produced by 125 * filter_block2d_bil_first_pass. 126 * Two filter taps should sum to VP9_FILTER_WEIGHT. 127 * pixel_step defines whether the filter is applied 128 * horizontally (pixel_step=1) or vertically (pixel_step= 129 * stride). 130 * It defines the offset required to move from one input 131 * to the next. 132 * 133 ****************************************************************************/ 134static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, 135 uint8_t *output_ptr, 136 unsigned int src_pixels_per_line, 137 unsigned int pixel_step, 138 unsigned int output_height, 139 unsigned int output_width, 140 const int16_t *vp9_filter) { 141 unsigned int i, j; 142 143 for (i = 0; i < output_height; i++) { 144 for (j = 0; j < output_width; j++) { 145 output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] + 146 (int)src_ptr[pixel_step] * vp9_filter[1], 147 FILTER_BITS); 148 src_ptr++; 149 } 150 151 src_ptr += src_pixels_per_line - output_width; 152 output_ptr += output_width; 153 } 154} 155 156unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { 157 unsigned int i, sum = 0; 158 159 for (i = 0; i < 256; i++) { 160 sum += (src_ptr[i] * src_ptr[i]); 161 } 162 163 return sum; 164} 165 166unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, 167 int source_stride, 168 const uint8_t *ref_ptr, 169 int recon_stride, 170 unsigned int *sse) { 171 unsigned int var; 172 int avg; 173 174 variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, &var, &avg); 175 *sse = var; 176 return (var - (((int64_t)avg * avg) >> 11)); 177} 178 179unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr, 180 int src_pixels_per_line, 181 int xoffset, 182 int yoffset, 183 const uint8_t *dst_ptr, 184 int dst_pixels_per_line, 185 unsigned int *sse) { 186 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 187 uint8_t temp2[68 * 64]; 188 const int16_t *hfilter, *vfilter; 189 190 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 191 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 192 193 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 194 1, 33, 64, hfilter); 195 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter); 196 197 return vp9_variance64x32(temp2, 64, dst_ptr, dst_pixels_per_line, sse); 198} 199 200unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, 201 int src_pixels_per_line, 202 int xoffset, 203 int yoffset, 204 const uint8_t *dst_ptr, 205 int dst_pixels_per_line, 206 unsigned int *sse, 207 const uint8_t *second_pred) { 208 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 209 uint8_t temp2[68 * 64]; 210 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer 211 const int16_t *hfilter, *vfilter; 212 213 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 214 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 215 216 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 217 1, 33, 64, hfilter); 218 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter); 219 vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64); 220 return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse); 221} 222 223unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, 224 int source_stride, 225 const uint8_t *ref_ptr, 226 int recon_stride, 227 unsigned int *sse) { 228 unsigned int var; 229 int avg; 230 231 variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, &var, &avg); 232 *sse = var; 233 return (var - (((int64_t)avg * avg) >> 11)); 234} 235 236unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, 237 int src_pixels_per_line, 238 int xoffset, 239 int yoffset, 240 const uint8_t *dst_ptr, 241 int dst_pixels_per_line, 242 unsigned int *sse) { 243 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 244 uint8_t temp2[68 * 64]; 245 const int16_t *hfilter, *vfilter; 246 247 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 248 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 249 250 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 251 1, 65, 32, hfilter); 252 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter); 253 254 return vp9_variance32x64(temp2, 32, dst_ptr, dst_pixels_per_line, sse); 255} 256 257unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, 258 int src_pixels_per_line, 259 int xoffset, 260 int yoffset, 261 const uint8_t *dst_ptr, 262 int dst_pixels_per_line, 263 unsigned int *sse, 264 const uint8_t *second_pred) { 265 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 266 uint8_t temp2[68 * 64]; 267 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); // compound pred buffer 268 const int16_t *hfilter, *vfilter; 269 270 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 271 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 272 273 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 274 1, 65, 32, hfilter); 275 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter); 276 vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32); 277 return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse); 278} 279 280unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, 281 int source_stride, 282 const uint8_t *ref_ptr, 283 int recon_stride, 284 unsigned int *sse) { 285 unsigned int var; 286 int avg; 287 288 variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, &var, &avg); 289 *sse = var; 290 return (var - (((int64_t)avg * avg) >> 9)); 291} 292 293unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, 294 int src_pixels_per_line, 295 int xoffset, 296 int yoffset, 297 const uint8_t *dst_ptr, 298 int dst_pixels_per_line, 299 unsigned int *sse) { 300 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 301 uint8_t temp2[36 * 32]; 302 const int16_t *hfilter, *vfilter; 303 304 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 305 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 306 307 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 308 1, 17, 32, hfilter); 309 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); 310 311 return vp9_variance32x16(temp2, 32, dst_ptr, dst_pixels_per_line, sse); 312} 313 314unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, 315 int src_pixels_per_line, 316 int xoffset, 317 int yoffset, 318 const uint8_t *dst_ptr, 319 int dst_pixels_per_line, 320 unsigned int *sse, 321 const uint8_t *second_pred) { 322 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 323 uint8_t temp2[36 * 32]; 324 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); // compound pred buffer 325 const int16_t *hfilter, *vfilter; 326 327 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 328 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 329 330 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 331 1, 17, 32, hfilter); 332 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); 333 vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32); 334 return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse); 335} 336 337unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, 338 int source_stride, 339 const uint8_t *ref_ptr, 340 int recon_stride, 341 unsigned int *sse) { 342 unsigned int var; 343 int avg; 344 345 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, &var, &avg); 346 *sse = var; 347 return (var - (((int64_t)avg * avg) >> 9)); 348} 349 350unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr, 351 int src_pixels_per_line, 352 int xoffset, 353 int yoffset, 354 const uint8_t *dst_ptr, 355 int dst_pixels_per_line, 356 unsigned int *sse) { 357 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 358 uint8_t temp2[36 * 32]; 359 const int16_t *hfilter, *vfilter; 360 361 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 362 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 363 364 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 365 1, 33, 16, hfilter); 366 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter); 367 368 return vp9_variance16x32(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 369} 370 371unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, 372 int src_pixels_per_line, 373 int xoffset, 374 int yoffset, 375 const uint8_t *dst_ptr, 376 int dst_pixels_per_line, 377 unsigned int *sse, 378 const uint8_t *second_pred) { 379 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 380 uint8_t temp2[36 * 32]; 381 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); // compound pred buffer 382 const int16_t *hfilter, *vfilter; 383 384 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 385 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 386 387 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 388 1, 33, 16, hfilter); 389 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter); 390 vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16); 391 return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse); 392} 393 394unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, 395 int source_stride, 396 const uint8_t *ref_ptr, 397 int recon_stride, 398 unsigned int *sse) { 399 unsigned int var; 400 int avg; 401 402 variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, &var, &avg); 403 *sse = var; 404 return (var - (((int64_t)avg * avg) >> 12)); 405} 406 407unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, 408 int source_stride, 409 const uint8_t *ref_ptr, 410 int recon_stride, 411 unsigned int *sse) { 412 unsigned int var; 413 int avg; 414 415 variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, &var, &avg); 416 *sse = var; 417 return (var - (((int64_t)avg * avg) >> 10)); 418} 419 420void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, 421 const uint8_t *ref_ptr, int ref_stride, 422 unsigned int *sse, int *sum) { 423 variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum); 424} 425 426unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, 427 int source_stride, 428 const uint8_t *ref_ptr, 429 int recon_stride, 430 unsigned int *sse) { 431 unsigned int var; 432 int avg; 433 434 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); 435 *sse = var; 436 return (var - (((unsigned int)avg * avg) >> 8)); 437} 438 439unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, 440 int source_stride, 441 const uint8_t *ref_ptr, 442 int recon_stride, 443 unsigned int *sse) { 444 unsigned int var; 445 int avg; 446 447 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg); 448 *sse = var; 449 return (var - (((unsigned int)avg * avg) >> 7)); 450} 451 452unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, 453 int source_stride, 454 const uint8_t *ref_ptr, 455 int recon_stride, 456 unsigned int *sse) { 457 unsigned int var; 458 int avg; 459 460 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg); 461 *sse = var; 462 return (var - (((unsigned int)avg * avg) >> 7)); 463} 464 465void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, 466 const uint8_t *ref_ptr, int ref_stride, 467 unsigned int *sse, int *sum) { 468 variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum); 469} 470 471unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, 472 int source_stride, 473 const uint8_t *ref_ptr, 474 int recon_stride, 475 unsigned int *sse) { 476 unsigned int var; 477 int avg; 478 479 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg); 480 *sse = var; 481 return (var - (((unsigned int)avg * avg) >> 6)); 482} 483 484unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, 485 int source_stride, 486 const uint8_t *ref_ptr, 487 int recon_stride, 488 unsigned int *sse) { 489 unsigned int var; 490 int avg; 491 492 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, &var, &avg); 493 *sse = var; 494 return (var - (((unsigned int)avg * avg) >> 5)); 495} 496 497unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, 498 int source_stride, 499 const uint8_t *ref_ptr, 500 int recon_stride, 501 unsigned int *sse) { 502 unsigned int var; 503 int avg; 504 505 variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, &var, &avg); 506 *sse = var; 507 return (var - (((unsigned int)avg * avg) >> 5)); 508} 509 510unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, 511 int source_stride, 512 const uint8_t *ref_ptr, 513 int recon_stride, 514 unsigned int *sse) { 515 unsigned int var; 516 int avg; 517 518 variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg); 519 *sse = var; 520 return (var - (((unsigned int)avg * avg) >> 4)); 521} 522 523 524unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, 525 int source_stride, 526 const uint8_t *ref_ptr, 527 int recon_stride, 528 unsigned int *sse) { 529 unsigned int var; 530 int avg; 531 532 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); 533 *sse = var; 534 return var; 535} 536 537unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, 538 int source_stride, 539 const uint8_t *ref_ptr, 540 int recon_stride, 541 unsigned int *sse) { 542 unsigned int var; 543 int avg; 544 545 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg); 546 *sse = var; 547 return var; 548} 549 550unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, 551 int source_stride, 552 const uint8_t *ref_ptr, 553 int recon_stride, 554 unsigned int *sse) { 555 unsigned int var; 556 int avg; 557 558 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg); 559 *sse = var; 560 return var; 561} 562 563unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, 564 int source_stride, 565 const uint8_t *ref_ptr, 566 int recon_stride, 567 unsigned int *sse) { 568 unsigned int var; 569 int avg; 570 571 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg); 572 *sse = var; 573 return var; 574} 575 576 577unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr, 578 int src_pixels_per_line, 579 int xoffset, 580 int yoffset, 581 const uint8_t *dst_ptr, 582 int dst_pixels_per_line, 583 unsigned int *sse) { 584 uint8_t temp2[20 * 16]; 585 const int16_t *hfilter, *vfilter; 586 uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering 587 588 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 589 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 590 591 // First filter 1d Horizontal 592 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 593 1, 5, 4, hfilter); 594 595 // Now filter Verticaly 596 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); 597 598 return vp9_variance4x4(temp2, 4, dst_ptr, dst_pixels_per_line, sse); 599} 600 601unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, 602 int src_pixels_per_line, 603 int xoffset, 604 int yoffset, 605 const uint8_t *dst_ptr, 606 int dst_pixels_per_line, 607 unsigned int *sse, 608 const uint8_t *second_pred) { 609 uint8_t temp2[20 * 16]; 610 const int16_t *hfilter, *vfilter; 611 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); // compound pred buffer 612 uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering 613 614 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 615 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 616 617 // First filter 1d Horizontal 618 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 619 1, 5, 4, hfilter); 620 621 // Now filter Verticaly 622 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); 623 vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); 624 return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse); 625} 626 627unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr, 628 int src_pixels_per_line, 629 int xoffset, 630 int yoffset, 631 const uint8_t *dst_ptr, 632 int dst_pixels_per_line, 633 unsigned int *sse) { 634 uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering 635 uint8_t temp2[20 * 16]; 636 const int16_t *hfilter, *vfilter; 637 638 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 639 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 640 641 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 642 1, 9, 8, hfilter); 643 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); 644 645 return vp9_variance8x8(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 646} 647 648unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, 649 int src_pixels_per_line, 650 int xoffset, 651 int yoffset, 652 const uint8_t *dst_ptr, 653 int dst_pixels_per_line, 654 unsigned int *sse, 655 const uint8_t *second_pred) { 656 uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering 657 uint8_t temp2[20 * 16]; 658 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); // compound pred buffer 659 const int16_t *hfilter, *vfilter; 660 661 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 662 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 663 664 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 665 1, 9, 8, hfilter); 666 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); 667 vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); 668 return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse); 669} 670 671unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, 672 int src_pixels_per_line, 673 int xoffset, 674 int yoffset, 675 const uint8_t *dst_ptr, 676 int dst_pixels_per_line, 677 unsigned int *sse) { 678 uint16_t fdata3[17 * 16]; // Temp data buffer used in filtering 679 uint8_t temp2[20 * 16]; 680 const int16_t *hfilter, *vfilter; 681 682 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 683 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 684 685 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 686 1, 17, 16, hfilter); 687 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); 688 689 return vp9_variance16x16(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 690} 691 692unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, 693 int src_pixels_per_line, 694 int xoffset, 695 int yoffset, 696 const uint8_t *dst_ptr, 697 int dst_pixels_per_line, 698 unsigned int *sse, 699 const uint8_t *second_pred) { 700 uint16_t fdata3[17 * 16]; 701 uint8_t temp2[20 * 16]; 702 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); // compound pred buffer 703 const int16_t *hfilter, *vfilter; 704 705 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 706 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 707 708 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 709 1, 17, 16, hfilter); 710 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); 711 712 vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16); 713 return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse); 714} 715 716unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, 717 int src_pixels_per_line, 718 int xoffset, 719 int yoffset, 720 const uint8_t *dst_ptr, 721 int dst_pixels_per_line, 722 unsigned int *sse) { 723 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 724 uint8_t temp2[68 * 64]; 725 const int16_t *hfilter, *vfilter; 726 727 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 728 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 729 730 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 731 1, 65, 64, hfilter); 732 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); 733 734 return vp9_variance64x64(temp2, 64, dst_ptr, dst_pixels_per_line, sse); 735} 736 737unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, 738 int src_pixels_per_line, 739 int xoffset, 740 int yoffset, 741 const uint8_t *dst_ptr, 742 int dst_pixels_per_line, 743 unsigned int *sse, 744 const uint8_t *second_pred) { 745 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 746 uint8_t temp2[68 * 64]; 747 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer 748 const int16_t *hfilter, *vfilter; 749 750 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 751 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 752 753 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 754 1, 65, 64, hfilter); 755 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); 756 vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64); 757 return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse); 758} 759 760unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, 761 int src_pixels_per_line, 762 int xoffset, 763 int yoffset, 764 const uint8_t *dst_ptr, 765 int dst_pixels_per_line, 766 unsigned int *sse) { 767 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 768 uint8_t temp2[36 * 32]; 769 const int16_t *hfilter, *vfilter; 770 771 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 772 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 773 774 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 775 1, 33, 32, hfilter); 776 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); 777 778 return vp9_variance32x32(temp2, 32, dst_ptr, dst_pixels_per_line, sse); 779} 780 781unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, 782 int src_pixels_per_line, 783 int xoffset, 784 int yoffset, 785 const uint8_t *dst_ptr, 786 int dst_pixels_per_line, 787 unsigned int *sse, 788 const uint8_t *second_pred) { 789 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 790 uint8_t temp2[36 * 32]; 791 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); // compound pred buffer 792 const int16_t *hfilter, *vfilter; 793 794 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 795 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 796 797 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 798 1, 33, 32, hfilter); 799 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); 800 vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32); 801 return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse); 802} 803 804unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, 805 int source_stride, 806 const uint8_t *ref_ptr, 807 int recon_stride, 808 unsigned int *sse) { 809 return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0, 810 ref_ptr, recon_stride, sse); 811} 812 813unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, 814 int source_stride, 815 const uint8_t *ref_ptr, 816 int recon_stride, 817 unsigned int *sse) { 818 return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0, 819 ref_ptr, recon_stride, sse); 820} 821 822unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, 823 int source_stride, 824 const uint8_t *ref_ptr, 825 int recon_stride, 826 unsigned int *sse) { 827 return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0, 828 ref_ptr, recon_stride, sse); 829} 830 831unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, 832 int source_stride, 833 const uint8_t *ref_ptr, 834 int recon_stride, 835 unsigned int *sse) { 836 return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8, 837 ref_ptr, recon_stride, sse); 838} 839 840unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, 841 int source_stride, 842 const uint8_t *ref_ptr, 843 int recon_stride, 844 unsigned int *sse) { 845 return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8, 846 ref_ptr, recon_stride, sse); 847} 848 849unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, 850 int source_stride, 851 const uint8_t *ref_ptr, 852 int recon_stride, 853 unsigned int *sse) { 854 return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8, 855 ref_ptr, recon_stride, sse); 856} 857 858unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, 859 int source_stride, 860 const uint8_t *ref_ptr, 861 int recon_stride, 862 unsigned int *sse) { 863 return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8, 864 ref_ptr, recon_stride, sse); 865} 866 867unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, 868 int source_stride, 869 const uint8_t *ref_ptr, 870 int recon_stride, 871 unsigned int *sse) { 872 return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8, 873 ref_ptr, recon_stride, sse); 874} 875 876unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, 877 int source_stride, 878 const uint8_t *ref_ptr, 879 int recon_stride, 880 unsigned int *sse) { 881 return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8, 882 ref_ptr, recon_stride, sse); 883} 884 885unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr, 886 int src_pixels_per_line, 887 int xoffset, 888 int yoffset, 889 const uint8_t *dst_ptr, 890 int dst_pixels_per_line, 891 unsigned int *sse) { 892 vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, 893 xoffset, yoffset, dst_ptr, 894 dst_pixels_per_line, sse); 895 return *sse; 896} 897 898unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, 899 int src_pixels_per_line, 900 int xoffset, 901 int yoffset, 902 const uint8_t *dst_ptr, 903 int dst_pixels_per_line, 904 unsigned int *sse) { 905 vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line, 906 xoffset, yoffset, dst_ptr, 907 dst_pixels_per_line, sse); 908 return *sse; 909} 910 911unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, 912 int src_pixels_per_line, 913 int xoffset, 914 int yoffset, 915 const uint8_t *dst_ptr, 916 int dst_pixels_per_line, 917 unsigned int *sse) { 918 vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line, 919 xoffset, yoffset, dst_ptr, 920 dst_pixels_per_line, sse); 921 return *sse; 922} 923 924unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr, 925 int src_pixels_per_line, 926 int xoffset, 927 int yoffset, 928 const uint8_t *dst_ptr, 929 int dst_pixels_per_line, 930 unsigned int *sse) { 931 uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering 932 uint8_t temp2[20 * 16]; 933 const int16_t *hfilter, *vfilter; 934 935 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 936 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 937 938 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 939 1, 9, 16, hfilter); 940 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter); 941 942 return vp9_variance16x8(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 943} 944 945unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, 946 int src_pixels_per_line, 947 int xoffset, 948 int yoffset, 949 const uint8_t *dst_ptr, 950 int dst_pixels_per_line, 951 unsigned int *sse, 952 const uint8_t *second_pred) { 953 uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering 954 uint8_t temp2[20 * 16]; 955 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); // compound pred buffer 956 const int16_t *hfilter, *vfilter; 957 958 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 959 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 960 961 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 962 1, 9, 16, hfilter); 963 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter); 964 vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16); 965 return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse); 966} 967 968unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, 969 int src_pixels_per_line, 970 int xoffset, 971 int yoffset, 972 const uint8_t *dst_ptr, 973 int dst_pixels_per_line, 974 unsigned int *sse) { 975 uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering 976 uint8_t temp2[20 * 16]; 977 const int16_t *hfilter, *vfilter; 978 979 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 980 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 981 982 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 983 1, 17, 8, hfilter); 984 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); 985 986 return vp9_variance8x16(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 987} 988 989unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, 990 int src_pixels_per_line, 991 int xoffset, 992 int yoffset, 993 const uint8_t *dst_ptr, 994 int dst_pixels_per_line, 995 unsigned int *sse, 996 const uint8_t *second_pred) { 997 uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering 998 uint8_t temp2[20 * 16]; 999 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); // compound pred buffer 1000 const int16_t *hfilter, *vfilter; 1001 1002 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1003 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1004 1005 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1006 1, 17, 8, hfilter); 1007 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); 1008 vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8); 1009 return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse); 1010} 1011 1012unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr, 1013 int src_pixels_per_line, 1014 int xoffset, 1015 int yoffset, 1016 const uint8_t *dst_ptr, 1017 int dst_pixels_per_line, 1018 unsigned int *sse) { 1019 uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering 1020 uint8_t temp2[20 * 16]; 1021 const int16_t *hfilter, *vfilter; 1022 1023 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1024 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1025 1026 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1027 1, 5, 8, hfilter); 1028 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); 1029 1030 return vp9_variance8x4(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 1031} 1032 1033unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, 1034 int src_pixels_per_line, 1035 int xoffset, 1036 int yoffset, 1037 const uint8_t *dst_ptr, 1038 int dst_pixels_per_line, 1039 unsigned int *sse, 1040 const uint8_t *second_pred) { 1041 uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering 1042 uint8_t temp2[20 * 16]; 1043 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer 1044 const int16_t *hfilter, *vfilter; 1045 1046 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1047 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1048 1049 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1050 1, 5, 8, hfilter); 1051 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); 1052 vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8); 1053 return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse); 1054} 1055 1056unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr, 1057 int src_pixels_per_line, 1058 int xoffset, 1059 int yoffset, 1060 const uint8_t *dst_ptr, 1061 int dst_pixels_per_line, 1062 unsigned int *sse) { 1063 uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering 1064 // FIXME(jingning,rbultje): this temp2 buffer probably doesn't need to be 1065 // of this big? same issue appears in all other block size settings. 1066 uint8_t temp2[20 * 16]; 1067 const int16_t *hfilter, *vfilter; 1068 1069 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1070 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1071 1072 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1073 1, 9, 4, hfilter); 1074 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); 1075 1076 return vp9_variance4x8(temp2, 4, dst_ptr, dst_pixels_per_line, sse); 1077} 1078 1079unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, 1080 int src_pixels_per_line, 1081 int xoffset, 1082 int yoffset, 1083 const uint8_t *dst_ptr, 1084 int dst_pixels_per_line, 1085 unsigned int *sse, 1086 const uint8_t *second_pred) { 1087 uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering 1088 uint8_t temp2[20 * 16]; 1089 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer 1090 const int16_t *hfilter, *vfilter; 1091 1092 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1093 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1094 1095 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1096 1, 9, 4, hfilter); 1097 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); 1098 vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4); 1099 return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse); 1100} 1101 1102 1103void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, 1104 int height, const uint8_t *ref, int ref_stride) { 1105 int i, j; 1106 1107 for (i = 0; i < height; i++) { 1108 for (j = 0; j < width; j++) { 1109 int tmp; 1110 tmp = pred[j] + ref[j]; 1111 comp_pred[j] = (tmp + 1) >> 1; 1112 } 1113 comp_pred += width; 1114 pred += width; 1115 ref += ref_stride; 1116 } 1117} 1118