/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <limits.h>
#include <string.h>

#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx/vpx_integer.h"
#include "blockd.h"
#include "reconinter.h"
#if CONFIG_RUNTIME_CPU_DETECT
#include "onyxc_int.h"
#endif

void vp8_copy_mem16x16_c(
    unsigned char *src,
    int src_stride,
    unsigned char *dst,
    int dst_stride)
{
    int r;

    for (r = 0; r < 16; r++)
    {
        memcpy(dst, src, 16);

        src += src_stride;
        dst += dst_stride;
    }
}

void vp8_copy_mem8x8_c(
    unsigned char *src,
    int src_stride,
    unsigned char *dst,
    int dst_stride)
{
    int r;

    for (r = 0; r < 8; r++)
    {
        memcpy(dst, src, 8);

        src += src_stride;
        dst += dst_stride;
    }
}

void vp8_copy_mem8x4_c(
    unsigned char *src,
    int src_stride,
    unsigned char *dst,
    int dst_stride)
{
    int r;

    for (r = 0; r < 4; r++)
    {
        memcpy(dst, src, 8);

        src += src_stride;
        dst += dst_stride;
    }
}


void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf)
{
    int r;
    unsigned char *pred_ptr = d->predictor;
    unsigned char *ptr;
    ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);

    if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
    {
        sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
    }
    else
    {
        for (r = 0; r < 4; r++)
        {
            pred_ptr[0] = ptr[0];
            pred_ptr[1] = ptr[1];
            pred_ptr[2] = ptr[2];
            pred_ptr[3] = ptr[3];
            pred_ptr += pitch;
            ptr += pre_stride;
        }
    }
}

static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride)
{
    unsigned char *ptr;
    ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);

    if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
    {
        x->subpixel_predict8x8(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
    }
    else
    {
        vp8_copy_mem8x8(ptr, pre_stride, dst, dst_stride);
    }
}

static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride)
{
    unsigned char *ptr;
    ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);

    if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
    {
        x->subpixel_predict8x4(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
    }
    else
    {
        vp8_copy_mem8x4(ptr, pre_stride, dst, dst_stride);
    }
}

static void build_inter_predictors_b(BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf)
{
    int r;
    unsigned char *ptr;
    ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);

    if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
    {
        sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
    }
    else
    {
        for (r = 0; r < 4; r++)
        {
            dst[0] = ptr[0];
            dst[1] = ptr[1];
            dst[2] = ptr[2];
            dst[3] = ptr[3];
            dst += dst_stride;
            ptr += pre_stride;
        }
    }
}
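
/* Note: the MVs used by the builders above are in 1/8-pel units:
 * mv >> 3 is the full-pel offset into the reference plane and mv & 7
 * is the sub-pel fraction handed to the sub-pixel filter (e.g.
 * row == 13 means 1 full pel down plus a 5/8-pel fraction).  When
 * both fractions are zero the filter is skipped and reference pixels
 * are copied directly.
 */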

/*encoder only*/
void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x)
{
    unsigned char *uptr, *vptr;
    unsigned char *upred_ptr = &x->predictor[256];
    unsigned char *vpred_ptr = &x->predictor[320];

    int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
    int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
    int offset;
    int pre_stride = x->pre.uv_stride;

    /* calc uv motion vectors */
    mv_row += 1 | (mv_row >> (sizeof(int) * CHAR_BIT - 1));
    mv_col += 1 | (mv_col >> (sizeof(int) * CHAR_BIT - 1));
    mv_row /= 2;
    mv_col /= 2;
    mv_row &= x->fullpixel_mask;
    mv_col &= x->fullpixel_mask;

    offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
    uptr = x->pre.u_buffer + offset;
    vptr = x->pre.v_buffer + offset;

    if ((mv_row | mv_col) & 7)
    {
        x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8);
        x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8);
    }
    else
    {
        vp8_copy_mem8x8(uptr, pre_stride, upred_ptr, 8);
        vp8_copy_mem8x8(vptr, pre_stride, vpred_ptr, 8);
    }
}

/*encoder only*/
void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x)
{
    int i, j;
    int pre_stride = x->pre.uv_stride;
    unsigned char *base_pre;

    /* build uv mvs */
    for (i = 0; i < 2; i++)
    {
        for (j = 0; j < 2; j++)
        {
            int yoffset = i * 8 + j * 2;
            int uoffset = 16 + i * 2 + j;
            int voffset = 20 + i * 2 + j;

            int temp;

            temp = x->block[yoffset  ].bmi.mv.as_mv.row
                   + x->block[yoffset+1].bmi.mv.as_mv.row
                   + x->block[yoffset+4].bmi.mv.as_mv.row
                   + x->block[yoffset+5].bmi.mv.as_mv.row;

            temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);

            x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;

            temp = x->block[yoffset  ].bmi.mv.as_mv.col
                   + x->block[yoffset+1].bmi.mv.as_mv.col
                   + x->block[yoffset+4].bmi.mv.as_mv.col
                   + x->block[yoffset+5].bmi.mv.as_mv.col;

            temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);

            x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;

            x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
        }
    }

    base_pre = x->pre.u_buffer;
    for (i = 16; i < 20; i += 2)
    {
        BLOCKD *d0 = &x->block[i];
        BLOCKD *d1 = &x->block[i+1];

        if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
            build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
        else
        {
            vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict);
            vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict);
        }
    }

    base_pre = x->pre.v_buffer;
    for (i = 20; i < 24; i += 2)
    {
        BLOCKD *d0 = &x->block[i];
        BLOCKD *d1 = &x->block[i+1];

        if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
            build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
        else
        {
            vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict);
            vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict);
        }
    }
}
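
/* Note on the UV MV derivation above: C integer division truncates
 * toward zero, so round-to-nearest is done by pre-biasing with the
 * sign of the dividend (this relies on arithmetic right shift of
 * negative ints, which is implementation-defined in C but universal
 * in practice).  `1 | (v >> (sizeof(int) * CHAR_BIT - 1))` is +1 for
 * v >= 0 and -1 for v < 0, so `(v + bias) / 2` halves with rounding
 * away from zero, e.g. 5 -> 3 and -5 -> -3.  The same idiom biases
 * the sum of four luma MVs by +4 or -4 before the /8 that averages
 * them into one chroma MV.
 */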

/*encoder only*/
void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x,
                                         unsigned char *dst_y,
                                         int dst_ystride)
{
    unsigned char *ptr_base;
    unsigned char *ptr;
    int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
    int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
    int pre_stride = x->pre.y_stride;

    ptr_base = x->pre.y_buffer;
    ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);

    if ((mv_row | mv_col) & 7)
    {
        x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7,
                                 dst_y, dst_ystride);
    }
    else
    {
        vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
    }
}

static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
{
    /* If the MV points so far into the UMV border that no visible pixels
     * are used for reconstruction, the subpel part of the MV can be
     * discarded and the MV limited to 16 pixels with equivalent results.
     *
     * This limit kicks in at 19 pixels for the top and left edges, for
     * the 16 pixels plus 3 taps right of the central pixel when subpel
     * filtering. The bottom and right edges use 16 pixels plus 2 pixels
     * left of the central pixel when filtering.
     */
    if (mv->col < (xd->mb_to_left_edge - (19 << 3)))
        mv->col = xd->mb_to_left_edge - (16 << 3);
    else if (mv->col > xd->mb_to_right_edge + (18 << 3))
        mv->col = xd->mb_to_right_edge + (16 << 3);

    if (mv->row < (xd->mb_to_top_edge - (19 << 3)))
        mv->row = xd->mb_to_top_edge - (16 << 3);
    else if (mv->row > xd->mb_to_bottom_edge + (18 << 3))
        mv->row = xd->mb_to_bottom_edge + (16 << 3);
}

/* A version of the above function for chroma block MVs. */
static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
{
    mv->col = (2 * mv->col < (xd->mb_to_left_edge - (19 << 3))) ?
              (xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col;
    mv->col = (2 * mv->col > xd->mb_to_right_edge + (18 << 3)) ?
              (xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col;

    mv->row = (2 * mv->row < (xd->mb_to_top_edge - (19 << 3))) ?
              (xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row;
    mv->row = (2 * mv->row > xd->mb_to_bottom_edge + (18 << 3)) ?
              (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row;
}
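
/* Both clamp helpers compare in 1/8-pel units: the mb_to_*_edge
 * distances are pixel counts shifted left by 3, which is why the
 * pixel limits above carry a `<< 3`.  The chroma variant doubles the
 * half-resolution chroma MV (2 * mv) to compare it in luma units
 * against the same edges, then halves the clamped replacement value.
 */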

void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
                                        unsigned char *dst_y,
                                        unsigned char *dst_u,
                                        unsigned char *dst_v,
                                        int dst_ystride,
                                        int dst_uvstride)
{
    int offset;
    unsigned char *ptr;
    unsigned char *uptr, *vptr;

    int_mv _16x16mv;

    unsigned char *ptr_base = x->pre.y_buffer;
    int pre_stride = x->pre.y_stride;

    _16x16mv.as_int = x->mode_info_context->mbmi.mv.as_int;

    if (x->mode_info_context->mbmi.need_to_clamp_mvs)
    {
        clamp_mv_to_umv_border(&_16x16mv.as_mv, x);
    }

    ptr = ptr_base + (_16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);

    /* int_mv packs the row and col shorts into one 32-bit word, so a
     * single mask against 0x00070007 tests the sub-pel bits of both
     * components at once; it is equivalent to ((row & 7) || (col & 7)).
     */
    if (_16x16mv.as_int & 0x00070007)
    {
        x->subpixel_predict16x16(ptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_y, dst_ystride);
    }
    else
    {
        vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
    }

    /* calc uv motion vectors */
    _16x16mv.as_mv.row += 1 | (_16x16mv.as_mv.row >> (sizeof(int) * CHAR_BIT - 1));
    _16x16mv.as_mv.col += 1 | (_16x16mv.as_mv.col >> (sizeof(int) * CHAR_BIT - 1));
    _16x16mv.as_mv.row /= 2;
    _16x16mv.as_mv.col /= 2;
    _16x16mv.as_mv.row &= x->fullpixel_mask;
    _16x16mv.as_mv.col &= x->fullpixel_mask;

    /* Derive the chroma stride from the luma stride; this relies on
     * the YV12 frame buffers allocating uv_stride as exactly
     * y_stride / 2. */
    pre_stride >>= 1;
    offset = (_16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);
    uptr = x->pre.u_buffer + offset;
    vptr = x->pre.v_buffer + offset;

    if (_16x16mv.as_int & 0x00070007)
    {
        x->subpixel_predict8x8(uptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_u, dst_uvstride);
        x->subpixel_predict8x8(vptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_v, dst_uvstride);
    }
    else
    {
        vp8_copy_mem8x8(uptr, pre_stride, dst_u, dst_uvstride);
        vp8_copy_mem8x8(vptr, pre_stride, dst_v, dst_uvstride);
    }
}
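
/* For macroblock splits coarser than 4x4 (partitioning 0..2: two
 * 16x8, two 8x16, or four 8x8), each 8x8 quadrant carries a single
 * MV, so the whole macroblock can be built from four 8x8 predictions
 * anchored at blocks 0, 2, 8 and 10 (the quadrants' top-left 4x4
 * blocks).  Only partitioning 3 (sixteen 4x4 MVs) needs the per-block
 * path, which pairs horizontally adjacent blocks into one 8x4
 * prediction when their MVs match.
 */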
static void build_inter4x4_predictors_mb(MACROBLOCKD *x)
{
    int i;
    unsigned char *base_dst = x->dst.y_buffer;
    unsigned char *base_pre = x->pre.y_buffer;

    if (x->mode_info_context->mbmi.partitioning < 3)
    {
        BLOCKD *b;
        int dst_stride = x->dst.y_stride;

        x->block[ 0].bmi = x->mode_info_context->bmi[ 0];
        x->block[ 2].bmi = x->mode_info_context->bmi[ 2];
        x->block[ 8].bmi = x->mode_info_context->bmi[ 8];
        x->block[10].bmi = x->mode_info_context->bmi[10];
        if (x->mode_info_context->mbmi.need_to_clamp_mvs)
        {
            clamp_mv_to_umv_border(&x->block[ 0].bmi.mv.as_mv, x);
            clamp_mv_to_umv_border(&x->block[ 2].bmi.mv.as_mv, x);
            clamp_mv_to_umv_border(&x->block[ 8].bmi.mv.as_mv, x);
            clamp_mv_to_umv_border(&x->block[10].bmi.mv.as_mv, x);
        }

        b = &x->block[ 0];
        build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
        b = &x->block[ 2];
        build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
        b = &x->block[ 8];
        build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
        b = &x->block[10];
        build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
    }
    else
    {
        for (i = 0; i < 16; i += 2)
        {
            BLOCKD *d0 = &x->block[i];
            BLOCKD *d1 = &x->block[i+1];
            int dst_stride = x->dst.y_stride;

            x->block[i+0].bmi = x->mode_info_context->bmi[i+0];
            x->block[i+1].bmi = x->mode_info_context->bmi[i+1];
            if (x->mode_info_context->mbmi.need_to_clamp_mvs)
            {
                clamp_mv_to_umv_border(&x->block[i+0].bmi.mv.as_mv, x);
                clamp_mv_to_umv_border(&x->block[i+1].bmi.mv.as_mv, x);
            }

            if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
                build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
            else
            {
                build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
                build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
            }
        }
    }

    base_dst = x->dst.u_buffer;
    base_pre = x->pre.u_buffer;
    for (i = 16; i < 20; i += 2)
    {
        BLOCKD *d0 = &x->block[i];
        BLOCKD *d1 = &x->block[i+1];
        int dst_stride = x->dst.uv_stride;

        /* Note: uv mvs already clamped in build_4x4uvmvs() */

        if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
            build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
        else
        {
            build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
            build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
        }
    }

    base_dst = x->dst.v_buffer;
    base_pre = x->pre.v_buffer;
    for (i = 20; i < 24; i += 2)
    {
        BLOCKD *d0 = &x->block[i];
        BLOCKD *d1 = &x->block[i+1];
        int dst_stride = x->dst.uv_stride;

        /* Note: uv mvs already clamped in build_4x4uvmvs() */

        if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
            build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
        else
        {
            build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
            build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
        }
    }
}

static void build_4x4uvmvs(MACROBLOCKD *x)
{
    int i, j;

    for (i = 0; i < 2; i++)
    {
        for (j = 0; j < 2; j++)
        {
            int yoffset = i * 8 + j * 2;
            int uoffset = 16 + i * 2 + j;
            int voffset = 20 + i * 2 + j;

            int temp;

            temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.row
                   + x->mode_info_context->bmi[yoffset + 1].mv.as_mv.row
                   + x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row
                   + x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row;

            temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);

            x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;

            temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.col
                   + x->mode_info_context->bmi[yoffset + 1].mv.as_mv.col
                   + x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col
                   + x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col;

            temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);

            x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;

            if (x->mode_info_context->mbmi.need_to_clamp_mvs)
                clamp_uvmv_to_umv_border(&x->block[uoffset].bmi.mv.as_mv, x);

            x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
        }
    }
}

void vp8_build_inter_predictors_mb(MACROBLOCKD *xd)
{
    if (xd->mode_info_context->mbmi.mode != SPLITMV)
    {
        vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
                                           xd->dst.u_buffer, xd->dst.v_buffer,
                                           xd->dst.y_stride, xd->dst.uv_stride);
    }
    else
    {
        build_4x4uvmvs(xd);
        build_inter4x4_predictors_mb(xd);
    }
}
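
/* Entry points, in summary: vp8_build_inter_predictors_mb() above is
 * the reconstruction-side path and writes prediction directly into
 * the x->dst frame buffers, while the functions marked "encoder only"
 * fill the macroblock's x->predictor scratch buffer instead (Y blocks
 * from offset 0, U from 256, V from 320, as seen in
 * vp8_build_inter16x16_predictors_mbuv()).
 */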