/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <math.h>

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_scan.h"
#include "vp9/common/vp9_seg_common.h"

#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_aq_variance.h"

// Bit masks of the reference frames (plus intra) that are *excluded* when
// the search is restricted to a particular reference frame.
#define LAST_FRAME_MODE_MASK    ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK       ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
                                 (1 << INTRA_FRAME))

// Mask of frames allowed as the second (compound) reference: ALTREF only
// (the 0x01 bit covers the NONE/intra slot).
#define SECOND_REF_FRAME_MASK   ((1 << ALTREF_FRAME) | 0x01)

#define MIN_EARLY_TERM_INDEX    3
#define NEW_MV_DISCOUNT_FACTOR  8

// A single entry of the mode search order: a prediction mode paired with
// one or two reference frames (second is NONE for single-reference modes).
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;
// A reference-frame pair on its own (used for the reference search order).
typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

// Accumulator/context passed through the per-transform-block RD callbacks
// (block_rd_txfm); collects rate/distortion/SSE totals and supports early
// exit once best_rd is exceeded.
struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;              // set once this_rd exceeds best_rd
  int use_fast_coef_costing;
  const scan_order *so;
  uint8_t skippable;           // ANDed with "eob == 0" per block
};

#define LAST_NEW_MV_INDEX 6

// Fixed order in which (mode, reference-frame) pairs are evaluated by the
// inter mode search.
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
};

// Fixed order in which reference-frame combinations are evaluated.
static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
};

// Swaps the m-th and n-th coefficient buffer sets (coeff/qcoeff/dqcoeff/eobs)
// between the macroblock planes and the pick-mode context, for planes in
// [min_plane, max_plane).
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                           int m, int n, int
                           min_plane, int max_plane) {
  int i;

  for (i = min_plane; i < max_plane; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &x->e_mbd.plane[i];

    // Point the working plane buffers at slot m...
    p->coeff = ctx->coeff_pbuf[i][m];
    p->qcoeff = ctx->qcoeff_pbuf[i][m];
    pd->dqcoeff = ctx->dqcoeff_pbuf[i][m];
    p->eobs = ctx->eobs_pbuf[i][m];

    // ...move slot n's pointers into slot m...
    ctx->coeff_pbuf[i][m] = ctx->coeff_pbuf[i][n];
    ctx->qcoeff_pbuf[i][m] = ctx->qcoeff_pbuf[i][n];
    ctx->dqcoeff_pbuf[i][m] = ctx->dqcoeff_pbuf[i][n];
    ctx->eobs_pbuf[i][m] = ctx->eobs_pbuf[i][n];

    // ...and store the old slot-m pointers in slot n.
    ctx->coeff_pbuf[i][n] = p->coeff;
    ctx->qcoeff_pbuf[i][n] = p->qcoeff;
    ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff;
    ctx->eobs_pbuf[i][n] = p->eobs;
  }
}

// Estimates rate and distortion for a whole superblock from per-unit
// variance/SSE (via the fn_ptr variance kernels), without running the full
// transform search.  Also records per-unit skip-transform hints in
// x->skip_txfm and sets *skip_txfm_sb when all units look skippable.
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum,
                            int *skip_txfm_sb, int64_t *skip_sse_sb) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];
  unsigned int sse;
  unsigned int var = 0;
  unsigned int sum_sse = 0;
  int64_t total_sse = 0;
  int skip_flag = 1;
  const int shift = 6;
  int rate;
  int64_t dist;
  const int dequant_shift =
#if CONFIG_VP9_HIGHBITDEPTH
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
          xd->bd - 5 :
#endif  // CONFIG_VP9_HIGHBITDEPTH
          3;

  x->pred_sse[ref] = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
    const int64_t dc_thr = p->quant_thred[0] >> shift;
    const int64_t ac_thr = p->quant_thred[1] >> shift;
    // The low thresholds are used to measure if the prediction errors are
    // low enough so that we can skip the mode search.
    const int64_t low_dc_thr = MIN(50, dc_thr >> 2);
    const int64_t low_ac_thr = MIN(80, ac_thr >> 2);
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    // NOTE(review): bh mixes b_height_log2_lookup[bs] with
    // b_width_log2_lookup[unit_size]; unit_size appears to always be square
    // here (it comes from txsize_to_bsize), so this is equivalent to using
    // the height lookup — confirm before changing.
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

    sum_sse = 0;

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        // NOTE(review): dst uses (idx << lh) for the horizontal offset where
        // src uses (idx << lw); harmless while unit_size is square (lw == lh),
        // but lw is presumably the intent — verify before changing.
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
        int block_idx = (idy << 1) + idx;
        int low_err_skip = 0;

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
                                        dst, pd->dst.stride, &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

        x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE;
        if (!x->select_tx_size) {
          // Check if all ac coefficients can be quantized to zero.
          if (var < ac_thr || var == 0) {
            x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY;

            // Check if dc coefficient can be quantized to zero.
            if (sse - var < dc_thr || sse == var) {
              x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC;

              // Both DC and AC errors are very small: candidate for skipping
              // the mode search entirely for this unit.
              if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                low_err_skip = 1;
            }
          }
        }

        // Any unit that is not a low-error skip clears the superblock-wide
        // skip flag.
        if (skip_flag && !low_err_skip)
          skip_flag = 0;

        if (i == 0)
          x->pred_sse[ref] += sse;
      }
    }

    total_sse += sum_sse;

    // Fast approximate the modelling function.
    if (cpi->sf.simple_model_rd_from_var) {
      int64_t rate;
      const int64_t square_error = sum_sse;
      int quantizer = (pd->dequant[1] >> dequant_shift);

      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
    } else {
      vp9_model_rd_from_var_lapndz(sum_sse, num_pels_log2_lookup[bs],
                                   pd->dequant[1] >> dequant_shift,
                                   &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
    }
  }

  *skip_txfm_sb = skip_flag;
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
}

// Sum of squared differences between coeff and dqcoeff; also returns the
// sum of squared coeff values through *ssz.
int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

// Sum of squared differences only (no *ssz output), 16-bit input variant.
int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}

#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth block error: same as vp9_block_error_c but the result is
// rounded and scaled back to 8-bit-equivalent units by 2*(bd-8) bits.
int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff,
                                 intptr_t block_size,
                                 int64_t *ssz, int bd) {
  int i;
  int64_t error = 0, sqcoeff = 0;
  int shift
      = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
  // Round-to-nearest scale back to 8-bit-equivalent units.
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};

// Returns the entropy-coding cost (token costs from x->token_costs) of the
// quantized coefficients of one transform block, updating the above/left
// entropy contexts *A and *L as a side effect.
static int cost_coeffs(MACROBLOCK *x,
                       int plane, int block,
                       ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                       TX_SIZE tx_size,
                       const int16_t *scan, const int16_t *nb,
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  // Skip band_counts[tx_size][0]; the DC coefficient is costed separately.
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][is_inter_block(mbmi)];
  uint8_t token_cache[32 * 32];
  int pt = combine_entropy_contexts(*A, *L);
  int c, cost;
#if CONFIG_VP9_HIGHBITDEPTH
  const int16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const int16_t *cat6_high_cost =
      vp9_get_high_cost_table(8);
#endif

  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi, pd) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff[0];
    int16_t prev_t;
    EXTRABIT e;
    vp9_get_token_extra(v, &prev_t, &e);
    cost = (*token_costs)[0][pt][prev_t] +
        vp9_get_cost(prev_t, e, cat6_high_cost);

    token_cache[0] = vp9_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int16_t t;

      v = qcoeff[rc];
      vp9_get_token_extra(v, &t, &e);
      if (use_fast_coef_costing) {
        // Fast path: approximate the context with !prev_t instead of
        // deriving it from the token cache.
        cost += (*token_costs)[!prev_t][!prev_t][t] +
            vp9_get_cost(t, e, cat6_high_cost);
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[!prev_t][pt][t] +
            vp9_get_cost(t, e, cat6_high_cost);
        token_cache[rc] = vp9_pt_energy_class[t];
      }
      prev_t = t;
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token
    if (band_left) {
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

  // is eob first coefficient;
  *A = *L = (c > 0);

  return cost;
}

// Computes distortion (coeff vs. dqcoeff block error) and SSE for one
// transform block, scaled by the tx-size-dependent shift.  For intra blocks
// under skip_encode, adds a model-based correction term.
static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
                       int64_t *out_dist, int64_t *out_sse) {
  const int ss_txfrm_size = tx_size << 1;
  MACROBLOCKD* const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = tx_size == TX_32X32 ?
      0 : 2;
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_VP9_HIGHBITDEPTH
  const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
  *out_dist = vp9_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                     &this_sse, bd) >> shift;
#else
  *out_dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                              &this_sse) >> shift;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  *out_sse = this_sse >> shift;

  if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) {
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
                 (1 << ss_txfrm_size)) >>
#if CONFIG_VP9_HIGHBITDEPTH
                     (shift + 2 + (bd - 8) * 2);
#else
                     (shift + 2);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    *out_dist += (p >> 4);
    *out_sse += p;
  }
}

// Token cost of one transform block, using the entropy contexts stored in
// the rdcost_block_args accumulator.
static int rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
                      TX_SIZE tx_size, struct rdcost_block_args* args) {
  int x_idx, y_idx;
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);

  return cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                     args->t_left + y_idx, tx_size,
                     args->so->scan, args->so->neighbors,
                     args->use_fast_coef_costing);
}

// Per-transform-block callback for vp9_foreach_transformed_block_in_plane:
// transforms/quantizes (or uses the skip_txfm shortcut), accumulates
// rate/dist/SSE into args, and sets exit_early once best_rd is exceeded.
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd1, rd2, rd;
  int rate;
  int64_t dist;
  int64_t sse;

  if (args->exit_early)
    return;

  if (!is_inter_block(mbmi)) {
    // Intra: encode the block, then measure the resulting distortion.
    struct encode_b_args arg = {x, NULL, &mbmi->skip};
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &arg);
    dist_block(x, plane, block, tx_size, &dist, &sse);
  } else if (max_txsize_lookup[plane_bsize]
             == tx_size) {
    // Inter at the maximum transform size: the skip_txfm hints computed by
    // model_rd_for_sb apply directly.
    if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
        SKIP_TXFM_NONE) {
      // full forward transform and quantization
      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
      dist_block(x, plane, block, tx_size, &dist, &sse);
    } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
               SKIP_TXFM_AC_ONLY) {
      // compute DC coefficient
      tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
      if (x->plane[plane].eobs[block]) {
        // Correct the modeled distortion by the energy the DC coefficient
        // actually removes.
        const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
        const int64_t resd_sse = coeff[0] - dqcoeff[0];
        int64_t dc_correct = orig_sse - resd_sse * resd_sse;
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
        if (tx_size != TX_32X32)
          dc_correct >>= 2;

        dist = MAX(0, sse - dc_correct);
      }
    } else {
      // SKIP_TXFM_AC_DC
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
    }
  } else {
    // full forward transform and quantization
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
    dist_block(x, plane, block, tx_size, &dist, &sse);
  }

  // Distortion alone already exceeds the best RD cost: stop early.
  rd = RDCOST(x->rdmult, x->rddiv, 0, dist);
  if (args->this_rd + rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  rate = rate_block(plane, block, plane_bsize, tx_size, args);
  rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd1 > rd2 && !xd->lossless);

  args->this_rate +=
      rate;
  args->this_dist += dist;
  args->this_sse += sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  args->skippable &= !x->plane[plane].eobs[block];
}

// Runs the per-block RD accumulation (block_rd_txfm) over every transform
// block of one plane.  On early exit all outputs are set to their
// "invalid" sentinels (INT_MAX / INT64_MAX / 0).
static void txfm_rd_in_plane(MACROBLOCK *x,
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  struct rdcost_block_args args;
  vp9_zero(args);
  args.x = x;
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_casting;
  args.skippable = 1;

  if (plane == 0)
    xd->mi[0]->mbmi.tx_size = tx_size;

  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);

  args.so = get_scan(xd, tx_size, pd->plane_type, 0);

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         block_rd_txfm, &args);
  if (args.exit_early) {
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  } else {
    *distortion = args.this_dist;
    *rate = args.this_rate;
    *sse = args.this_sse;
    *skippable = args.skippable;
  }
}

// Evaluates luma RD for the largest transform size allowed by both the
// block size and the frame's tx_mode (no per-size search).
static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  mbmi->tx_size = MIN(max_tx_size, largest_tx_size);

  txfm_rd_in_plane(x, rate, distortion, skip,
                   sse, ref_best_rd, 0, bs,
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
}

// Searches transform sizes from the largest allowed downwards and picks the
// one with the best RD cost (including the cost of signalling the size when
// tx_mode == TX_MODE_SELECT).
static void
choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                       int *rate,
                       int64_t *distortion,
                       int *skip,
                       int64_t *psse,
                       int64_t ref_best_rd,
                       BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
  // Index [..][0]: RD without the tx-size signalling cost;
  // index [..][1]: RD including it.
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
  int n, m;
  int s0, s1;
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = max_tx_size;
  int start_tx, end_tx;

  const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  if (cm->tx_mode == TX_MODE_SELECT) {
    // Search all sizes from max_tx_size down to TX_4X4.
    start_tx = max_tx_size;
    end_tx = 0;
  } else {
    // Fixed tx mode: evaluate only the single permitted size.
    TX_SIZE chosen_tx_size = MIN(max_tx_size,
                                 tx_mode_to_biggest_tx_size[cm->tx_mode]);
    start_tx = chosen_tx_size;
    end_tx = chosen_tx_size;
  }

  for (n = start_tx; n >= end_tx; n--) {
    // Cost of signalling transform size n with the tx probability tree.
    int r_tx_size = 0;
    for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
      if (m == n)
        r_tx_size += vp9_cost_zero(tx_probs[m]);
      else
        r_tx_size += vp9_cost_one(tx_probs[m]);
    }
    txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
                     &sse[n], ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      r[n][1] += r_tx_size;
    }
    if (d[n] == INT64_MAX || r[n][0] == INT_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
    } else if (s[n]) {
      if (is_inter_block(mbmi)) {
        rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
      } else {
        rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
        rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]);
      }
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }

    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
         (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
         s[n] == 1))
      break;

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
  }
  mbmi->tx_size = best_tx;

  *distortion = d[mbmi->tx_size];
  *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip = s[mbmi->tx_size];
  *psse = sse[mbmi->tx_size];
}

// Luma RD for a whole superblock: either the largest transform size
// directly or a full per-size search, depending on speed features and
// losslessness.
static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip,
                            int64_t *psse, BLOCK_SIZE bs,
                            int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t sse;
  int64_t *ret_sse = psse ? psse : &sse;

  assert(bs == xd->mi[0]->mbmi.sb_type);

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
    choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
                           bs);
  } else {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse,
                           ref_best_rd, bs);
  }
}

// Returns 1 when an oblique intra mode can be skipped because the best mode
// so far is not one of its neighboring directional modes.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  if (mode == D117_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  if (mode == D63_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D207_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D153_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  return 0;
}

// RD search over the intra prediction modes of one 4x4 sub-block (at
// position (row, col) within an 8x8 luma block).  Returns the best RD cost
// found, or rd_thresh unchanged when nothing beats it.
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x,
                                     int row, int col,
                                     PREDICTION_MODE *best_mode,
                                     const int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
740 int *bestrate, int *bestratey, 741 int64_t *bestdistortion, 742 BLOCK_SIZE bsize, int64_t rd_thresh) { 743 PREDICTION_MODE mode; 744 MACROBLOCKD *const xd = &x->e_mbd; 745 int64_t best_rd = rd_thresh; 746 struct macroblock_plane *p = &x->plane[0]; 747 struct macroblockd_plane *pd = &xd->plane[0]; 748 const int src_stride = p->src.stride; 749 const int dst_stride = pd->dst.stride; 750 const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4]; 751 uint8_t *dst_init = &pd->dst.buf[row * 4 * src_stride + col * 4]; 752 ENTROPY_CONTEXT ta[2], tempa[2]; 753 ENTROPY_CONTEXT tl[2], templ[2]; 754 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; 755 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; 756 int idx, idy; 757 uint8_t best_dst[8 * 8]; 758#if CONFIG_VP9_HIGHBITDEPTH 759 uint16_t best_dst16[8 * 8]; 760#endif 761 762 memcpy(ta, a, sizeof(ta)); 763 memcpy(tl, l, sizeof(tl)); 764 xd->mi[0]->mbmi.tx_size = TX_4X4; 765 766#if CONFIG_VP9_HIGHBITDEPTH 767 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 768 for (mode = DC_PRED; mode <= TM_PRED; ++mode) { 769 int64_t this_rd; 770 int ratey = 0; 771 int64_t distortion = 0; 772 int rate = bmode_costs[mode]; 773 774 if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) 775 continue; 776 777 // Only do the oblique modes if the best so far is 778 // one of the neighboring directional modes 779 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { 780 if (conditional_skipintra(mode, *best_mode)) 781 continue; 782 } 783 784 memcpy(tempa, ta, sizeof(ta)); 785 memcpy(templ, tl, sizeof(tl)); 786 787 for (idy = 0; idy < num_4x4_blocks_high; ++idy) { 788 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { 789 const int block = (row + idy) * 2 + (col + idx); 790 const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride]; 791 uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride]; 792 int16_t *const src_diff = vp9_raster_block_offset_int16(BLOCK_8X8, 793 
block, 794 p->src_diff); 795 tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block); 796 xd->mi[0]->bmi[block].as_mode = mode; 797 vp9_predict_intra_block(xd, 1, TX_4X4, mode, 798 x->skip_encode ? src : dst, 799 x->skip_encode ? src_stride : dst_stride, 800 dst, dst_stride, 801 col + idx, row + idy, 0); 802 vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride, 803 dst, dst_stride, xd->bd); 804 if (xd->lossless) { 805 const scan_order *so = &vp9_default_scan_orders[TX_4X4]; 806 vp9_highbd_fwht4x4(src_diff, coeff, 8); 807 vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); 808 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, 809 so->scan, so->neighbors, 810 cpi->sf.use_fast_coef_costing); 811 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) 812 goto next_highbd; 813 vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), 814 dst, dst_stride, 815 p->eobs[block], xd->bd); 816 } else { 817 int64_t unused; 818 const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block); 819 const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type]; 820 if (tx_type == DCT_DCT) 821 vpx_highbd_fdct4x4(src_diff, coeff, 8); 822 else 823 vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type); 824 vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); 825 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, 826 so->scan, so->neighbors, 827 cpi->sf.use_fast_coef_costing); 828 distortion += vp9_highbd_block_error( 829 coeff, BLOCK_OFFSET(pd->dqcoeff, block), 830 16, &unused, xd->bd) >> 2; 831 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) 832 goto next_highbd; 833 vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block), 834 dst, dst_stride, p->eobs[block], xd->bd); 835 } 836 } 837 } 838 839 rate += ratey; 840 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); 841 842 if (this_rd < best_rd) { 843 *bestrate = rate; 844 *bestratey = ratey; 845 *bestdistortion = distortion; 846 best_rd = 
            this_rd;
        *best_mode = mode;
        memcpy(a, tempa, sizeof(tempa));
        memcpy(l, templ, sizeof(templ));
        // Save the reconstructed pixels of the new best mode.
        for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
          memcpy(best_dst16 + idy * 8,
                 CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
        }
      }
    next_highbd:
      {}
    }
    if (best_rd >= rd_thresh || x->skip_encode)
      return best_rd;

    // Restore the best mode's reconstruction into the dst buffer.
    for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
      memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
             best_dst16 + idy * 8,
             num_4x4_blocks_wide * 4 * sizeof(uint16_t));
    }

    return best_rd;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // 8-bit path: try each intra mode, transform/quantize the residual, and
  // keep the mode with the lowest RD cost.
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;
    int64_t distortion = 0;
    int rate = bmode_costs[mode];

    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
      continue;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
        continue;
    }

    memcpy(tempa, ta, sizeof(ta));
    memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        const int block = (row + idy) * 2 + (col + idx);
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff =
            vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
        tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        xd->mi[0]->bmi[block].as_mode = mode;
        vp9_predict_intra_block(xd, 1, TX_4X4, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ?
                                    src_stride : dst_stride,
                                dst, dst_stride, col + idx, row + idy, 0);
        vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

        if (xd->lossless) {
          // Lossless: Walsh-Hadamard transform; reconstruction is exact, so
          // no distortion term is accumulated.
          const scan_order *so = &vp9_default_scan_orders[TX_4X4];
          vp9_fwht4x4(src_diff, coeff, 8);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
                          p->eobs[block]);
        } else {
          int64_t unused;
          const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
          const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
          vp9_fht4x4(src_diff, coeff, 8, tx_type);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
          distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                        16, &unused) >> 2;
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                         dst, dst_stride, p->eobs[block]);
        }
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      memcpy(a, tempa, sizeof(tempa));
      memcpy(l, templ, sizeof(templ));
      // Save the reconstructed pixels of the new best mode.
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
               num_4x4_blocks_wide * 4);
    }
  next:
    {}
  }

  if (best_rd >= rd_thresh || x->skip_encode)
    return best_rd;

  // Restore the best mode's reconstruction into the dst buffer.
  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
memcpy(dst_init + idy * dst_stride, best_dst + idy * 8, 960 num_4x4_blocks_wide * 4); 961 962 return best_rd; 963} 964 965static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb, 966 int *rate, int *rate_y, 967 int64_t *distortion, 968 int64_t best_rd) { 969 int i, j; 970 const MACROBLOCKD *const xd = &mb->e_mbd; 971 MODE_INFO *const mic = xd->mi[0]; 972 const MODE_INFO *above_mi = xd->above_mi; 973 const MODE_INFO *left_mi = xd->left_mi; 974 const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; 975 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; 976 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; 977 int idx, idy; 978 int cost = 0; 979 int64_t total_distortion = 0; 980 int tot_rate_y = 0; 981 int64_t total_rd = 0; 982 ENTROPY_CONTEXT t_above[4], t_left[4]; 983 const int *bmode_costs = cpi->mbmode_cost; 984 985 memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); 986 memcpy(t_left, xd->plane[0].left_context, sizeof(t_left)); 987 988 // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block. 
  // Step by num_4x4_blocks_{high,wide} so each iteration covers one
  // 4x4/4x8/8x4 partition of the 8x8 block.
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      PREDICTION_MODE best_mode = DC_PRED;
      int r = INT_MAX, ry = INT_MAX;
      int64_t d = INT64_MAX, this_rd = INT64_MAX;
      i = idy * 2 + idx;  // raster index of the sub-block within the 8x8
      if (cpi->common.frame_type == KEY_FRAME) {
        // Key frames code the mode contextually on the above/left modes.
        const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
        const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);

        bmode_costs = cpi->y_mode_costs[A][L];
      }

      this_rd = rd_pick_intra4x4block(cpi, mb, idy, idx, &best_mode,
                                      bmode_costs, t_above + idx, t_left + idy,
                                      &r, &ry, &d, bsize, best_rd - total_rd);
      // Early termination: remaining budget already exhausted.
      if (this_rd >= best_rd - total_rd)
        return INT64_MAX;

      total_rd += this_rd;
      cost += r;
      total_distortion += d;
      tot_rate_y += ry;

      // Propagate the chosen mode to all 4x4 units the partition spans.
      mic->bmi[i].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_high; ++j)
        mic->bmi[i + j * 2].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_wide; ++j)
        mic->bmi[i + j].as_mode = best_mode;

      if (total_rd >= best_rd)
        return INT64_MAX;
    }
  }

  *rate = cost;
  *rate_y = tot_rate_y;
  *distortion = total_distortion;
  // The block-level mode mirrors the last (bottom-right) sub-block's mode.
  mic->mbmi.mode = mic->bmi[3].as_mode;

  return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}

// This function is used only for intra_only frames.
// RD search over the full-block luma intra modes (DC_PRED..TM_PRED) for
// bsize >= 8x8; writes the winning mode and tx_size into the mode info and
// returns the best RD cost found (or the incoming best_rd if none beats it).
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE bsize,
                                      int64_t best_rd) {
  PREDICTION_MODE mode;
  PREDICTION_MODE mode_selected = DC_PRED;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  TX_SIZE best_tx = TX_4X4;
  int *bmode_costs;
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  // Mode cost is conditioned on the above/left neighbors' modes.
  const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
  const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
  bmode_costs = cpi->y_mode_costs[A][L];

  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  /* Y Search for intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    if (cpi->sf.use_nonrd_pick_mode) {
      // These speed features are turned on in hybrid non-RD and RD mode
      // for key frame coding in the context of real-time setting.
      if (conditional_skipintra(mode, mode_selected))
        continue;
      if (*skippable)
        break;
    }

    mic->mbmi.mode = mode;

    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
        &s, NULL, bsize, best_rd);

    // INT_MAX rate signals early termination inside super_block_yrd.
    if (this_rate_tokenonly == INT_MAX)
      continue;

    this_rate = this_rate_tokenonly + bmode_costs[mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected   = mode;
      best_rd         = this_rd;
      best_tx         = mic->mbmi.tx_size;
      *rate           = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion     = this_distortion;
      *skippable      = s;
    }
  }

  mic->mbmi.mode = mode_selected;
  mic->mbmi.tx_size = best_tx;

  return best_rd;
}

// Return value 0: early termination triggered, no valid rd cost available;
//              1: rd cost values are valid.
// Accumulates chroma (U/V) rate/distortion/sse for the current prediction,
// summed over both chroma planes at the uv transform size. See the contract
// comment above: returns 0 on early termination (outputs set to sentinel
// values), 1 when the outputs are valid.
static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
                            int *rate, int64_t *distortion, int *skippable,
                            int64_t *sse, BLOCK_SIZE bsize,
                            int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
  int plane;
  int pnrate = 0, pnskip = 1;
  int64_t pndist = 0, pnsse = 0;
  int is_cost_valid = 1;

  if (ref_best_rd < 0)
    is_cost_valid = 0;

  if (is_inter_block(mbmi) && is_cost_valid) {
    // Inter blocks need the chroma residual computed before costing.
    int plane;
    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
      vp9_subtract_plane(x, bsize, plane);
  }

  *rate = 0;
  *distortion = 0;
  *sse = 0;
  *skippable = 1;

  // Sum the per-plane costs over U (plane 1) and V (plane 2).
  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
                     ref_best_rd, plane, bsize, uv_tx_size,
                     cpi->sf.use_fast_coef_costing);
    if (pnrate == INT_MAX) {
      // Per-plane costing terminated early against ref_best_rd.
      is_cost_valid = 0;
      break;
    }
    *rate += pnrate;
    *distortion += pndist;
    *sse += pnsse;
    *skippable &= pnskip;
  }

  if (!is_cost_valid) {
    // reset cost value
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  }

  return is_cost_valid;
}

// RD search over the chroma intra modes, restricted by the speed-feature
// intra_uv_mode_mask for the given max transform size. Writes the winning
// mode into mbmi->uv_mode and returns its RD cost.
static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  PREDICTION_MODE mode;
  PREDICTION_MODE mode_selected = DC_PRED;
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate, s;
  int64_t this_distortion, this_sse;

  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    // Skip modes pruned by the speed feature for this tx size.
    if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
      continue;

    xd->mi[0]->mbmi.uv_mode = mode;

    if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
                          &this_distortion, &s, &this_sse, bsize, best_rd))
      continue;
    this_rate = this_rate_tokenonly +
        cpi->intra_uv_mode_cost[cpi->common.frame_type][mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected   = mode;
      best_rd         = this_rd;
      *rate           = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion     = this_distortion;
      *skippable      = s;
      if (!x->select_tx_size)
        swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
    }
  }

  xd->mi[0]->mbmi.uv_mode = mode_selected;
  return best_rd;
}

// Cheap chroma estimate: cost only DC_PRED instead of searching all uv
// modes. Used when the use_uv_intra_rd_estimate speed feature is on.
static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x,
                              int *rate, int *rate_tokenonly,
                              int64_t *distortion, int *skippable,
                              BLOCK_SIZE bsize) {
  const VP9_COMMON *cm = &cpi->common;
  int64_t unused;

  x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  super_block_uvrd(cpi, x, rate_tokenonly, distortion,
                   skippable, &unused, bsize, INT64_MAX);
  *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED];
  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}

// Chooses the chroma intra mode, either via the DC_PRED estimate or a full
// uv mode search; reports the selection through *mode_uv.
static void choose_intra_uv_mode(VP9_COMP *cpi, MACROBLOCK *const x,
                                 PICK_MODE_CONTEXT *ctx,
                                 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
                                 int *rate_uv, int *rate_uv_tokenonly,
                                 int64_t *dist_uv, int *skip_uv,
                                 PREDICTION_MODE *mode_uv) {
  // Use an estimated rd for uv_intra based on DC_PRED if the
  // appropriate speed flag is set.
  if (cpi->sf.use_uv_intra_rd_estimate) {
    // Chroma is coded at 8x8 minimum; clamp sub-8x8 sizes up.
    rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
                   skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
  // Else do a proper rd search for each possible transform size that may
  // be considered in the main rd loop.
  } else {
    rd_pick_intra_sbuv_mode(cpi, x, ctx,
                            rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                            bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
  }
  // Report whichever uv mode the search above left in the mode info.
  *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
}

// Rate (in bits cost units) of signalling an inter mode in the given
// motion-vector reference context.
static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode,
                       int mode_context) {
  assert(is_inter_mode(mode));
  return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
}

// Fills in the motion vector(s) for sub-block i according to 'mode'
// (NEWMV from seg_mvs, NEAR/NEARESTMV from frame_mv, ZEROMV = 0), copies
// the result to every 4x4 unit the sub-block spans, and returns the rate
// of coding the mode plus (for NEWMV) the mv bits.
static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                                int i,
                                PREDICTION_MODE mode, int_mv this_mv[2],
                                int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                                int_mv seg_mvs[MAX_REF_FRAMES],
                                int_mv *best_ref_mv[2], const int *mvjcost,
                                int *mvcost[2]) {
  MODE_INFO *const mic = xd->mi[0];
  const MB_MODE_INFO *const mbmi = &mic->mbmi;
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  int thismvcost = 0;
  int idx, idy;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
  const int is_compound = has_second_ref(mbmi);

  switch (mode) {
    case NEWMV:
      // NEWMV pays the differential mv cost against the best reference mv.
      this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
      thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
                                    mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      if (is_compound) {
        this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
        thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
                                      mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      }
      break;
    case NEARMV:
    case NEARESTMV:
      // Predicted modes reuse the candidate mv; no mv bits are added.
      this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
      if (is_compound)
        this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
      break;
    case ZEROMV:
      this_mv[0].as_int = 0;
      if (is_compound)
        this_mv[1].as_int = 0;
      break;
    default:
      break;
  }

  mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
  if (is_compound)
    mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;

  mic->bmi[i].as_mode = mode;

  // Replicate this bmi into every 4x4 unit covered by the partition.
  for (idy = 0; idy < num_4x4_blocks_high; ++idy)
    for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
      memmove(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i]));

  return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mbmi->ref_frame[0]]) +
             thismvcost;
}

// Builds the inter prediction for sub-block i of an 8x8, transforms and
// quantizes the residual, and returns the resulting RD cost (or INT64_MAX
// if it exceeds best_yrd). Outputs rate, pixel-domain distortion and sse.
static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
                                       MACROBLOCK *x,
                                       int64_t best_yrd,
                                       int i,
                                       int *labelyrate,
                                       int64_t *distortion, int64_t *sse,
                                       ENTROPY_CONTEXT *ta,
                                       ENTROPY_CONTEXT *tl,
                                       int mi_row, int mi_col) {
  int k;
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  struct macroblock_plane *const p = &x->plane[0];
  MODE_INFO *const mi = xd->mi[0];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
  int idx, idy;

  const uint8_t *const src =
      &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
  uint8_t *const dst = &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, i,
                                                            pd->dst.stride)];
  int64_t thisdistortion = 0, thissse = 0;
  int thisrate = 0, ref;
  const scan_order *so = &vp9_default_scan_orders[TX_4X4];
  const int is_compound = has_second_ref(&mi->mbmi);
  const InterpKernel *kernel = vp9_filter_kernels[mi->mbmi.interp_filter];

  // Build the (possibly compound) prediction for this sub-block.
  for (ref = 0; ref < 1 + is_compound; ++ref) {
    const uint8_t *pre = &pd->pre[ref].buf[vp9_raster_block_offset(BLOCK_8X8, i,
                                               pd->pre[ref].stride)];
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_highbd_build_inter_predictor(pre, pd->pre[ref].stride,
                                       dst, pd->dst.stride,
                                       &mi->bmi[i].as_mv[ref].as_mv,
                                       &xd->block_refs[ref]->sf, width, height,
                                       ref, kernel, MV_PRECISION_Q3,
                                       mi_col * MI_SIZE + 4 * (i % 2),
                                       mi_row * MI_SIZE + 4 * (i / 2), xd->bd);
    } else {
      vp9_build_inter_predictor(pre, pd->pre[ref].stride,
                                dst, pd->dst.stride,
                                &mi->bmi[i].as_mv[ref].as_mv,
                                &xd->block_refs[ref]->sf, width, height, ref,
                                kernel, MV_PRECISION_Q3,
                                mi_col * MI_SIZE + 4 * (i % 2),
                                mi_row * MI_SIZE + 4 * (i / 2));
    }
#else
    vp9_build_inter_predictor(pre, pd->pre[ref].stride,
                              dst, pd->dst.stride,
                              &mi->bmi[i].as_mv[ref].as_mv,
                              &xd->block_refs[ref]->sf, width, height, ref,
                              kernel, MV_PRECISION_Q3,
                              mi_col * MI_SIZE + 4 * (i % 2),
                              mi_row * MI_SIZE + 4 * (i / 2));
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  // Compute the residual (src - prediction) into src_diff.
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(
        height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
        8, src, p->src.stride, dst, pd->dst.stride, xd->bd);
  } else {
    vpx_subtract_block(
        height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
        8, src, p->src.stride, dst, pd->dst.stride);
  }
#else
  vpx_subtract_block(height, width,
                     vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
                     8, src, p->src.stride, dst, pd->dst.stride);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Transform/quantize/cost each 4x4 of the sub-block; k walks the raster
  // index of the 4x4 units starting from i.
  k = i;
  for (idy = 0; idy < height / 4; ++idy) {
    for (idx = 0; idx < width / 4; ++idx) {
      int64_t ssz, rd, rd1, rd2;
      tran_low_t* coeff;

      k += (idy * 2 + idx);
      coeff = BLOCK_OFFSET(p->coeff, k);
      x->fwd_txm4x4(vp9_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
                    coeff, 8);
      vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        thisdistortion += vp9_highbd_block_error(coeff,
                                                 BLOCK_OFFSET(pd->dqcoeff, k),
                                                 16, &ssz, xd->bd);
      } else {
        thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                          16, &ssz);
      }
#else
      thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                        16, &ssz);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      thissse += ssz;
      thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
                              so->scan, so->neighbors,
                              cpi->sf.use_fast_coef_costing);
      // Compare against both the coded cost and the skip (sse-only) cost;
      // bail out if even the cheaper of the two exceeds the budget.
      rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
      rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
      rd = MIN(rd1, rd2);
      if (rd >= best_yrd)
        return INT64_MAX;
    }
  }

  // >> 2 converts transform-domain error back to the pixel domain.
  *distortion = thisdistortion >> 2;
  *labelyrate = thisrate;
  *sse = thissse >> 2;

  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}

// Per-(sub-block, mode) RD bookkeeping for the sub-8x8 search.
typedef struct {
  int eobs;              // end-of-block position for the coded coefficients
  int brate;             // total rate (mode/mv + coefficients)
  int byrate;            // coefficient-only rate
  int64_t bdist;         // distortion
  int64_t bsse;          // sum of squared error
  int64_t brdcost;       // combined RD cost
  int_mv mvs[2];         // chosen mv per reference
  ENTROPY_CONTEXT ta[2]; // above entropy context after coding
  ENTROPY_CONTEXT tl[2]; // left entropy context after coding
} SEG_RDSTAT;

// Aggregated results of a sub-8x8 mode search for one filter choice.
typedef struct {
  int_mv *ref_mv[2];
  int_mv mvp;            // mv predictor used to seed the motion search

  int64_t segment_rd;
  int r;
  int64_t d;
  int64_t sse;
  int segment_yrate;
  PREDICTION_MODE modes[4];
  SEG_RDSTAT rdstat[4][INTER_MODES];
  int mvthresh;
} BEST_SEG_INFO;

// Returns nonzero when the full-pel position of mv lies outside the
// current motion search bounds.
static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
  return (mv->row >> 3) < x->mv_row_min ||
         (mv->row >> 3) > x->mv_row_max ||
         (mv->col >> 3) < x->mv_col_min ||
         (mv->col >> 3) > x->mv_col_max;
}

// Advances the src and pre buffer pointers to sub-block i of the 8x8 so
// the block-level motion search code can be reused. Undone by
// mi_buf_restore().
static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
  MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];

  p->src.buf = &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i,
                                                   p->src.stride)];
  assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
  pd->pre[0].buf =
      &pd->pre[0].buf[vp9_raster_block_offset(BLOCK_8X8, i,
                                              pd->pre[0].stride)];
  if (has_second_ref(mbmi))
    pd->pre[1].buf = &pd->pre[1].buf[vp9_raster_block_offset(BLOCK_8X8, i,
                                                             pd->pre[1].stride)];
}

// Restores the src/pre buffer pointers saved before mi_buf_shift().
static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
                                  struct buf_2d orig_pre[2]) {
  MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
  x->plane[0].src = orig_src;
  x->e_mbd.plane[0].pre[0] = orig_pre[0];
  if (has_second_ref(mbmi))
    x->e_mbd.plane[0].pre[1] = orig_pre[1];
}

// Nonzero when mv has a sub-pixel (non multiple-of-8, i.e. non full-pel)
// component in either dimension.
static INLINE int mv_has_subpel(const MV *mv) {
  return (mv->row & 0x0F) || (mv->col & 0x0F);
}

// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion.
// TODO(aconverse): Find out if this is still productive then clean up or remove
static int check_best_zero_mv(
    const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
    int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int this_mode,
    const MV_REFERENCE_FRAME ref_frames[2]) {
  // Only relevant when this mode would code a zero mv for all references.
  if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
      frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
      (ref_frames[1] == NONE ||
       frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
    int rfc = mode_context[ref_frames[0]];
    int c1 = cost_mv_ref(cpi, NEARMV, rfc);
    int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
    int c3 = cost_mv_ref(cpi, ZEROMV, rfc);

    // Return 0 (skip this mode) when another zero-motion mode is cheaper.
    if (this_mode == NEARMV) {
      if (c1 > c3) return 0;
    } else if (this_mode == NEARESTMV) {
      if (c2 > c3) return 0;
    } else {
      assert(this_mode == ZEROMV);
      if (ref_frames[1] == NONE) {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
          return 0;
      } else {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
             frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
             frame_mv[NEARMV][ref_frames[1]].as_int == 0))
          return 0;
      }
    }
  }
  return 1;
}

// Iterative joint motion search for compound (two-reference) prediction:
// alternately refines the mv of one reference while holding the other
// reference's prediction fixed. Updates frame_mv and returns the total mv
// rate via *rate_mv.
static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE bsize,
                                int_mv *frame_mv,
                                int mi_row, int mi_col,
                                int_mv single_newmv[MAX_REF_FRAMES],
                                int *rate_mv) {
  const VP9_COMMON *const cm = &cpi->common;
  const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
  const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int refs[2] = {mbmi->ref_frame[0],
                       mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]};
  int_mv ref_mv[2];
  int ite, ref;
  const InterpKernel *kernel = vp9_filter_kernels[mbmi->interp_filter];
  struct scale_factors sf;

  // Do joint motion search in compound mode to get more accurate mv.
  struct buf_2d backup_yv12[2][MAX_MB_PLANE];
  int last_besterr[2] = {INT_MAX, INT_MAX};
  const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
    vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
    vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
  };

  // Prediction buffer from second frame.
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]);
  uint8_t *second_pred;
#else
  DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  for (ref = 0; ref < 2; ++ref) {
    ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];

    if (scaled_ref_frame[ref]) {
      int i;
      // Swap out the reference frame for a version that's been scaled to
      // match the resolution of the current frame, allowing the existing
      // motion search code to be used without additional modifications.
      for (i = 0; i < MAX_MB_PLANE; i++)
        backup_yv12[ref][i] = xd->plane[i].pre[ref];
      vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
                           NULL);
    }

    // Seed the joint search with the single-prediction NEWMV results.
    frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
  }

  // Since we have scaled the reference frames to match the size of the current
  // frame we must use a unit scaling factor during mode selection.
#if CONFIG_VP9_HIGHBITDEPTH
  vp9_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
                                    cm->width, cm->height,
                                    cm->use_highbitdepth);
#else
  vp9_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
                                    cm->width, cm->height);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Allow joint search multiple times iteratively for each reference frame
  // and break out of the search loop if it couldn't find a better mv.
  for (ite = 0; ite < 4; ite++) {
    struct buf_2d ref_yv12[2];
    int bestsme = INT_MAX;
    int sadpb = x->sadperbit16;
    MV tmp_mv;
    int search_range = 3;

    int tmp_col_min = x->mv_col_min;
    int tmp_col_max = x->mv_col_max;
    int tmp_row_min = x->mv_row_min;
    int tmp_row_max = x->mv_row_max;
    int id = ite % 2;  // Even iterations search in the first reference frame,
                       // odd iterations search in the second. The predictor
                       // found for the 'other' reference frame is factored in.

    // Initialized here because of compiler problem in Visual Studio.
    ref_yv12[0] = xd->plane[0].pre[0];
    ref_yv12[1] = xd->plane[0].pre[1];

    // Get the prediction block from the 'other' reference frame.
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
      vp9_highbd_build_inter_predictor(ref_yv12[!id].buf,
                                       ref_yv12[!id].stride,
                                       second_pred, pw,
                                       &frame_mv[refs[!id]].as_mv,
                                       &sf, pw, ph, 0,
                                       kernel, MV_PRECISION_Q3,
                                       mi_col * MI_SIZE, mi_row * MI_SIZE,
                                       xd->bd);
    } else {
      second_pred = (uint8_t *)second_pred_alloc_16;
      vp9_build_inter_predictor(ref_yv12[!id].buf,
                                ref_yv12[!id].stride,
                                second_pred, pw,
                                &frame_mv[refs[!id]].as_mv,
                                &sf, pw, ph, 0,
                                kernel, MV_PRECISION_Q3,
                                mi_col * MI_SIZE, mi_row * MI_SIZE);
    }
#else
    vp9_build_inter_predictor(ref_yv12[!id].buf,
                              ref_yv12[!id].stride,
                              second_pred, pw,
                              &frame_mv[refs[!id]].as_mv,
                              &sf, pw, ph, 0,
                              kernel, MV_PRECISION_Q3,
                              mi_col * MI_SIZE, mi_row * MI_SIZE);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    // Do compound motion search on the current reference frame.
    if (id)
      xd->plane[0].pre[0] = ref_yv12[id];
    vp9_set_mv_search_range(x, &ref_mv[id].as_mv);

    // Use the mv result from the single mode as mv predictor.
    tmp_mv = frame_mv[refs[id]].as_mv;

    // Convert from 1/8-pel units to full-pel for the integer search.
    tmp_mv.col >>= 3;
    tmp_mv.row >>= 3;

    // Small-range full-pixel motion search.
    bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
                                       search_range,
                                       &cpi->fn_ptr[bsize],
                                       &ref_mv[id].as_mv, second_pred);
    if (bestsme < INT_MAX)
      bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
                                      second_pred, &cpi->fn_ptr[bsize], 1);

    // Restore the search bounds modified by vp9_set_mv_search_range().
    x->mv_col_min = tmp_col_min;
    x->mv_col_max = tmp_col_max;
    x->mv_row_min = tmp_row_min;
    x->mv_row_max = tmp_row_max;

    if (bestsme < INT_MAX) {
      int dis; /* TODO: use dis in distortion calculation later. */
      unsigned int sse;
      bestsme = cpi->find_fractional_mv_step(
          x, &tmp_mv,
          &ref_mv[id].as_mv,
          cpi->common.allow_high_precision_mv,
          x->errorperbit,
          &cpi->fn_ptr[bsize],
          0, cpi->sf.mv.subpel_iters_per_step,
          NULL,
          x->nmvjointcost, x->mvcost,
          &dis, &sse, second_pred,
          pw, ph);
    }

    // Restore the pointer to the first (possibly scaled) prediction buffer.
    if (id)
      xd->plane[0].pre[0] = ref_yv12[0];

    if (bestsme < last_besterr[id]) {
      frame_mv[refs[id]].as_mv = tmp_mv;
      last_besterr[id] = bestsme;
    } else {
      // No improvement for this reference; stop iterating.
      break;
    }
  }

  *rate_mv = 0;

  for (ref = 0; ref < 2; ++ref) {
    if (scaled_ref_frame[ref]) {
      // Restore the prediction frame pointers to their unscaled versions.
      int i;
      for (i = 0; i < MAX_MB_PLANE; i++)
        xd->plane[i].pre[ref] = backup_yv12[ref][i];
    }

    *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
                                &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
                                x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  }
}

// RD-optimal selection of inter modes and motion vectors for each sub-8x8
// partition (label) of an 8x8 block, for the filter given by filter_idx.
// Results are accumulated into bsi_buf[filter_idx]; earlier filters'
// results may be reused when mvs are full-pel and identical. Returns the
// segment RD cost, or INT64_MAX on early termination.
static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                        int_mv *best_ref_mv,
                                        int_mv *second_best_ref_mv,
                                        int64_t best_rd, int *returntotrate,
                                        int *returnyrate,
                                        int64_t *returndistortion,
                                        int *skippable, int64_t *psse,
                                        int mvthresh,
                                        int_mv seg_mvs[4][MAX_REF_FRAMES],
                                        BEST_SEG_INFO *bsi_buf, int filter_idx,
                                        int mi_row, int mi_col) {
  int i;
  BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  MB_MODE_INFO *mbmi = &mi->mbmi;
  int mode_idx;
  int k, br = 0, idx, idy;
  int64_t bd = 0, block_sse = 0;
  PREDICTION_MODE this_mode;
  VP9_COMMON *cm = &cpi->common;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int label_count = 4;
  int64_t this_segment_rd = 0;
  int
      label_mv_thresh;
  int segmentyrate = 0;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  ENTROPY_CONTEXT t_above[2], t_left[2];
  int subpelmv = 1, have_ref = 0;
  const int has_second_rf = has_second_ref(mbmi);
  const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;

  vp9_zero(*bsi);

  bsi->segment_rd = best_rd;
  bsi->ref_mv[0] = best_ref_mv;
  bsi->ref_mv[1] = second_best_ref_mv;
  bsi->mvp.as_int = best_ref_mv->as_int;
  bsi->mvthresh = mvthresh;

  for (i = 0; i < 4; i++)
    bsi->modes[i] = ZEROMV;

  memcpy(t_above, pd->above_context, sizeof(t_above));
  memcpy(t_left, pd->left_context, sizeof(t_left));

  // 64 makes this threshold really big effectively
  // making it so that we very rarely check mvs on
  // segments.   setting this to 1 would make mv thresh
  // roughly equal to what it is for macroblocks
  label_mv_thresh = 1 * bsi->mvthresh / label_count;

  // Segmentation method overheads
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      // TODO(jingning,rbultje): rewrite the rate-distortion optimization
      // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
      int_mv mode_mv[MB_MODE_COUNT][2];
      int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
      PREDICTION_MODE mode_selected = ZEROMV;
      int64_t best_rd = INT64_MAX;  // shadows the function parameter: per-label best
      const int i = idy * 2 + idx;  // shadows outer i: raster index of this label
      int ref;

      // Gather NEAREST/NEAR mv candidates per reference for this sub-block.
      for (ref = 0; ref < 1 + has_second_rf; ++ref) {
        const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
        frame_mv[ZEROMV][frame].as_int = 0;
        vp9_append_sub8x8_mvs_for_idx(cm, xd, i, ref, mi_row, mi_col,
                                      &frame_mv[NEARESTMV][frame],
                                      &frame_mv[NEARMV][frame],
                                      mbmi_ext->mode_context);
      }

      // search for the best motion vector on this segment
      for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
        const struct buf_2d orig_src = x->plane[0].src;
        struct buf_2d orig_pre[2];

        mode_idx = INTER_OFFSET(this_mode);
        bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
        if (!(inter_mode_mask & (1 << this_mode)))
          continue;

        if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
                                this_mode, mbmi->ref_frame))
          continue;

        memcpy(orig_pre, pd->pre, sizeof(orig_pre));
        memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
               sizeof(bsi->rdstat[i][mode_idx].ta));
        memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
               sizeof(bsi->rdstat[i][mode_idx].tl));

        // motion search for newmv (single predictor case only)
        if (!has_second_rf && this_mode == NEWMV &&
            seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
          MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
          int step_param = 0;
          int thissme, bestsme = INT_MAX;
          int sadpb = x->sadperbit4;
          MV mvp_full;
          int max_mv;
          int cost_list[5];

          /* Is the best so far sufficiently good that we cant justify doing
           * and new motion search. */
          if (best_rd < label_mv_thresh)
            break;

          if (cpi->oxcf.mode != BEST) {
            // use previous block's result as next block's MV predictor.
            if (i > 0) {
              bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
              if (i == 2)
                bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
            }
          }
          if (i == 0)
            max_mv = x->max_mv_context[mbmi->ref_frame[0]];
          else
            max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;

          if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
            // Take wtd average of the step_params based on the last frame's
            // max mv magnitude and the best ref mvs of the current block for
            // the given reference.
            step_param = (vp9_init_search_range(max_mv) +
                              cpi->mv_step_param) / 2;
          } else {
            step_param = cpi->mv_step_param;
          }

          // Full-pel predictor (1/8-pel -> full-pel).
          mvp_full.row = bsi->mvp.as_mv.row >> 3;
          mvp_full.col = bsi->mvp.as_mv.col >> 3;

          if (cpi->sf.adaptive_motion_search) {
            mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
            mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
            step_param = MAX(step_param, 8);
          }

          // adjust src pointer for this block
          mi_buf_shift(x, i);

          vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);

          bestsme = vp9_full_pixel_search(
              cpi, x, bsize, &mvp_full, step_param, sadpb,
              cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
              &bsi->ref_mv[0]->as_mv, new_mv,
              INT_MAX, 1);

          // Should we do a full search (best quality only)
          if (cpi->oxcf.mode == BEST) {
            int_mv *const best_mv = &mi->bmi[i].as_mv[0];
            /* Check if mvp_full is within the range. */
            clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
                     x->mv_row_min, x->mv_row_max);
            thissme = cpi->full_search_sad(x, &mvp_full,
                                           sadpb, 16, &cpi->fn_ptr[bsize],
                                           &bsi->ref_mv[0]->as_mv,
                                           &best_mv->as_mv);
            cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
            if (thissme < bestsme) {
              bestsme = thissme;
              *new_mv = best_mv->as_mv;
            } else {
              // The full search result is actually worse so re-instate the
              // previous best vector
              best_mv->as_mv = *new_mv;
            }
          }

          if (bestsme < INT_MAX) {
            int distortion;
            cpi->find_fractional_mv_step(
                x,
                new_mv,
                &bsi->ref_mv[0]->as_mv,
                cm->allow_high_precision_mv,
                x->errorperbit, &cpi->fn_ptr[bsize],
                cpi->sf.mv.subpel_force_stop,
                cpi->sf.mv.subpel_iters_per_step,
                cond_cost_list(cpi, cost_list),
                x->nmvjointcost, x->mvcost,
                &distortion,
                &x->pred_sse[mbmi->ref_frame[0]],
                NULL, 0, 0);

            // save motion search result for use in compound prediction
            seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
          }

          if (cpi->sf.adaptive_motion_search)
            x->pred_mv[mbmi->ref_frame[0]] = *new_mv;

          // restore src pointers
          mi_buf_restore(x, orig_src, orig_pre);
        }

        if (has_second_rf) {
          // Compound prediction needs a valid mv for both references.
          if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
              seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
            continue;
        }

        if (has_second_rf && this_mode == NEWMV &&
            mbmi->interp_filter == EIGHTTAP) {
          // adjust src pointers
          mi_buf_shift(x, i);
          if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
            int rate_mv;
            joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
                                mi_row, mi_col, seg_mvs[i],
                                &rate_mv);
            seg_mvs[i][mbmi->ref_frame[0]].as_int =
                frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
            seg_mvs[i][mbmi->ref_frame[1]].as_int =
                frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
1910 } 1911 // restore src pointers 1912 mi_buf_restore(x, orig_src, orig_pre); 1913 } 1914 1915 bsi->rdstat[i][mode_idx].brate = 1916 set_and_cost_bmi_mvs(cpi, x, xd, i, this_mode, mode_mv[this_mode], 1917 frame_mv, seg_mvs[i], bsi->ref_mv, 1918 x->nmvjointcost, x->mvcost); 1919 1920 for (ref = 0; ref < 1 + has_second_rf; ++ref) { 1921 bsi->rdstat[i][mode_idx].mvs[ref].as_int = 1922 mode_mv[this_mode][ref].as_int; 1923 if (num_4x4_blocks_wide > 1) 1924 bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int = 1925 mode_mv[this_mode][ref].as_int; 1926 if (num_4x4_blocks_high > 1) 1927 bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int = 1928 mode_mv[this_mode][ref].as_int; 1929 } 1930 1931 // Trap vectors that reach beyond the UMV borders 1932 if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) || 1933 (has_second_rf && 1934 mv_check_bounds(x, &mode_mv[this_mode][1].as_mv))) 1935 continue; 1936 1937 if (filter_idx > 0) { 1938 BEST_SEG_INFO *ref_bsi = bsi_buf; 1939 subpelmv = 0; 1940 have_ref = 1; 1941 1942 for (ref = 0; ref < 1 + has_second_rf; ++ref) { 1943 subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv); 1944 have_ref &= mode_mv[this_mode][ref].as_int == 1945 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int; 1946 } 1947 1948 if (filter_idx > 1 && !subpelmv && !have_ref) { 1949 ref_bsi = bsi_buf + 1; 1950 have_ref = 1; 1951 for (ref = 0; ref < 1 + has_second_rf; ++ref) 1952 have_ref &= mode_mv[this_mode][ref].as_int == 1953 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int; 1954 } 1955 1956 if (!subpelmv && have_ref && 1957 ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) { 1958 memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx], 1959 sizeof(SEG_RDSTAT)); 1960 if (num_4x4_blocks_wide > 1) 1961 bsi->rdstat[i + 1][mode_idx].eobs = 1962 ref_bsi->rdstat[i + 1][mode_idx].eobs; 1963 if (num_4x4_blocks_high > 1) 1964 bsi->rdstat[i + 2][mode_idx].eobs = 1965 ref_bsi->rdstat[i + 2][mode_idx].eobs; 1966 1967 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) { 1968 
mode_selected = this_mode; 1969 best_rd = bsi->rdstat[i][mode_idx].brdcost; 1970 } 1971 continue; 1972 } 1973 } 1974 1975 bsi->rdstat[i][mode_idx].brdcost = 1976 encode_inter_mb_segment(cpi, x, 1977 bsi->segment_rd - this_segment_rd, i, 1978 &bsi->rdstat[i][mode_idx].byrate, 1979 &bsi->rdstat[i][mode_idx].bdist, 1980 &bsi->rdstat[i][mode_idx].bsse, 1981 bsi->rdstat[i][mode_idx].ta, 1982 bsi->rdstat[i][mode_idx].tl, 1983 mi_row, mi_col); 1984 if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) { 1985 bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv, 1986 bsi->rdstat[i][mode_idx].brate, 0); 1987 bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate; 1988 bsi->rdstat[i][mode_idx].eobs = p->eobs[i]; 1989 if (num_4x4_blocks_wide > 1) 1990 bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1]; 1991 if (num_4x4_blocks_high > 1) 1992 bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2]; 1993 } 1994 1995 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) { 1996 mode_selected = this_mode; 1997 best_rd = bsi->rdstat[i][mode_idx].brdcost; 1998 } 1999 } /*for each 4x4 mode*/ 2000 2001 if (best_rd == INT64_MAX) { 2002 int iy, midx; 2003 for (iy = i + 1; iy < 4; ++iy) 2004 for (midx = 0; midx < INTER_MODES; ++midx) 2005 bsi->rdstat[iy][midx].brdcost = INT64_MAX; 2006 bsi->segment_rd = INT64_MAX; 2007 return INT64_MAX; 2008 } 2009 2010 mode_idx = INTER_OFFSET(mode_selected); 2011 memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above)); 2012 memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left)); 2013 2014 set_and_cost_bmi_mvs(cpi, x, xd, i, mode_selected, mode_mv[mode_selected], 2015 frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost, 2016 x->mvcost); 2017 2018 br += bsi->rdstat[i][mode_idx].brate; 2019 bd += bsi->rdstat[i][mode_idx].bdist; 2020 block_sse += bsi->rdstat[i][mode_idx].bsse; 2021 segmentyrate += bsi->rdstat[i][mode_idx].byrate; 2022 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost; 2023 2024 if (this_segment_rd > bsi->segment_rd) { 2025 
        int iy, midx;
        // Running total exceeded the best segment rd: poison the remaining
        // entries so later passes skip them, and bail out.
        for (iy = i + 1; iy < 4; ++iy)
          for (midx = 0; midx < INTER_MODES; ++midx)
            bsi->rdstat[iy][midx].brdcost = INT64_MAX;
        bsi->segment_rd = INT64_MAX;
        return INT64_MAX;
      }
    }
  } /* for each label */

  bsi->r = br;
  bsi->d = bd;
  bsi->segment_yrate = segmentyrate;
  bsi->segment_rd = this_segment_rd;
  bsi->sse = block_sse;

  // update the coding decisions
  for (k = 0; k < 4; ++k)
    bsi->modes[k] = mi->bmi[k].as_mode;

  if (bsi->segment_rd > best_rd)
    return INT64_MAX;
  /* set it to the best */
  for (i = 0; i < 4; i++) {
    mode_idx = INTER_OFFSET(bsi->modes[i]);
    mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
    if (has_second_ref(mbmi))
      mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
    x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
    mi->bmi[i].as_mode = bsi->modes[i];
  }

  /*
   * used to set mbmi->mv.as_int
   */
  *returntotrate = bsi->r;
  *returndistortion = bsi->d;
  *returnyrate = bsi->segment_yrate;
  *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
  *psse = bsi->sse;
  // The block-level mode of the last (bottom-right) 4x4 unit becomes the
  // 8x8 block's mode.
  mbmi->mode = bsi->modes[3];

  return bsi->segment_rd;
}

// Fills ref_costs_single[] / ref_costs_comp[] with the bit cost of signalling
// each possible reference-frame choice for the current segment and context,
// and writes to *comp_mode_p the probability used to cost the
// single-vs-compound selection bit.  When the segment pins the reference
// frame (SEG_LVL_REF_FRAME active) nothing is signalled, so all costs are 0.
static void estimate_ref_frame_costs(const VP9_COMMON *cm,
                                     const MACROBLOCKD *xd,
                                     int segment_id,
                                     unsigned int *ref_costs_single,
                                     unsigned int *ref_costs_comp,
                                     vpx_prob *comp_mode_p) {
  int seg_ref_active = segfeature_active(&cm->seg, segment_id,
                                         SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
    memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
    // 128 == uninformative probability (50/50) for the compound-mode bit.
    *comp_mode_p = 128;
  } else {
    vpx_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
    vpx_prob comp_inter_p = 128;

    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
      comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
      *comp_mode_p = comp_inter_p;
    } else {
      *comp_mode_p = 128;
    }

    ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);

    if (cm->reference_mode != COMPOUND_REFERENCE) {
      vpx_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
      vpx_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
      unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);

      if (cm->reference_mode == REFERENCE_MODE_SELECT)
        base_cost += vp9_cost_bit(comp_inter_p, 0);

      // Single-reference signalling is a two-level tree:
      //   p1 distinguishes LAST vs {GOLDEN, ALTREF},
      //   p2 then distinguishes GOLDEN vs ALTREF.
      ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
          ref_costs_single[ALTREF_FRAME] = base_cost;
      ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
      ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
      ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
      ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
      ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
    } else {
      // Single prediction is not available in COMPOUND_REFERENCE mode;
      // 512 is used as a nominal one-bit cost.  NOTE(review): 512 appears to
      // equal vp9_cost_bit(128, x) — confirm against vp9_cost.h.
      ref_costs_single[LAST_FRAME] = 512;
      ref_costs_single[GOLDEN_FRAME] = 512;
      ref_costs_single[ALTREF_FRAME] = 512;
    }
    if (cm->reference_mode != SINGLE_REFERENCE) {
      vpx_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
      unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);

      if (cm->reference_mode == REFERENCE_MODE_SELECT)
        base_cost += vp9_cost_bit(comp_inter_p, 1);

      // Compound pairs are indexed by their first (non-ALTREF) reference.
      ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0);
      ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
    } else {
      ref_costs_comp[LAST_FRAME] = 512;
      ref_costs_comp[GOLDEN_FRAME] = 512;
    }
  }
}

// Snapshots the mode decision for this block into the pick-mode context so
// it can be restored if the encoder later commits to this partitioning.
static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                                 int mode_index,
                                 int64_t comp_pred_diff[REFERENCE_MODES],
                                 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
                                 int skippable) {
  MACROBLOCKD *const xd =
&x->e_mbd;

  // Take a snapshot of the coding context so it can be
  // restored if we decide to encode this way
  ctx->skip = x->skip;
  ctx->skippable = skippable;
  ctx->best_mode_index = mode_index;
  ctx->mic = *xd->mi[0];
  ctx->mbmi_ext = *x->mbmi_ext;
  // Per-reference-mode rd deltas, narrowed to int for storage.
  ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
  ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
  ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];

  memcpy(ctx->best_filter_diff, best_filter_diff,
         sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
}

// Prepares per-reference-frame search state for one reference: sets up the
// prediction-block pointers for that reference, gathers and refines the
// nearest/near MV candidates, and (for >= 8x8, unscaled references) primes
// the MV predictor used as the centre of subsequent motion searches.
static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
                               MV_REFERENCE_FRAME ref_frame,
                               BLOCK_SIZE block_size,
                               int mi_row, int mi_col,
                               int_mv frame_nearest_mv[MAX_REF_FRAMES],
                               int_mv frame_near_mv[MAX_REF_FRAMES],
                               struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
  const VP9_COMMON *cm = &cpi->common;
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
  // frame_refs is indexed from 0, ref_frame from LAST_FRAME (1) — hence -1.
  const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;

  assert(yv12 != NULL);

  // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
  // use the UV scaling factors.
  vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);

  // Gets an initial list of candidate vectors from neighbours and orders them
  vp9_find_mv_refs(cm, xd, mi, ref_frame, candidates, mi_row, mi_col,
                   NULL, NULL, mbmi_ext->mode_context);

  // Candidate refinement carried out at encoder and decoder
  vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
                        &frame_nearest_mv[ref_frame],
                        &frame_near_mv[ref_frame]);

  // Further refinement that is encode side only to test the top few candidates
  // in full and choose the best as the centre point for subsequent searches.
  // The current implementation doesn't support scaling.
  if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
    vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
                ref_frame, block_size);
}

// Full-pixel then sub-pixel motion search against a single reference frame.
// On success *tmp_mv holds the chosen MV and *rate_mv its signalling cost;
// on an adaptive-search early exit *tmp_mv is set to INVALID_MV.
static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                 BLOCK_SIZE bsize,
                                 int mi_row, int mi_col,
                                 int_mv *tmp_mv, int *rate_mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  const VP9_COMMON *cm = &cpi->common;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
  int bestsme = INT_MAX;
  int step_param;
  int sadpb = x->sadperbit16;
  MV mvp_full;
  int ref = mbmi->ref_frame[0];
  MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;

  // The search clamps the MV range; save the limits so they can be restored.
  int tmp_col_min = x->mv_col_min;
  int tmp_col_max = x->mv_col_max;
  int tmp_row_min = x->mv_row_min;
  int tmp_row_max = x->mv_row_max;
  int cost_list[5];

  const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
                                                                        ref);

  // Candidate start points: two refined ref MVs plus the previous best MV.
  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
  pred_mv[2] = x->pred_mv[ref];

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution
    // of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[0];

    vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  }

  vp9_set_mv_search_range(x, &ref_mv);

  // Work out the size of the first step in the mv step search.
  // 0 here is maximum length first step. 1 is MAX >> 1 etc.
  if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
    // Take wtd average of the step_params based on the last frame's
    // max mv magnitude and that based on the best ref mvs of the current
    // block for the given reference.
    step_param = (vp9_init_search_range(x->max_mv_context[ref]) +
                  cpi->mv_step_param) / 2;
  } else {
    step_param = cpi->mv_step_param;
  }

  if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
    // Smaller blocks start with a coarser (larger) first step.
    int boffset = 2 * (b_width_log2_lookup[BLOCK_64X64] -
                       MIN(b_height_log2_lookup[bsize],
                           b_width_log2_lookup[bsize]));
    step_param = MAX(step_param, boffset);
  }

  if (cpi->sf.adaptive_motion_search) {
    int bwl = b_width_log2_lookup[bsize];
    int bhl = b_height_log2_lookup[bsize];
    int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);

    if (tlevel < 5)
      step_param += 2;

    // prev_mv_sad is not setup for dynamically scaled frames.
    if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
      int i;
      // If this reference predicts much worse (8x the SAD) than another
      // available reference, skip the search entirely for this reference.
      for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
        if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
          x->pred_mv[ref].row = 0;
          x->pred_mv[ref].col = 0;
          tmp_mv->as_int = INVALID_MV;

          if (scaled_ref_frame) {
            // Note: this `i` intentionally shadows the loop index above.
            int i;
            for (i = 0; i < MAX_MB_PLANE; ++i)
              xd->plane[i].pre[0] = backup_yv12[i];
          }
          return;
        }
      }
    }
  }

  mvp_full = pred_mv[x->mv_best_ref_index[ref]];

  // Convert the 1/8-pel predictor to full-pel units for the integer search.
  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

  bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
                                  cond_cost_list(cpi, cost_list),
                                  &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);

  // Restore the MV limits clamped by vp9_set_mv_search_range().
  x->mv_col_min = tmp_col_min;
  x->mv_col_max = tmp_col_max;
  x->mv_row_min = tmp_row_min;
  x->mv_row_max = tmp_row_max;

  if (bestsme < INT_MAX) {
    int dis;  /* TODO: use dis in distortion calculation later. */
    cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
                                 cm->allow_high_precision_mv,
                                 x->errorperbit,
                                 &cpi->fn_ptr[bsize],
                                 cpi->sf.mv.subpel_force_stop,
                                 cpi->sf.mv.subpel_iters_per_step,
                                 cond_cost_list(cpi, cost_list),
                                 x->nmvjointcost, x->mvcost,
                                 &dis, &x->pred_sse[ref], NULL, 0, 0);
  }
  *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
                             x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);

  if (cpi->sf.adaptive_motion_search)
    x->pred_mv[ref] = tmp_mv->as_mv;

  if (scaled_ref_frame) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[0] = backup_yv12[i];
  }
}



// Restores the destination buffer pointers/strides saved before prediction
// was redirected into a temporary buffer.
static INLINE void restore_dst_buf(MACROBLOCKD *xd,
                                   uint8_t *orig_dst[MAX_MB_PLANE],
                                   int orig_dst_stride[MAX_MB_PLANE]) {
  int i;
  for (i = 0; i < MAX_MB_PLANE; i++) {
    xd->plane[i].dst.buf = orig_dst[i];
    xd->plane[i].dst.stride = orig_dst_stride[i];
  }
}

// In some situations we
// want to discount the apparent cost of a new motion
// vector. Where there is a subtle motion field and especially where there is
// low spatial complexity then it can be hard to cover the cost of a new motion
// vector in a single block, even if that motion vector reduces distortion.
// However, once established that vector may be usable through the nearest and
// near mv modes to reduce distortion in subsequent blocks and also improve
// visual quality.
// Returns non-zero when the NEWMV rate should be discounted: a non-zero new
// MV while both NEAREST and NEAR candidates are zero or invalid.
static int discount_newmv_test(const VP9_COMP *cpi,
                               int this_mode,
                               int_mv this_mv,
                               int_mv (*mode_mv)[MAX_REF_FRAMES],
                               int ref_frame) {
  return (!cpi->rc.is_src_frame_alt_ref &&
          (this_mode == NEWMV) &&
          (this_mv.as_int != 0) &&
          ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
           (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
          ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
           (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
}

// Computes rate/distortion for one inter mode (already set in mbmi) on a
// whole block: runs motion search for NEWMV, selects the interpolation
// filter, builds the prediction and accumulates Y + UV rd stats into the
// output pointers.  Returns INT64_MAX on early termination, 0 otherwise
// (the final rd cost is recomputed by the caller).
static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                 BLOCK_SIZE bsize,
                                 int *rate2, int64_t *distortion,
                                 int *skippable,
                                 int *rate_y, int *rate_uv,
                                 int *disable_skip,
                                 int_mv (*mode_mv)[MAX_REF_FRAMES],
                                 int mi_row, int mi_col,
                                 int_mv single_newmv[MAX_REF_FRAMES],
                                 INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
                                 int (*single_skippable)[MAX_REF_FRAMES],
                                 int64_t *psse,
                                 const int64_t ref_best_rd,
                                 int64_t *mask_filter,
                                 int64_t filter_cache[]) {
  VP9_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const int is_comp_pred = has_second_ref(mbmi);
  const int this_mode = mbmi->mode;
  int_mv *frame_mv = mode_mv[this_mode];
  int i;
  // refs[1] maps the "no second reference" sentinel (negative) to 0.
  int refs[2] = { mbmi->ref_frame[0],
                  (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  int_mv cur_mv[2];
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]);
  uint8_t *tmp_buf;
#else
  DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  int pred_exists = 0;
  int intpel_mv;
  int64_t rd, tmp_rd, best_rd = INT64_MAX;
  int best_needs_copy = 0;
  uint8_t *orig_dst[MAX_MB_PLANE];
  int orig_dst_stride[MAX_MB_PLANE];
  int rs = 0;
  INTERP_FILTER best_filter = SWITCHABLE;
  uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0};
  int64_t bsse[MAX_MB_PLANE << 2] = {0};

  int bsl = mi_width_log2_lookup[bsize];
  // Chessboard pattern: alternate the filter search on/off by position+frame.
  int pred_filter_search = cpi->sf.cb_pred_filter_search ?
      (((mi_row + mi_col) >> bsl) +
       get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;

  int skip_txfm_sb = 0;
  int64_t skip_sse_sb = INT64_MAX;
  int64_t distortion_y = 0, distortion_uv = 0;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
  } else {
    tmp_buf = (uint8_t *)tmp_buf16;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if (pred_filter_search) {
    // Reuse the above/left neighbours' filter when they agree (or when the
    // mode is not NEWMV).
    INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
    if (xd->up_available)
      af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
    if (xd->left_available)
      lf = xd->mi[-1]->mbmi.interp_filter;

    if ((this_mode != NEWMV) || (af == lf))
      best_filter = af;
  }

  if (is_comp_pred) {
    if (frame_mv[refs[0]].as_int == INVALID_MV ||
        frame_mv[refs[1]].as_int == INVALID_MV)
      return INT64_MAX;

    if (cpi->sf.adaptive_mode_search) {
      if (single_filter[this_mode][refs[0]] ==
          single_filter[this_mode][refs[1]])
        best_filter = single_filter[this_mode][refs[0]];
    }
  }

  if (this_mode == NEWMV) {
    int rate_mv;
    if (is_comp_pred) {
      // Initialize mv using single
      // prediction mode result.
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;

      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        joint_motion_search(cpi, x, bsize, frame_mv,
                            mi_row, mi_col, single_newmv, &rate_mv);
      } else {
        rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
                                  &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
                                  x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
        rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
                                   &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
      }
      *rate2 += rate_mv;
    } else {
      int_mv tmp_mv;
      single_motion_search(cpi, x, bsize, mi_row, mi_col,
                           &tmp_mv, &rate_mv);
      if (tmp_mv.as_int == INVALID_MV)
        return INT64_MAX;

      frame_mv[refs[0]].as_int =
          xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
      single_newmv[refs[0]].as_int = tmp_mv.as_int;

      // Estimate the rate implications of a new mv but discount this
      // under certain circumstances where we want to help initiate a weak
      // motion field, where the distortion gain for a single block may not
      // be enough to overcome the cost of a new mv.
      if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) {
        *rate2 += MAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
      } else {
        *rate2 += rate_mv;
      }
    }
  }

  for (i = 0; i < is_comp_pred + 1; ++i) {
    cur_mv[i] = frame_mv[refs[i]];
    // Clip "next_nearest" so that it does not extend too far out of image
    if (this_mode != NEWMV)
      clamp_mv2(&cur_mv[i].as_mv, xd);

    if (mv_check_bounds(x, &cur_mv[i].as_mv))
      return INT64_MAX;
    mbmi->mv[i].as_int = cur_mv[i].as_int;
  }

  // do first prediction into the destination buffer. Do the next
  // prediction into a temporary buffer. Then keep track of which one
  // of these currently holds the best predictor, and use the other
  // one for future predictions. In the end, copy from tmp_buf to
  // dst if necessary.
  for (i = 0; i < MAX_MB_PLANE; i++) {
    orig_dst[i] = xd->plane[i].dst.buf;
    orig_dst_stride[i] = xd->plane[i].dst.stride;
  }

  // We don't include the cost of the second reference here, because there
  // are only two options: Last/ARF or Golden/ARF; The second one is always
  // known, which is ARF.
  //
  // Under some circumstances we discount the cost of new mv mode to encourage
  // initiation of a motion field.
  if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]],
                          mode_mv, refs[0])) {
    *rate2 += MIN(cost_mv_ref(cpi, this_mode,
                              mbmi_ext->mode_context[refs[0]]),
                  cost_mv_ref(cpi, NEARESTMV,
                              mbmi_ext->mode_context[refs[0]]));
  } else {
    *rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]);
  }

  // Early out if the rate alone already exceeds the best rd so far.
  if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
      mbmi->mode != NEARESTMV)
    return INT64_MAX;

  pred_exists = 0;
  // Are all MVs integer pel for Y and UV
  intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
  if (is_comp_pred)
    intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);

  // Search for best switchable filter by checking the variance of
  // pred error irrespective of whether the filter will be used
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
    filter_cache[i] = INT64_MAX;

  if (cm->interp_filter != BILINEAR) {
    if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
      best_filter = EIGHTTAP;
    } else if (best_filter == SWITCHABLE) {
      int newbest;
      int tmp_rate_sum = 0;
      int64_t tmp_dist_sum = 0;

      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        int j;
        int64_t rs_rd;
        int tmp_skip_sb = 0;
        int64_t tmp_skip_sse = INT64_MAX;

        mbmi->interp_filter = i;
        rs = vp9_get_switchable_rate(cpi, xd);
        rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);

        if (i > 0 && intpel_mv) {
          // Integer-pel MVs produce identical predictions for all filters,
          // so reuse the i == 0 model result instead of re-predicting.
          rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
          filter_cache[i] = rd;
          filter_cache[SWITCHABLE_FILTERS] =
              MIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
          if (cm->interp_filter == SWITCHABLE)
            rd += rs_rd;
          *mask_filter = MAX(*mask_filter, rd);
        } else {
          int rate_sum = 0;
          int64_t dist_sum = 0;
          if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
              (cpi->sf.interp_filter_search_mask & (1 << i))) {
            rate_sum = INT_MAX;
            dist_sum = INT64_MAX;
            continue;
          }

          if ((cm->interp_filter == SWITCHABLE &&
               (!i || best_needs_copy)) ||
              (cm->interp_filter != SWITCHABLE &&
               (cm->interp_filter == mbmi->interp_filter ||
                (i == 0 && intpel_mv)))) {
            restore_dst_buf(xd, orig_dst, orig_dst_stride);
          } else {
            // Predict into the scratch buffer to preserve the current best.
            for (j = 0; j < MAX_MB_PLANE; j++) {
              xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
              xd->plane[j].dst.stride = 64;
            }
          }
          vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
          model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
                          &tmp_skip_sb, &tmp_skip_sse);

          rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
          filter_cache[i] = rd;
          filter_cache[SWITCHABLE_FILTERS] =
              MIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
          if (cm->interp_filter == SWITCHABLE)
            rd += rs_rd;
          *mask_filter = MAX(*mask_filter, rd);

          if (i == 0 && intpel_mv) {
            tmp_rate_sum = rate_sum;
            tmp_dist_sum = dist_sum;
          }
        }

        if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
          if (rd / 2 > ref_best_rd) {
            restore_dst_buf(xd, orig_dst, orig_dst_stride);
            return INT64_MAX;
          }
        }
        newbest = i == 0 || rd < best_rd;

        if (newbest) {
          best_rd = rd;
          best_filter = mbmi->interp_filter;
          // Toggle which buffer (dst vs tmp_buf) holds the best prediction.
          if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
            best_needs_copy = !best_needs_copy;
        }

        if ((cm->interp_filter == SWITCHABLE && newbest) ||
            (cm->interp_filter != SWITCHABLE &&
             cm->interp_filter == mbmi->interp_filter)) {
          pred_exists = 1;
          tmp_rd = best_rd;

          skip_txfm_sb = tmp_skip_sb;
          skip_sse_sb = tmp_skip_sse;
          memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
          memcpy(bsse, x->bsse, sizeof(bsse));
        }
      }
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
    }
  }
  // Set the appropriate filter
  mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
      cm->interp_filter : best_filter;
  rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi, xd) : 0;

  if (pred_exists) {
    if (best_needs_copy) {
      // again temporarily set the buffers to local memory to prevent a memcpy
      for (i = 0; i < MAX_MB_PLANE; i++) {
        xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
        xd->plane[i].dst.stride = 64;
      }
    }
    rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
  } else {
    int tmp_rate;
    int64_t tmp_dist;
    // Handles the special case when a filter that is not in the
    // switchable list (ex. bilinear) is indicated at the frame level, or
    // skip condition holds.
    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
    model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
                    &skip_txfm_sb, &skip_sse_sb);
    rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
    memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
    memcpy(bsse, x->bsse, sizeof(bsse));
  }

  if (!is_comp_pred)
    single_filter[this_mode][refs[0]] = mbmi->interp_filter;

  if (cpi->sf.adaptive_mode_search)
    if (is_comp_pred)
      if (single_skippable[this_mode][refs[0]] &&
          single_skippable[this_mode][refs[1]])
        memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));

  if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
    // if current pred_error modeled rd is substantially more than the best
    // so far, do not bother doing full rd
    if (rd / 2 > ref_best_rd) {
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }
  }

  if (cm->interp_filter == SWITCHABLE)
    *rate2 += rs;

  memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
  memcpy(x->bsse, bsse, sizeof(bsse));

  if (!skip_txfm_sb) {
    int skippable_y, skippable_uv;
    int64_t sseuv = INT64_MAX;
    int64_t rdcosty = INT64_MAX;

    // Y cost and distortion
    vp9_subtract_plane(x, bsize, 0);
    super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
                    bsize, ref_best_rd);

    if (*rate_y == INT_MAX) {
      *rate2 = INT_MAX;
      *distortion = INT64_MAX;
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }

    *rate2 += *rate_y;
    *distortion += distortion_y;

    rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
    rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));

    if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
                          &sseuv, bsize, ref_best_rd - rdcosty)) {
      *rate2 = INT_MAX;
      *distortion = INT64_MAX;
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }

    *psse += sseuv;
    *rate2 += *rate_uv;
    *distortion += distortion_uv;
    *skippable = skippable_y && skippable_uv;
  } else {
    // Model predicted the whole block is skippable: no coefficient coding.
    x->skip = 1;
    *disable_skip = 1;

    // The cost of skip bit needs to be added.
    *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);

    *distortion = skip_sse_sb;
  }

  if (!is_comp_pred)
    single_skippable[this_mode][refs[0]] = *skippable;

  restore_dst_buf(xd, orig_dst, orig_dst_stride);
  return 0;  // The rate-distortion cost will be re-calculated by caller.
}

// Rate-distortion mode decision for an intra-only superblock: picks the best
// luma mode (whole-block or per-4x4 for sub-8x8 sizes) and the best chroma
// mode, then fills rd_cost with the combined rate/distortion/rdcost.
void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
                               RD_COST *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = xd->plane;
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  int y_skip = 0, uv_skip = 0;
  int64_t dist_y = 0, dist_uv = 0;
  TX_SIZE max_uv_tx_size;
  x->skip_encode = 0;
  ctx->skip = 0;
  xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
  xd->mi[0]->mbmi.ref_frame[1] = NONE;

  if (bsize >= BLOCK_8X8) {
    if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
                               &dist_y, &y_skip, bsize,
                               best_rd) >= best_rd) {
      // Could not beat best_rd; signal failure to the caller via rate.
      rd_cost->rate = INT_MAX;
      return;
    }
  } else {
    y_skip = 0;
    if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
                                     &dist_y, best_rd) >= best_rd) {
      rd_cost->rate = INT_MAX;
      return;
    }
  }
  max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize,
                                       pd[1].subsampling_x,
                                       pd[1].subsampling_y);
  rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
                          &dist_uv, &uv_skip, MAX(BLOCK_8X8, bsize),
                          max_uv_tx_size);

  if (y_skip && uv_skip) {
    rd_cost->rate =
rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
                    vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
    rd_cost->dist = dist_y + dist_uv;
  } else {
    rd_cost->rate = rate_y + rate_uv +
                    vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
    rd_cost->dist = dist_y + dist_uv;
  }

  ctx->mic = *xd->mi[0];
  ctx->mbmi_ext = *x->mbmi_ext;
  rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
}

// This function is designed to apply a bias or adjustment to an rd value based
// on the relative variance of the source and reconstruction.
#define LOW_VAR_THRESH 16
#define VLOW_ADJ_MAX 25
#define VHIGH_ADJ_MAX 8
static void rd_variance_adjustment(VP9_COMP *cpi,
                                   MACROBLOCK *x,
                                   BLOCK_SIZE bsize,
                                   int64_t *this_rd,
                                   MV_REFERENCE_FRAME ref_frame,
                                   unsigned int source_variance) {
  MACROBLOCKD *const xd = &x->e_mbd;
  unsigned int recon_variance;
  unsigned int absvar_diff = 0;
  int64_t var_error = 0;
  int64_t var_factor = 0;

  if (*this_rd == INT64_MAX)
    return;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    recon_variance =
        vp9_high_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize, xd->bd);
  } else {
    recon_variance =
        vp9_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
  }
#else
  recon_variance =
      vp9_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if ((source_variance + recon_variance) > LOW_VAR_THRESH) {
    absvar_diff = (source_variance > recon_variance)
                      ? (source_variance - recon_variance)
                      : (recon_variance - source_variance);

    // var_error in [0, 100]: 0 when the two variances match, growing as
    // they diverge.  NOTE(review): the 200 * src * recon product is computed
    // in unsigned int and could overflow for very large variances — confirm
    // the expected variance range upstream.
    var_error = (200 * source_variance * recon_variance) /
                ((source_variance * source_variance) +
                 (recon_variance * recon_variance));
    var_error = 100 - var_error;
  }

  // Source variance above a threshold and ref frame is intra.
  // This case is targeted mainly at discouraging intra modes that give rise
  // to a predictor with a low spatial complexity compared to the source.
  if ((source_variance > LOW_VAR_THRESH) && (ref_frame == INTRA_FRAME) &&
      (source_variance > recon_variance)) {
    var_factor = MIN(absvar_diff, MIN(VLOW_ADJ_MAX, var_error));
  // A second possible case of interest is where the source variance
  // is very low and we wish to discourage false texture or motion trails.
  } else if ((source_variance < (LOW_VAR_THRESH >> 1)) &&
             (recon_variance > source_variance)) {
    var_factor = MIN(absvar_diff, MIN(VHIGH_ADJ_MAX, var_error));
  }
  // Inflate rd by up to var_factor percent.
  *this_rd += (*this_rd * var_factor) / 100;
}


// Do we have an internal image edge (e.g. formatting bars).
int vp9_internal_image_edge(VP9_COMP *cpi) {
  return (cpi->oxcf.pass == 2) &&
         ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
          (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
}

// Checks to see if a super block is on a horizontal image edge.
// In most cases this is the "real" edge unless there are formatting
// bars embedded in the stream.
int vp9_active_h_edge(VP9_COMP *cpi, int mi_row, int mi_step) {
  int top_edge = 0;
  int bottom_edge = cpi->common.mi_rows;
  int is_active_h_edge = 0;

  // For two pass account for any formatting bars detected.
  if (cpi->oxcf.pass == 2) {
    TWO_PASS *twopass = &cpi->twopass;

    // The inactive region is specified in MBs not mi units.
    // The image edge is in the following MB row.
    // One MB spans two mi units, hence the * 2 conversion.
    top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);

    bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
    bottom_edge = MAX(top_edge, bottom_edge);
  }

  // Active if either the top or bottom active-image edge falls inside
  // [mi_row, mi_row + mi_step).
  if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
      ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
    is_active_h_edge = 1;
  }
  return is_active_h_edge;
}

// Checks to see if a super block is on a vertical image edge.
// In most cases this is the "real" edge unless there are formatting
// bars embedded in the stream.
int vp9_active_v_edge(VP9_COMP *cpi, int mi_col, int mi_step) {
  int left_edge = 0;
  int right_edge = cpi->common.mi_cols;
  int is_active_v_edge = 0;

  // For two pass account for any formatting bars detected.
  if (cpi->oxcf.pass == 2) {
    TWO_PASS *twopass = &cpi->twopass;

    // The inactive region is specified in MBs not mi units.
    // The image edge is in the following MB column.
    left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);

    right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
    right_edge = MAX(left_edge, right_edge);
  }

  if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
      ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
    is_active_v_edge = 1;
  }
  return is_active_v_edge;
}

// Checks to see if a super block is at the edge of the active image.
// In most cases this is the "real" edge unless there are formatting
// bars embedded in the stream.
int vp9_active_edge_sb(VP9_COMP *cpi,
                       int mi_row, int mi_col) {
  return vp9_active_h_edge(cpi, mi_row, MI_BLOCK_SIZE) ||
         vp9_active_v_edge(cpi, mi_col, MI_BLOCK_SIZE);
}

// Full rate-distortion inter mode decision for a superblock.
// (Definition continues beyond this chunk.)
void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
                               TileDataEnc *tile_data,
                               MACROBLOCK *x,
                               int mi_row, int mi_col,
                               RD_COST *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx,
                               int64_t best_rd_so_far) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  RD_OPT *const rd_opt = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const struct segmentation *const seg = &cm->seg;
  PREDICTION_MODE this_mode;
  MV_REFERENCE_FRAME ref_frame, second_ref_frame;
  unsigned char segment_id = mbmi->segment_id;
  int comp_pred, i, k;
  int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
  INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES];
  int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
                                    VP9_ALT_FLAG };
  int64_t best_rd = best_rd_so_far;
  int64_t best_pred_diff[REFERENCE_MODES];
  int64_t best_pred_rd[REFERENCE_MODES];
  int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
  int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
  MB_MODE_INFO best_mbmode;
  int best_mode_skippable = 0;
  int midx, best_mode_index = -1;
  unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
  vpx_prob comp_mode_p;
  int64_t best_intra_rd = INT64_MAX;
  unsigned int best_pred_sse = UINT_MAX;
  PREDICTION_MODE best_intra_mode = DC_PRED;
  int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
int64_t dist_uv[TX_SIZES]; 2951 int skip_uv[TX_SIZES]; 2952 PREDICTION_MODE mode_uv[TX_SIZES]; 2953 const int intra_cost_penalty = vp9_get_intra_cost_penalty( 2954 cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth); 2955 int best_skip2 = 0; 2956 uint8_t ref_frame_skip_mask[2] = { 0 }; 2957 uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 }; 2958 int mode_skip_start = sf->mode_skip_start + 1; 2959 const int *const rd_threshes = rd_opt->threshes[segment_id][bsize]; 2960 const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize]; 2961 int64_t mode_threshold[MAX_MODES]; 2962 int *mode_map = tile_data->mode_map[bsize]; 2963 const int mode_search_skip_flags = sf->mode_search_skip_flags; 2964 int64_t mask_filter = 0; 2965 int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; 2966 2967 vp9_zero(best_mbmode); 2968 2969 x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; 2970 2971 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) 2972 filter_cache[i] = INT64_MAX; 2973 2974 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, 2975 &comp_mode_p); 2976 2977 for (i = 0; i < REFERENCE_MODES; ++i) 2978 best_pred_rd[i] = INT64_MAX; 2979 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) 2980 best_filter_rd[i] = INT64_MAX; 2981 for (i = 0; i < TX_SIZES; i++) 2982 rate_uv_intra[i] = INT_MAX; 2983 for (i = 0; i < MAX_REF_FRAMES; ++i) 2984 x->pred_sse[i] = INT_MAX; 2985 for (i = 0; i < MB_MODE_COUNT; ++i) { 2986 for (k = 0; k < MAX_REF_FRAMES; ++k) { 2987 single_inter_filter[i][k] = SWITCHABLE; 2988 single_skippable[i][k] = 0; 2989 } 2990 } 2991 2992 rd_cost->rate = INT_MAX; 2993 2994 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { 2995 x->pred_mv_sad[ref_frame] = INT_MAX; 2996 if (cpi->ref_frame_flags & flag_list[ref_frame]) { 2997 assert(get_ref_frame_buffer(cpi, ref_frame) != NULL); 2998 setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col, 2999 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); 3000 } 
3001 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; 3002 frame_mv[ZEROMV][ref_frame].as_int = 0; 3003 } 3004 3005 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { 3006 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) { 3007 // Skip checking missing references in both single and compound reference 3008 // modes. Note that a mode will be skipped iff both reference frames 3009 // are masked out. 3010 ref_frame_skip_mask[0] |= (1 << ref_frame); 3011 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; 3012 } else if (sf->reference_masking) { 3013 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { 3014 // Skip fixed mv modes for poor references 3015 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) { 3016 mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO; 3017 break; 3018 } 3019 } 3020 } 3021 // If the segment reference frame feature is enabled.... 3022 // then do nothing if the current ref frame is not allowed.. 3023 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && 3024 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { 3025 ref_frame_skip_mask[0] |= (1 << ref_frame); 3026 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; 3027 } 3028 } 3029 3030 // Disable this drop out case if the ref frame 3031 // segment level feature is enabled for this segment. This is to 3032 // prevent the possibility that we end up unable to pick any mode. 3033 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) { 3034 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, 3035 // unless ARNR filtering is enabled in which case we want 3036 // an unfiltered alternative. We allow near/nearest as well 3037 // because they may result in zero-zero MVs but be cheaper. 
3038 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { 3039 ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME); 3040 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK; 3041 mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO; 3042 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0) 3043 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV); 3044 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0) 3045 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV); 3046 } 3047 } 3048 3049 if (cpi->rc.is_src_frame_alt_ref) { 3050 if (sf->alt_ref_search_fp) { 3051 mode_skip_mask[ALTREF_FRAME] = 0; 3052 ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME); 3053 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK; 3054 } 3055 } 3056 3057 if (sf->alt_ref_search_fp) 3058 if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX) 3059 if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1)) 3060 mode_skip_mask[ALTREF_FRAME] |= INTER_ALL; 3061 3062 if (sf->adaptive_mode_search) { 3063 if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref && 3064 cpi->rc.frames_since_golden >= 3) 3065 if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1)) 3066 mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL; 3067 } 3068 3069 if (bsize > sf->max_intra_bsize) { 3070 ref_frame_skip_mask[0] |= (1 << INTRA_FRAME); 3071 ref_frame_skip_mask[1] |= (1 << INTRA_FRAME); 3072 } 3073 3074 mode_skip_mask[INTRA_FRAME] |= 3075 ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]); 3076 3077 for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) 3078 mode_threshold[i] = 0; 3079 for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i) 3080 mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5; 3081 3082 midx = sf->schedule_mode_search ? 
mode_skip_start : 0; 3083 while (midx > 4) { 3084 uint8_t end_pos = 0; 3085 for (i = 5; i < midx; ++i) { 3086 if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) { 3087 uint8_t tmp = mode_map[i]; 3088 mode_map[i] = mode_map[i - 1]; 3089 mode_map[i - 1] = tmp; 3090 end_pos = i; 3091 } 3092 } 3093 midx = end_pos; 3094 } 3095 3096 for (midx = 0; midx < MAX_MODES; ++midx) { 3097 int mode_index = mode_map[midx]; 3098 int mode_excluded = 0; 3099 int64_t this_rd = INT64_MAX; 3100 int disable_skip = 0; 3101 int compmode_cost = 0; 3102 int rate2 = 0, rate_y = 0, rate_uv = 0; 3103 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; 3104 int skippable = 0; 3105 int this_skip2 = 0; 3106 int64_t total_sse = INT64_MAX; 3107 int early_term = 0; 3108 3109 this_mode = vp9_mode_order[mode_index].mode; 3110 ref_frame = vp9_mode_order[mode_index].ref_frame[0]; 3111 second_ref_frame = vp9_mode_order[mode_index].ref_frame[1]; 3112 3113 // Look at the reference frame of the best mode so far and set the 3114 // skip mask to look at a subset of the remaining modes. 3115 if (midx == mode_skip_start && best_mode_index >= 0) { 3116 switch (best_mbmode.ref_frame[0]) { 3117 case INTRA_FRAME: 3118 break; 3119 case LAST_FRAME: 3120 ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK; 3121 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; 3122 break; 3123 case GOLDEN_FRAME: 3124 ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK; 3125 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; 3126 break; 3127 case ALTREF_FRAME: 3128 ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK; 3129 break; 3130 case NONE: 3131 case MAX_REF_FRAMES: 3132 assert(0 && "Invalid Reference frame"); 3133 break; 3134 } 3135 } 3136 3137 if ((ref_frame_skip_mask[0] & (1 << ref_frame)) && 3138 (ref_frame_skip_mask[1] & (1 << MAX(0, second_ref_frame)))) 3139 continue; 3140 3141 if (mode_skip_mask[ref_frame] & (1 << this_mode)) 3142 continue; 3143 3144 // Test best rd so far against threshold for trying this mode. 
3145 if (best_mode_skippable && sf->schedule_mode_search) 3146 mode_threshold[mode_index] <<= 1; 3147 3148 if (best_rd < mode_threshold[mode_index]) 3149 continue; 3150 3151 if (sf->motion_field_mode_search) { 3152 const int mi_width = MIN(num_8x8_blocks_wide_lookup[bsize], 3153 tile_info->mi_col_end - mi_col); 3154 const int mi_height = MIN(num_8x8_blocks_high_lookup[bsize], 3155 tile_info->mi_row_end - mi_row); 3156 const int bsl = mi_width_log2_lookup[bsize]; 3157 int cb_partition_search_ctrl = (((mi_row + mi_col) >> bsl) 3158 + get_chessboard_index(cm->current_video_frame)) & 0x1; 3159 MB_MODE_INFO *ref_mbmi; 3160 int const_motion = 1; 3161 int skip_ref_frame = !cb_partition_search_ctrl; 3162 MV_REFERENCE_FRAME rf = NONE; 3163 int_mv ref_mv; 3164 ref_mv.as_int = INVALID_MV; 3165 3166 if ((mi_row - 1) >= tile_info->mi_row_start) { 3167 ref_mv = xd->mi[-xd->mi_stride]->mbmi.mv[0]; 3168 rf = xd->mi[-xd->mi_stride]->mbmi.ref_frame[0]; 3169 for (i = 0; i < mi_width; ++i) { 3170 ref_mbmi = &xd->mi[-xd->mi_stride + i]->mbmi; 3171 const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) && 3172 (ref_frame == ref_mbmi->ref_frame[0]); 3173 skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]); 3174 } 3175 } 3176 3177 if ((mi_col - 1) >= tile_info->mi_col_start) { 3178 if (ref_mv.as_int == INVALID_MV) 3179 ref_mv = xd->mi[-1]->mbmi.mv[0]; 3180 if (rf == NONE) 3181 rf = xd->mi[-1]->mbmi.ref_frame[0]; 3182 for (i = 0; i < mi_height; ++i) { 3183 ref_mbmi = &xd->mi[i * xd->mi_stride - 1]->mbmi; 3184 const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) && 3185 (ref_frame == ref_mbmi->ref_frame[0]); 3186 skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]); 3187 } 3188 } 3189 3190 if (skip_ref_frame && this_mode != NEARESTMV && this_mode != NEWMV) 3191 if (rf > INTRA_FRAME) 3192 if (ref_frame != rf) 3193 continue; 3194 3195 if (const_motion) 3196 if (this_mode == NEARMV || this_mode == ZEROMV) 3197 continue; 3198 } 3199 3200 comp_pred = second_ref_frame > INTRA_FRAME; 3201 if 
(comp_pred) { 3202 if (!cpi->allow_comp_inter_inter) 3203 continue; 3204 3205 // Skip compound inter modes if ARF is not available. 3206 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) 3207 continue; 3208 3209 // Do not allow compound prediction if the segment level reference frame 3210 // feature is in use as in this case there can only be one reference. 3211 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) 3212 continue; 3213 3214 if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && 3215 best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME) 3216 continue; 3217 3218 mode_excluded = cm->reference_mode == SINGLE_REFERENCE; 3219 } else { 3220 if (ref_frame != INTRA_FRAME) 3221 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE; 3222 } 3223 3224 if (ref_frame == INTRA_FRAME) { 3225 if (sf->adaptive_mode_search) 3226 if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse) 3227 continue; 3228 3229 if (this_mode != DC_PRED) { 3230 // Disable intra modes other than DC_PRED for blocks with low variance 3231 // Threshold for intra skipping based on source variance 3232 // TODO(debargha): Specialize the threshold for super block sizes 3233 const unsigned int skip_intra_var_thresh = 64; 3234 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) && 3235 x->source_variance < skip_intra_var_thresh) 3236 continue; 3237 // Only search the oblique modes if the best so far is 3238 // one of the neighboring directional modes 3239 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) && 3240 (this_mode >= D45_PRED && this_mode <= TM_PRED)) { 3241 if (best_mode_index >= 0 && 3242 best_mbmode.ref_frame[0] > INTRA_FRAME) 3243 continue; 3244 } 3245 if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { 3246 if (conditional_skipintra(this_mode, best_intra_mode)) 3247 continue; 3248 } 3249 } 3250 } else { 3251 const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame}; 3252 if (!check_best_zero_mv(cpi, 
mbmi_ext->mode_context, frame_mv, 3253 this_mode, ref_frames)) 3254 continue; 3255 } 3256 3257 mbmi->mode = this_mode; 3258 mbmi->uv_mode = DC_PRED; 3259 mbmi->ref_frame[0] = ref_frame; 3260 mbmi->ref_frame[1] = second_ref_frame; 3261 // Evaluate all sub-pel filters irrespective of whether we can use 3262 // them for this frame. 3263 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP 3264 : cm->interp_filter; 3265 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0; 3266 3267 x->skip = 0; 3268 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); 3269 3270 // Select prediction reference frames. 3271 for (i = 0; i < MAX_MB_PLANE; i++) { 3272 xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; 3273 if (comp_pred) 3274 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; 3275 } 3276 3277 if (ref_frame == INTRA_FRAME) { 3278 TX_SIZE uv_tx; 3279 struct macroblockd_plane *const pd = &xd->plane[1]; 3280 memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); 3281 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, 3282 NULL, bsize, best_rd); 3283 if (rate_y == INT_MAX) 3284 continue; 3285 3286 uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x, 3287 pd->subsampling_y); 3288 if (rate_uv_intra[uv_tx] == INT_MAX) { 3289 choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, 3290 &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx], 3291 &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]); 3292 } 3293 3294 rate_uv = rate_uv_tokenonly[uv_tx]; 3295 distortion_uv = dist_uv[uv_tx]; 3296 skippable = skippable && skip_uv[uv_tx]; 3297 mbmi->uv_mode = mode_uv[uv_tx]; 3298 3299 rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx]; 3300 if (this_mode != DC_PRED && this_mode != TM_PRED) 3301 rate2 += intra_cost_penalty; 3302 distortion2 = distortion_y + distortion_uv; 3303 } else { 3304 this_rd = handle_inter_mode(cpi, x, bsize, 3305 &rate2, &distortion2, &skippable, 3306 &rate_y, &rate_uv, 3307 &disable_skip, frame_mv, 3308 mi_row, mi_col, 3309 single_newmv, 
single_inter_filter, 3310 single_skippable, &total_sse, best_rd, 3311 &mask_filter, filter_cache); 3312 if (this_rd == INT64_MAX) 3313 continue; 3314 3315 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred); 3316 3317 if (cm->reference_mode == REFERENCE_MODE_SELECT) 3318 rate2 += compmode_cost; 3319 } 3320 3321 // Estimate the reference frame signaling cost and add it 3322 // to the rolling cost variable. 3323 if (comp_pred) { 3324 rate2 += ref_costs_comp[ref_frame]; 3325 } else { 3326 rate2 += ref_costs_single[ref_frame]; 3327 } 3328 3329 if (!disable_skip) { 3330 if (skippable) { 3331 // Back out the coefficient coding costs 3332 rate2 -= (rate_y + rate_uv); 3333 3334 // Cost the skip mb case 3335 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); 3336 } else if (ref_frame != INTRA_FRAME && !xd->lossless) { 3337 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < 3338 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) { 3339 // Add in the cost of the no skip flag. 3340 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); 3341 } else { 3342 // FIXME(rbultje) make this work for splitmv also 3343 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); 3344 distortion2 = total_sse; 3345 assert(total_sse >= 0); 3346 rate2 -= (rate_y + rate_uv); 3347 this_skip2 = 1; 3348 } 3349 } else { 3350 // Add in the cost of the no skip flag. 3351 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); 3352 } 3353 3354 // Calculate the final RD estimate for this mode. 3355 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); 3356 } 3357 3358 // Apply an adjustment to the rd value based on the similarity of the 3359 // source variance and reconstructed variance. 
3360 rd_variance_adjustment(cpi, x, bsize, &this_rd, 3361 ref_frame, x->source_variance); 3362 3363 if (ref_frame == INTRA_FRAME) { 3364 // Keep record of best intra rd 3365 if (this_rd < best_intra_rd) { 3366 best_intra_rd = this_rd; 3367 best_intra_mode = mbmi->mode; 3368 } 3369 } 3370 3371 if (!disable_skip && ref_frame == INTRA_FRAME) { 3372 for (i = 0; i < REFERENCE_MODES; ++i) 3373 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd); 3374 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) 3375 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd); 3376 } 3377 3378 // Did this mode help.. i.e. is it the new best mode 3379 if (this_rd < best_rd || x->skip) { 3380 int max_plane = MAX_MB_PLANE; 3381 if (!mode_excluded) { 3382 // Note index of best mode so far 3383 best_mode_index = mode_index; 3384 3385 if (ref_frame == INTRA_FRAME) { 3386 /* required for left and above block mv */ 3387 mbmi->mv[0].as_int = 0; 3388 max_plane = 1; 3389 } else { 3390 best_pred_sse = x->pred_sse[ref_frame]; 3391 } 3392 3393 rd_cost->rate = rate2; 3394 rd_cost->dist = distortion2; 3395 rd_cost->rdcost = this_rd; 3396 best_rd = this_rd; 3397 best_mbmode = *mbmi; 3398 best_skip2 = this_skip2; 3399 best_mode_skippable = skippable; 3400 3401 if (!x->select_tx_size) 3402 swap_block_ptr(x, ctx, 1, 0, 0, max_plane); 3403 memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], 3404 sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); 3405 3406 // TODO(debargha): enhance this test with a better distortion prediction 3407 // based on qp, activity mask and history 3408 if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) && 3409 (mode_index > MIN_EARLY_TERM_INDEX)) { 3410 int qstep = xd->plane[0].dequant[1]; 3411 // TODO(debargha): Enhance this by specializing for each mode_index 3412 int scale = 4; 3413#if CONFIG_VP9_HIGHBITDEPTH 3414 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 3415 qstep >>= (xd->bd - 8); 3416 } 3417#endif // CONFIG_VP9_HIGHBITDEPTH 3418 if (x->source_variance < UINT_MAX) { 3419 const 
int var_adjust = (x->source_variance < 16); 3420 scale -= var_adjust; 3421 } 3422 if (ref_frame > INTRA_FRAME && 3423 distortion2 * scale < qstep * qstep) { 3424 early_term = 1; 3425 } 3426 } 3427 } 3428 } 3429 3430 /* keep record of best compound/single-only prediction */ 3431 if (!disable_skip && ref_frame != INTRA_FRAME) { 3432 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate; 3433 3434 if (cm->reference_mode == REFERENCE_MODE_SELECT) { 3435 single_rate = rate2 - compmode_cost; 3436 hybrid_rate = rate2; 3437 } else { 3438 single_rate = rate2; 3439 hybrid_rate = rate2 + compmode_cost; 3440 } 3441 3442 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); 3443 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); 3444 3445 if (!comp_pred) { 3446 if (single_rd < best_pred_rd[SINGLE_REFERENCE]) 3447 best_pred_rd[SINGLE_REFERENCE] = single_rd; 3448 } else { 3449 if (single_rd < best_pred_rd[COMPOUND_REFERENCE]) 3450 best_pred_rd[COMPOUND_REFERENCE] = single_rd; 3451 } 3452 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT]) 3453 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd; 3454 3455 /* keep record of best filter type */ 3456 if (!mode_excluded && cm->interp_filter != BILINEAR) { 3457 int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ? 3458 SWITCHABLE_FILTERS : cm->interp_filter]; 3459 3460 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { 3461 int64_t adj_rd; 3462 if (ref == INT64_MAX) 3463 adj_rd = 0; 3464 else if (filter_cache[i] == INT64_MAX) 3465 // when early termination is triggered, the encoder does not have 3466 // access to the rate-distortion cost. it only knows that the cost 3467 // should be above the maximum valid value. hence it takes the known 3468 // maximum plus an arbitrary constant as the rate-distortion cost. 
3469 adj_rd = mask_filter - ref + 10; 3470 else 3471 adj_rd = filter_cache[i] - ref; 3472 3473 adj_rd += this_rd; 3474 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd); 3475 } 3476 } 3477 } 3478 3479 if (early_term) 3480 break; 3481 3482 if (x->skip && !comp_pred) 3483 break; 3484 } 3485 3486 // The inter modes' rate costs are not calculated precisely in some cases. 3487 // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and 3488 // ZEROMV. Here, checks are added for those cases, and the mode decisions 3489 // are corrected. 3490 if (best_mbmode.mode == NEWMV) { 3491 const MV_REFERENCE_FRAME refs[2] = {best_mbmode.ref_frame[0], 3492 best_mbmode.ref_frame[1]}; 3493 int comp_pred_mode = refs[1] > INTRA_FRAME; 3494 3495 if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int && 3496 ((comp_pred_mode && frame_mv[NEARESTMV][refs[1]].as_int == 3497 best_mbmode.mv[1].as_int) || !comp_pred_mode)) 3498 best_mbmode.mode = NEARESTMV; 3499 else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int && 3500 ((comp_pred_mode && frame_mv[NEARMV][refs[1]].as_int == 3501 best_mbmode.mv[1].as_int) || !comp_pred_mode)) 3502 best_mbmode.mode = NEARMV; 3503 else if (best_mbmode.mv[0].as_int == 0 && 3504 ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || !comp_pred_mode)) 3505 best_mbmode.mode = ZEROMV; 3506 } 3507 3508 if (best_mode_index < 0 || best_rd >= best_rd_so_far) { 3509 rd_cost->rate = INT_MAX; 3510 rd_cost->rdcost = INT64_MAX; 3511 return; 3512 } 3513 3514 // If we used an estimate for the uv intra rd in the loop above... 3515 if (sf->use_uv_intra_rd_estimate) { 3516 // Do Intra UV best rd mode selection if best mode choice above was intra. 
3517 if (best_mbmode.ref_frame[0] == INTRA_FRAME) { 3518 TX_SIZE uv_tx_size; 3519 *mbmi = best_mbmode; 3520 uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]); 3521 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size], 3522 &rate_uv_tokenonly[uv_tx_size], 3523 &dist_uv[uv_tx_size], 3524 &skip_uv[uv_tx_size], 3525 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, 3526 uv_tx_size); 3527 } 3528 } 3529 3530 assert((cm->interp_filter == SWITCHABLE) || 3531 (cm->interp_filter == best_mbmode.interp_filter) || 3532 !is_inter_block(&best_mbmode)); 3533 3534 if (!cpi->rc.is_src_frame_alt_ref) 3535 vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, 3536 sf->adaptive_rd_thresh, bsize, best_mode_index); 3537 3538 // macroblock modes 3539 *mbmi = best_mbmode; 3540 x->skip |= best_skip2; 3541 3542 for (i = 0; i < REFERENCE_MODES; ++i) { 3543 if (best_pred_rd[i] == INT64_MAX) 3544 best_pred_diff[i] = INT_MIN; 3545 else 3546 best_pred_diff[i] = best_rd - best_pred_rd[i]; 3547 } 3548 3549 if (!x->skip) { 3550 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { 3551 if (best_filter_rd[i] == INT64_MAX) 3552 best_filter_diff[i] = 0; 3553 else 3554 best_filter_diff[i] = best_rd - best_filter_rd[i]; 3555 } 3556 if (cm->interp_filter == SWITCHABLE) 3557 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0); 3558 } else { 3559 vp9_zero(best_filter_diff); 3560 } 3561 3562 // TODO(yunqingwang): Moving this line in front of the above best_filter_diff 3563 // updating code causes PSNR loss. Need to figure out the confliction. 3564 x->skip |= best_mode_skippable; 3565 3566 if (!x->skip && !x->select_tx_size) { 3567 int has_high_freq_coeff = 0; 3568 int plane; 3569 int max_plane = is_inter_block(&xd->mi[0]->mbmi) 3570 ? 
MAX_MB_PLANE : 1; 3571 for (plane = 0; plane < max_plane; ++plane) { 3572 x->plane[plane].eobs = ctx->eobs_pbuf[plane][1]; 3573 has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane); 3574 } 3575 3576 for (plane = max_plane; plane < MAX_MB_PLANE; ++plane) { 3577 x->plane[plane].eobs = ctx->eobs_pbuf[plane][2]; 3578 has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane); 3579 } 3580 3581 best_mode_skippable |= !has_high_freq_coeff; 3582 } 3583 3584 assert(best_mode_index >= 0); 3585 3586 store_coding_context(x, ctx, best_mode_index, best_pred_diff, 3587 best_filter_diff, best_mode_skippable); 3588} 3589 3590void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, 3591 TileDataEnc *tile_data, 3592 MACROBLOCK *x, 3593 RD_COST *rd_cost, 3594 BLOCK_SIZE bsize, 3595 PICK_MODE_CONTEXT *ctx, 3596 int64_t best_rd_so_far) { 3597 VP9_COMMON *const cm = &cpi->common; 3598 MACROBLOCKD *const xd = &x->e_mbd; 3599 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; 3600 unsigned char segment_id = mbmi->segment_id; 3601 const int comp_pred = 0; 3602 int i; 3603 int64_t best_pred_diff[REFERENCE_MODES]; 3604 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; 3605 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; 3606 vpx_prob comp_mode_p; 3607 INTERP_FILTER best_filter = SWITCHABLE; 3608 int64_t this_rd = INT64_MAX; 3609 int rate2 = 0; 3610 const int64_t distortion2 = 0; 3611 3612 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; 3613 3614 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, 3615 &comp_mode_p); 3616 3617 for (i = 0; i < MAX_REF_FRAMES; ++i) 3618 x->pred_sse[i] = INT_MAX; 3619 for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i) 3620 x->pred_mv_sad[i] = INT_MAX; 3621 3622 rd_cost->rate = INT_MAX; 3623 3624 assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)); 3625 3626 mbmi->mode = ZEROMV; 3627 mbmi->uv_mode = DC_PRED; 3628 mbmi->ref_frame[0] = LAST_FRAME; 3629 
mbmi->ref_frame[1] = NONE; 3630 mbmi->mv[0].as_int = 0; 3631 x->skip = 1; 3632 3633 if (cm->interp_filter != BILINEAR) { 3634 best_filter = EIGHTTAP; 3635 if (cm->interp_filter == SWITCHABLE && 3636 x->source_variance >= cpi->sf.disable_filter_search_var_thresh) { 3637 int rs; 3638 int best_rs = INT_MAX; 3639 for (i = 0; i < SWITCHABLE_FILTERS; ++i) { 3640 mbmi->interp_filter = i; 3641 rs = vp9_get_switchable_rate(cpi, xd); 3642 if (rs < best_rs) { 3643 best_rs = rs; 3644 best_filter = mbmi->interp_filter; 3645 } 3646 } 3647 } 3648 } 3649 // Set the appropriate filter 3650 if (cm->interp_filter == SWITCHABLE) { 3651 mbmi->interp_filter = best_filter; 3652 rate2 += vp9_get_switchable_rate(cpi, xd); 3653 } else { 3654 mbmi->interp_filter = cm->interp_filter; 3655 } 3656 3657 if (cm->reference_mode == REFERENCE_MODE_SELECT) 3658 rate2 += vp9_cost_bit(comp_mode_p, comp_pred); 3659 3660 // Estimate the reference frame signaling cost and add it 3661 // to the rolling cost variable. 3662 rate2 += ref_costs_single[LAST_FRAME]; 3663 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); 3664 3665 rd_cost->rate = rate2; 3666 rd_cost->dist = distortion2; 3667 rd_cost->rdcost = this_rd; 3668 3669 if (this_rd >= best_rd_so_far) { 3670 rd_cost->rate = INT_MAX; 3671 rd_cost->rdcost = INT64_MAX; 3672 return; 3673 } 3674 3675 assert((cm->interp_filter == SWITCHABLE) || 3676 (cm->interp_filter == mbmi->interp_filter)); 3677 3678 vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, 3679 cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV); 3680 3681 vp9_zero(best_pred_diff); 3682 vp9_zero(best_filter_diff); 3683 3684 if (!x->select_tx_size) 3685 swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE); 3686 store_coding_context(x, ctx, THR_ZEROMV, 3687 best_pred_diff, best_filter_diff, 0); 3688} 3689 3690void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, 3691 TileDataEnc *tile_data, 3692 MACROBLOCK *x, 3693 int mi_row, int mi_col, 3694 RD_COST *rd_cost, 3695 BLOCK_SIZE bsize, 3696 
PICK_MODE_CONTEXT *ctx, 3697 int64_t best_rd_so_far) { 3698 VP9_COMMON *const cm = &cpi->common; 3699 RD_OPT *const rd_opt = &cpi->rd; 3700 SPEED_FEATURES *const sf = &cpi->sf; 3701 MACROBLOCKD *const xd = &x->e_mbd; 3702 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; 3703 const struct segmentation *const seg = &cm->seg; 3704 MV_REFERENCE_FRAME ref_frame, second_ref_frame; 3705 unsigned char segment_id = mbmi->segment_id; 3706 int comp_pred, i; 3707 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; 3708 struct buf_2d yv12_mb[4][MAX_MB_PLANE]; 3709 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, 3710 VP9_ALT_FLAG }; 3711 int64_t best_rd = best_rd_so_far; 3712 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise 3713 int64_t best_pred_diff[REFERENCE_MODES]; 3714 int64_t best_pred_rd[REFERENCE_MODES]; 3715 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; 3716 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; 3717 MB_MODE_INFO best_mbmode; 3718 int ref_index, best_ref_index = 0; 3719 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; 3720 vpx_prob comp_mode_p; 3721 INTERP_FILTER tmp_best_filter = SWITCHABLE; 3722 int rate_uv_intra, rate_uv_tokenonly; 3723 int64_t dist_uv; 3724 int skip_uv; 3725 PREDICTION_MODE mode_uv = DC_PRED; 3726 const int intra_cost_penalty = vp9_get_intra_cost_penalty( 3727 cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth); 3728 int_mv seg_mvs[4][MAX_REF_FRAMES]; 3729 b_mode_info best_bmodes[4]; 3730 int best_skip2 = 0; 3731 int ref_frame_skip_mask[2] = { 0 }; 3732 int64_t mask_filter = 0; 3733 int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; 3734 int internal_active_edge = 3735 vp9_active_edge_sb(cpi, mi_row, mi_col) && vp9_internal_image_edge(cpi); 3736 3737 x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; 3738 memset(x->zcoeff_blk[TX_4X4], 0, 4); 3739 vp9_zero(best_mbmode); 3740 3741 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) 3742 filter_cache[i] = 
INT64_MAX; 3743 3744 for (i = 0; i < 4; i++) { 3745 int j; 3746 for (j = 0; j < MAX_REF_FRAMES; j++) 3747 seg_mvs[i][j].as_int = INVALID_MV; 3748 } 3749 3750 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, 3751 &comp_mode_p); 3752 3753 for (i = 0; i < REFERENCE_MODES; ++i) 3754 best_pred_rd[i] = INT64_MAX; 3755 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) 3756 best_filter_rd[i] = INT64_MAX; 3757 rate_uv_intra = INT_MAX; 3758 3759 rd_cost->rate = INT_MAX; 3760 3761 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { 3762 if (cpi->ref_frame_flags & flag_list[ref_frame]) { 3763 setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col, 3764 frame_mv[NEARESTMV], frame_mv[NEARMV], 3765 yv12_mb); 3766 } else { 3767 ref_frame_skip_mask[0] |= (1 << ref_frame); 3768 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; 3769 } 3770 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; 3771 frame_mv[ZEROMV][ref_frame].as_int = 0; 3772 } 3773 3774 for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) { 3775 int mode_excluded = 0; 3776 int64_t this_rd = INT64_MAX; 3777 int disable_skip = 0; 3778 int compmode_cost = 0; 3779 int rate2 = 0, rate_y = 0, rate_uv = 0; 3780 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; 3781 int skippable = 0; 3782 int i; 3783 int this_skip2 = 0; 3784 int64_t total_sse = INT_MAX; 3785 int early_term = 0; 3786 3787 ref_frame = vp9_ref_order[ref_index].ref_frame[0]; 3788 second_ref_frame = vp9_ref_order[ref_index].ref_frame[1]; 3789 3790 // Look at the reference frame of the best mode so far and set the 3791 // skip mask to look at a subset of the remaining modes. 
3792 if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) { 3793 if (ref_index == 3) { 3794 switch (best_mbmode.ref_frame[0]) { 3795 case INTRA_FRAME: 3796 break; 3797 case LAST_FRAME: 3798 ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME); 3799 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; 3800 break; 3801 case GOLDEN_FRAME: 3802 ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME); 3803 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; 3804 break; 3805 case ALTREF_FRAME: 3806 ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << LAST_FRAME); 3807 break; 3808 case NONE: 3809 case MAX_REF_FRAMES: 3810 assert(0 && "Invalid Reference frame"); 3811 break; 3812 } 3813 } 3814 } 3815 3816 if ((ref_frame_skip_mask[0] & (1 << ref_frame)) && 3817 (ref_frame_skip_mask[1] & (1 << MAX(0, second_ref_frame)))) 3818 continue; 3819 3820 // Test best rd so far against threshold for trying this mode. 3821 if (!internal_active_edge && 3822 rd_less_than_thresh(best_rd, 3823 rd_opt->threshes[segment_id][bsize][ref_index], 3824 tile_data->thresh_freq_fact[bsize][ref_index])) 3825 continue; 3826 3827 comp_pred = second_ref_frame > INTRA_FRAME; 3828 if (comp_pred) { 3829 if (!cpi->allow_comp_inter_inter) 3830 continue; 3831 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) 3832 continue; 3833 // Do not allow compound prediction if the segment level reference frame 3834 // feature is in use as in this case there can only be one reference. 3835 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) 3836 continue; 3837 3838 if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && 3839 best_mbmode.ref_frame[0] == INTRA_FRAME) 3840 continue; 3841 } 3842 3843 // TODO(jingning, jkoleszar): scaling reference frame not supported for 3844 // sub8x8 blocks. 
3845 if (ref_frame > INTRA_FRAME && 3846 vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf)) 3847 continue; 3848 3849 if (second_ref_frame > INTRA_FRAME && 3850 vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf)) 3851 continue; 3852 3853 if (comp_pred) 3854 mode_excluded = cm->reference_mode == SINGLE_REFERENCE; 3855 else if (ref_frame != INTRA_FRAME) 3856 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE; 3857 3858 // If the segment reference frame feature is enabled.... 3859 // then do nothing if the current ref frame is not allowed.. 3860 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && 3861 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { 3862 continue; 3863 // Disable this drop out case if the ref frame 3864 // segment level feature is enabled for this segment. This is to 3865 // prevent the possibility that we end up unable to pick any mode. 3866 } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) { 3867 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, 3868 // unless ARNR filtering is enabled in which case we want 3869 // an unfiltered alternative. We allow near/nearest as well 3870 // because they may result in zero-zero MVs but be cheaper. 3871 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) 3872 continue; 3873 } 3874 3875 mbmi->tx_size = TX_4X4; 3876 mbmi->uv_mode = DC_PRED; 3877 mbmi->ref_frame[0] = ref_frame; 3878 mbmi->ref_frame[1] = second_ref_frame; 3879 // Evaluate all sub-pel filters irrespective of whether we can use 3880 // them for this frame. 3881 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP 3882 : cm->interp_filter; 3883 x->skip = 0; 3884 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); 3885 3886 // Select prediction reference frames. 
3887 for (i = 0; i < MAX_MB_PLANE; i++) { 3888 xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; 3889 if (comp_pred) 3890 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; 3891 } 3892 3893 if (ref_frame == INTRA_FRAME) { 3894 int rate; 3895 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, 3896 &distortion_y, best_rd) >= best_rd) 3897 continue; 3898 rate2 += rate; 3899 rate2 += intra_cost_penalty; 3900 distortion2 += distortion_y; 3901 3902 if (rate_uv_intra == INT_MAX) { 3903 choose_intra_uv_mode(cpi, x, ctx, bsize, TX_4X4, 3904 &rate_uv_intra, 3905 &rate_uv_tokenonly, 3906 &dist_uv, &skip_uv, 3907 &mode_uv); 3908 } 3909 rate2 += rate_uv_intra; 3910 rate_uv = rate_uv_tokenonly; 3911 distortion2 += dist_uv; 3912 distortion_uv = dist_uv; 3913 mbmi->uv_mode = mode_uv; 3914 } else { 3915 int rate; 3916 int64_t distortion; 3917 int64_t this_rd_thresh; 3918 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX; 3919 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX; 3920 int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse; 3921 int tmp_best_skippable = 0; 3922 int switchable_filter_index; 3923 int_mv *second_ref = comp_pred ? 3924 &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL; 3925 b_mode_info tmp_best_bmodes[16]; 3926 MB_MODE_INFO tmp_best_mbmode; 3927 BEST_SEG_INFO bsi[SWITCHABLE_FILTERS]; 3928 int pred_exists = 0; 3929 int uv_skippable; 3930 3931 this_rd_thresh = (ref_frame == LAST_FRAME) ? 3932 rd_opt->threshes[segment_id][bsize][THR_LAST] : 3933 rd_opt->threshes[segment_id][bsize][THR_ALTR]; 3934 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ? 
3935 rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh; 3936 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) 3937 filter_cache[i] = INT64_MAX; 3938 3939 if (cm->interp_filter != BILINEAR) { 3940 tmp_best_filter = EIGHTTAP; 3941 if (x->source_variance < sf->disable_filter_search_var_thresh) { 3942 tmp_best_filter = EIGHTTAP; 3943 } else if (sf->adaptive_pred_interp_filter == 1 && 3944 ctx->pred_interp_filter < SWITCHABLE) { 3945 tmp_best_filter = ctx->pred_interp_filter; 3946 } else if (sf->adaptive_pred_interp_filter == 2) { 3947 tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ? 3948 ctx->pred_interp_filter : 0; 3949 } else { 3950 for (switchable_filter_index = 0; 3951 switchable_filter_index < SWITCHABLE_FILTERS; 3952 ++switchable_filter_index) { 3953 int newbest, rs; 3954 int64_t rs_rd; 3955 MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext; 3956 mbmi->interp_filter = switchable_filter_index; 3957 tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, 3958 &mbmi_ext->ref_mvs[ref_frame][0], 3959 second_ref, best_yrd, &rate, 3960 &rate_y, &distortion, 3961 &skippable, &total_sse, 3962 (int) this_rd_thresh, seg_mvs, 3963 bsi, switchable_filter_index, 3964 mi_row, mi_col); 3965 3966 if (tmp_rd == INT64_MAX) 3967 continue; 3968 rs = vp9_get_switchable_rate(cpi, xd); 3969 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); 3970 filter_cache[switchable_filter_index] = tmp_rd; 3971 filter_cache[SWITCHABLE_FILTERS] = 3972 MIN(filter_cache[SWITCHABLE_FILTERS], 3973 tmp_rd + rs_rd); 3974 if (cm->interp_filter == SWITCHABLE) 3975 tmp_rd += rs_rd; 3976 3977 mask_filter = MAX(mask_filter, tmp_rd); 3978 3979 newbest = (tmp_rd < tmp_best_rd); 3980 if (newbest) { 3981 tmp_best_filter = mbmi->interp_filter; 3982 tmp_best_rd = tmp_rd; 3983 } 3984 if ((newbest && cm->interp_filter == SWITCHABLE) || 3985 (mbmi->interp_filter == cm->interp_filter && 3986 cm->interp_filter != SWITCHABLE)) { 3987 tmp_best_rdu = tmp_rd; 3988 tmp_best_rate = rate; 3989 tmp_best_ratey = rate_y; 3990 
tmp_best_distortion = distortion; 3991 tmp_best_sse = total_sse; 3992 tmp_best_skippable = skippable; 3993 tmp_best_mbmode = *mbmi; 3994 for (i = 0; i < 4; i++) { 3995 tmp_best_bmodes[i] = xd->mi[0]->bmi[i]; 3996 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i]; 3997 } 3998 pred_exists = 1; 3999 if (switchable_filter_index == 0 && 4000 sf->use_rd_breakout && 4001 best_rd < INT64_MAX) { 4002 if (tmp_best_rdu / 2 > best_rd) { 4003 // skip searching the other filters if the first is 4004 // already substantially larger than the best so far 4005 tmp_best_filter = mbmi->interp_filter; 4006 tmp_best_rdu = INT64_MAX; 4007 break; 4008 } 4009 } 4010 } 4011 } // switchable_filter_index loop 4012 } 4013 } 4014 4015 if (tmp_best_rdu == INT64_MAX && pred_exists) 4016 continue; 4017 4018 mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ? 4019 tmp_best_filter : cm->interp_filter); 4020 if (!pred_exists) { 4021 // Handles the special case when a filter that is not in the 4022 // switchable list (bilinear, 6-tap) is indicated at the frame level 4023 tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, 4024 &x->mbmi_ext->ref_mvs[ref_frame][0], 4025 second_ref, best_yrd, &rate, &rate_y, 4026 &distortion, &skippable, &total_sse, 4027 (int) this_rd_thresh, seg_mvs, bsi, 0, 4028 mi_row, mi_col); 4029 if (tmp_rd == INT64_MAX) 4030 continue; 4031 } else { 4032 total_sse = tmp_best_sse; 4033 rate = tmp_best_rate; 4034 rate_y = tmp_best_ratey; 4035 distortion = tmp_best_distortion; 4036 skippable = tmp_best_skippable; 4037 *mbmi = tmp_best_mbmode; 4038 for (i = 0; i < 4; i++) 4039 xd->mi[0]->bmi[i] = tmp_best_bmodes[i]; 4040 } 4041 4042 rate2 += rate; 4043 distortion2 += distortion; 4044 4045 if (cm->interp_filter == SWITCHABLE) 4046 rate2 += vp9_get_switchable_rate(cpi, xd); 4047 4048 if (!mode_excluded) 4049 mode_excluded = comp_pred ? 
cm->reference_mode == SINGLE_REFERENCE 4050 : cm->reference_mode == COMPOUND_REFERENCE; 4051 4052 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred); 4053 4054 tmp_best_rdu = best_rd - 4055 MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2), 4056 RDCOST(x->rdmult, x->rddiv, 0, total_sse)); 4057 4058 if (tmp_best_rdu > 0) { 4059 // If even the 'Y' rd value of split is higher than best so far 4060 // then dont bother looking at UV 4061 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, 4062 BLOCK_8X8); 4063 memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm)); 4064 if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable, 4065 &uv_sse, BLOCK_8X8, tmp_best_rdu)) 4066 continue; 4067 4068 rate2 += rate_uv; 4069 distortion2 += distortion_uv; 4070 skippable = skippable && uv_skippable; 4071 total_sse += uv_sse; 4072 } 4073 } 4074 4075 if (cm->reference_mode == REFERENCE_MODE_SELECT) 4076 rate2 += compmode_cost; 4077 4078 // Estimate the reference frame signaling cost and add it 4079 // to the rolling cost variable. 4080 if (second_ref_frame > INTRA_FRAME) { 4081 rate2 += ref_costs_comp[ref_frame]; 4082 } else { 4083 rate2 += ref_costs_single[ref_frame]; 4084 } 4085 4086 if (!disable_skip) { 4087 // Skip is never coded at the segment level for sub8x8 blocks and instead 4088 // always coded in the bitstream at the mode info level. 4089 4090 if (ref_frame != INTRA_FRAME && !xd->lossless) { 4091 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < 4092 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) { 4093 // Add in the cost of the no skip flag. 4094 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); 4095 } else { 4096 // FIXME(rbultje) make this work for splitmv also 4097 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); 4098 distortion2 = total_sse; 4099 assert(total_sse >= 0); 4100 rate2 -= (rate_y + rate_uv); 4101 rate_y = 0; 4102 rate_uv = 0; 4103 this_skip2 = 1; 4104 } 4105 } else { 4106 // Add in the cost of the no skip flag. 
4107 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); 4108 } 4109 4110 // Calculate the final RD estimate for this mode. 4111 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); 4112 } 4113 4114 if (!disable_skip && ref_frame == INTRA_FRAME) { 4115 for (i = 0; i < REFERENCE_MODES; ++i) 4116 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd); 4117 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) 4118 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd); 4119 } 4120 4121 // Did this mode help.. i.e. is it the new best mode 4122 if (this_rd < best_rd || x->skip) { 4123 if (!mode_excluded) { 4124 int max_plane = MAX_MB_PLANE; 4125 // Note index of best mode so far 4126 best_ref_index = ref_index; 4127 4128 if (ref_frame == INTRA_FRAME) { 4129 /* required for left and above block mv */ 4130 mbmi->mv[0].as_int = 0; 4131 max_plane = 1; 4132 } 4133 4134 rd_cost->rate = rate2; 4135 rd_cost->dist = distortion2; 4136 rd_cost->rdcost = this_rd; 4137 best_rd = this_rd; 4138 best_yrd = best_rd - 4139 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv); 4140 best_mbmode = *mbmi; 4141 best_skip2 = this_skip2; 4142 if (!x->select_tx_size) 4143 swap_block_ptr(x, ctx, 1, 0, 0, max_plane); 4144 memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4], 4145 sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); 4146 4147 for (i = 0; i < 4; i++) 4148 best_bmodes[i] = xd->mi[0]->bmi[i]; 4149 4150 // TODO(debargha): enhance this test with a better distortion prediction 4151 // based on qp, activity mask and history 4152 if ((sf->mode_search_skip_flags & FLAG_EARLY_TERMINATE) && 4153 (ref_index > MIN_EARLY_TERM_INDEX)) { 4154 int qstep = xd->plane[0].dequant[1]; 4155 // TODO(debargha): Enhance this by specializing for each mode_index 4156 int scale = 4; 4157#if CONFIG_VP9_HIGHBITDEPTH 4158 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 4159 qstep >>= (xd->bd - 8); 4160 } 4161#endif // CONFIG_VP9_HIGHBITDEPTH 4162 if (x->source_variance < UINT_MAX) { 4163 const int var_adjust = (x->source_variance 
< 16); 4164 scale -= var_adjust; 4165 } 4166 if (ref_frame > INTRA_FRAME && 4167 distortion2 * scale < qstep * qstep) { 4168 early_term = 1; 4169 } 4170 } 4171 } 4172 } 4173 4174 /* keep record of best compound/single-only prediction */ 4175 if (!disable_skip && ref_frame != INTRA_FRAME) { 4176 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate; 4177 4178 if (cm->reference_mode == REFERENCE_MODE_SELECT) { 4179 single_rate = rate2 - compmode_cost; 4180 hybrid_rate = rate2; 4181 } else { 4182 single_rate = rate2; 4183 hybrid_rate = rate2 + compmode_cost; 4184 } 4185 4186 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); 4187 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); 4188 4189 if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE]) 4190 best_pred_rd[SINGLE_REFERENCE] = single_rd; 4191 else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE]) 4192 best_pred_rd[COMPOUND_REFERENCE] = single_rd; 4193 4194 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT]) 4195 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd; 4196 } 4197 4198 /* keep record of best filter type */ 4199 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME && 4200 cm->interp_filter != BILINEAR) { 4201 int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ? 4202 SWITCHABLE_FILTERS : cm->interp_filter]; 4203 int64_t adj_rd; 4204 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { 4205 if (ref == INT64_MAX) 4206 adj_rd = 0; 4207 else if (filter_cache[i] == INT64_MAX) 4208 // when early termination is triggered, the encoder does not have 4209 // access to the rate-distortion cost. it only knows that the cost 4210 // should be above the maximum valid value. hence it takes the known 4211 // maximum plus an arbitrary constant as the rate-distortion cost. 
4212 adj_rd = mask_filter - ref + 10; 4213 else 4214 adj_rd = filter_cache[i] - ref; 4215 4216 adj_rd += this_rd; 4217 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd); 4218 } 4219 } 4220 4221 if (early_term) 4222 break; 4223 4224 if (x->skip && !comp_pred) 4225 break; 4226 } 4227 4228 if (best_rd >= best_rd_so_far) { 4229 rd_cost->rate = INT_MAX; 4230 rd_cost->rdcost = INT64_MAX; 4231 return; 4232 } 4233 4234 // If we used an estimate for the uv intra rd in the loop above... 4235 if (sf->use_uv_intra_rd_estimate) { 4236 // Do Intra UV best rd mode selection if best mode choice above was intra. 4237 if (best_mbmode.ref_frame[0] == INTRA_FRAME) { 4238 *mbmi = best_mbmode; 4239 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra, 4240 &rate_uv_tokenonly, 4241 &dist_uv, 4242 &skip_uv, 4243 BLOCK_8X8, TX_4X4); 4244 } 4245 } 4246 4247 if (best_rd == INT64_MAX) { 4248 rd_cost->rate = INT_MAX; 4249 rd_cost->dist = INT64_MAX; 4250 rd_cost->rdcost = INT64_MAX; 4251 return; 4252 } 4253 4254 assert((cm->interp_filter == SWITCHABLE) || 4255 (cm->interp_filter == best_mbmode.interp_filter) || 4256 !is_inter_block(&best_mbmode)); 4257 4258 vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, 4259 sf->adaptive_rd_thresh, bsize, best_ref_index); 4260 4261 // macroblock modes 4262 *mbmi = best_mbmode; 4263 x->skip |= best_skip2; 4264 if (!is_inter_block(&best_mbmode)) { 4265 for (i = 0; i < 4; i++) 4266 xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode; 4267 } else { 4268 for (i = 0; i < 4; ++i) 4269 memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info)); 4270 4271 mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int; 4272 mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int; 4273 } 4274 4275 for (i = 0; i < REFERENCE_MODES; ++i) { 4276 if (best_pred_rd[i] == INT64_MAX) 4277 best_pred_diff[i] = INT_MIN; 4278 else 4279 best_pred_diff[i] = best_rd - best_pred_rd[i]; 4280 } 4281 4282 if (!x->skip) { 4283 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { 4284 if 
(best_filter_rd[i] == INT64_MAX) 4285 best_filter_diff[i] = 0; 4286 else 4287 best_filter_diff[i] = best_rd - best_filter_rd[i]; 4288 } 4289 if (cm->interp_filter == SWITCHABLE) 4290 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0); 4291 } else { 4292 vp9_zero(best_filter_diff); 4293 } 4294 4295 store_coding_context(x, ctx, best_ref_index, 4296 best_pred_diff, best_filter_diff, 0); 4297} 4298