vp9_rdopt.c revision ba164dffc5a6795bce97fae02b51ccf3330e15e4
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>

#include "vp9/common/vp9_pragmas.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9_rtcd.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_common.h"

#define INVALID_MV 0x80008000

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

/* Pseudo prediction-mode values used as the mode field of entries in
 * vp9_mode_order[] below.
 * NOTE(review): presumably chosen so they cannot collide with real
 * MB_PREDICTION_MODE values -- confirm against the enum definition. */
#define I4X4_PRED 0x8000
#define SPLITMV 0x10000

/* Table of candidate coding modes. Each entry lists a prediction mode, a
 * reference frame and a second reference frame; NONE in the third slot is
 * used by every entry before the "compound prediction modes" group.
 * NOTE(review): the position of an entry is presumably significant (other
 * per-mode arrays of size MAX_MODES are indexed the same way) -- do not
 * reorder without checking the users of this table. */
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},

  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},

  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},

  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},

  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},

  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},

  {TM_PRED,   INTRA_FRAME,  NONE},

  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},

  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},

  {I4X4_PRED, INTRA_FRAME,  NONE},

  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},

  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},

  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},

  {SPLITMV,   LAST_FRAME,   ALTREF_FRAME},
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
};

// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
112static int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] = 113 {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32}; 114 115#define BASE_RD_THRESH_FREQ_FACT 16 116#define MAX_RD_THRESH_FREQ_FACT 32 117#define MAX_RD_THRESH_FREQ_INC 1 118 119static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES], 120 vp9_coeff_count (*cnoskip)[BLOCK_TYPES], 121 vp9_coeff_probs_model (*p)[BLOCK_TYPES]) { 122 int i, j, k, l; 123 TX_SIZE t; 124 for (t = TX_4X4; t <= TX_32X32; t++) 125 for (i = 0; i < BLOCK_TYPES; i++) 126 for (j = 0; j < REF_TYPES; j++) 127 for (k = 0; k < COEF_BANDS; k++) 128 for (l = 0; l < PREV_COEF_CONTEXTS; l++) { 129 vp9_prob probs[ENTROPY_NODES]; 130 vp9_model_to_full_probs(p[t][i][j][k][l], probs); 131 vp9_cost_tokens((int *)cnoskip[t][i][j][k][l], probs, 132 vp9_coef_tree); 133#if CONFIG_BALANCED_COEFTREE 134 // Replace the eob node prob with a very small value so that the 135 // cost approximately equals the cost without the eob node 136 probs[1] = 1; 137 vp9_cost_tokens((int *)c[t][i][j][k][l], probs, vp9_coef_tree); 138#else 139 vp9_cost_tokens_skip((int *)c[t][i][j][k][l], probs, 140 vp9_coef_tree); 141 assert(c[t][i][j][k][l][DCT_EOB_TOKEN] == 142 cnoskip[t][i][j][k][l][DCT_EOB_TOKEN]); 143#endif 144 } 145} 146 147static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0, 148 0, 0, 0, 0, 0, 0, 0, 0, 149 0, 0, 0, 0, 0, 0, 0, 0, 150 0, 0, 0, 0, 0, 0, 0, 0, }; 151 152// 3* dc_qlookup[Q]*dc_qlookup[Q]; 153 154/* values are now correlated to quantizer */ 155static int sad_per_bit16lut[QINDEX_RANGE]; 156static int sad_per_bit4lut[QINDEX_RANGE]; 157 158void vp9_init_me_luts() { 159 int i; 160 161 // Initialize the sad lut tables using a formulaic calculation for now 162 // This is to make it easier to resolve the impact of experimental changes 163 // to the quantizer tables. 
164 for (i = 0; i < QINDEX_RANGE; i++) { 165 sad_per_bit16lut[i] = 166 (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107); 167 sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742); 168 } 169} 170 171static int compute_rd_mult(int qindex) { 172 const int q = vp9_dc_quant(qindex, 0); 173 return (11 * q * q) >> 2; 174} 175 176void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { 177 cpi->mb.sadperbit16 = sad_per_bit16lut[qindex]; 178 cpi->mb.sadperbit4 = sad_per_bit4lut[qindex]; 179} 180 181 182void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { 183 int q, i, bsize; 184 185 vp9_clear_system_state(); // __asm emms; 186 187 // Further tests required to see if optimum is different 188 // for key frames, golden frames and arf frames. 189 // if (cpi->common.refresh_golden_frame || 190 // cpi->common.refresh_alt_ref_frame) 191 qindex = clamp(qindex, 0, MAXQ); 192 193 cpi->RDMULT = compute_rd_mult(qindex); 194 if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { 195 if (cpi->twopass.next_iiratio > 31) 196 cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4; 197 else 198 cpi->RDMULT += 199 (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4; 200 } 201 cpi->mb.errorperbit = cpi->RDMULT >> 6; 202 cpi->mb.errorperbit += (cpi->mb.errorperbit == 0); 203 204 vp9_set_speed_features(cpi); 205 206 q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25); 207 q <<= 2; 208 if (q < 8) 209 q = 8; 210 211 if (cpi->RDMULT > 1000) { 212 cpi->RDDIV = 1; 213 cpi->RDMULT /= 100; 214 215 for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) { 216 for (i = 0; i < MAX_MODES; ++i) { 217 // Threshold here seem unecessarily harsh but fine given actual 218 // range of values used for cpi->sf.thresh_mult[] 219 int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]); 220 221 // *4 relates to the scaling of rd_thresh_block_size_factor[] 222 if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) { 223 cpi->rd_threshes[bsize][i] = 224 cpi->sf.thresh_mult[i] * 
q * 225 rd_thresh_block_size_factor[bsize] / (4 * 100); 226 } else { 227 cpi->rd_threshes[bsize][i] = INT_MAX; 228 } 229 cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i]; 230 cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT; 231 } 232 } 233 } else { 234 cpi->RDDIV = 100; 235 236 for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) { 237 for (i = 0; i < MAX_MODES; i++) { 238 // Threshold here seem unecessarily harsh but fine given actual 239 // range of values used for cpi->sf.thresh_mult[] 240 int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]); 241 242 if (cpi->sf.thresh_mult[i] < thresh_max) { 243 cpi->rd_threshes[bsize][i] = 244 cpi->sf.thresh_mult[i] * q * 245 rd_thresh_block_size_factor[bsize] / 4; 246 } else { 247 cpi->rd_threshes[bsize][i] = INT_MAX; 248 } 249 cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i]; 250 cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT; 251 } 252 } 253 } 254 255 fill_token_costs(cpi->mb.token_costs, 256 cpi->mb.token_costs_noskip, 257 cpi->common.fc.coef_probs); 258 259 for (i = 0; i < NUM_PARTITION_CONTEXTS; i++) 260 vp9_cost_tokens(cpi->mb.partition_cost[i], 261 cpi->common.fc.partition_prob[cpi->common.frame_type][i], 262 vp9_partition_tree); 263 264 /*rough estimate for costing*/ 265 vp9_init_mode_costs(cpi); 266 267 if (cpi->common.frame_type != KEY_FRAME) { 268 vp9_build_nmv_cost_table( 269 cpi->mb.nmvjointcost, 270 cpi->mb.e_mbd.allow_high_precision_mv ? 
271 cpi->mb.nmvcost_hp : cpi->mb.nmvcost, 272 &cpi->common.fc.nmvc, 273 cpi->mb.e_mbd.allow_high_precision_mv, 1, 1); 274 } 275} 276 277int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) { 278 int i, error = 0; 279 280 for (i = 0; i < block_size; i++) { 281 int this_diff = coeff[i] - dqcoeff[i]; 282 error += this_diff * this_diff; 283 } 284 285 return error; 286} 287 288static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, 289 int plane, int block, PLANE_TYPE type, 290 ENTROPY_CONTEXT *A, 291 ENTROPY_CONTEXT *L, 292 TX_SIZE tx_size, 293 int y_blocks) { 294 MACROBLOCKD *const xd = &mb->e_mbd; 295 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; 296 int pt; 297 int c = 0; 298 int cost = 0, pad; 299 const int *scan, *nb; 300 const int eob = xd->plane[plane].eobs[block]; 301 const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, 302 block, 16); 303 const int ref = mbmi->ref_frame[0] != INTRA_FRAME; 304 unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = 305 mb->token_costs[tx_size][type][ref]; 306 ENTROPY_CONTEXT above_ec, left_ec; 307 TX_TYPE tx_type = DCT_DCT; 308 309 const int segment_id = xd->mode_info_context->mbmi.segment_id; 310 unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = 311 mb->token_costs_noskip[tx_size][type][ref]; 312 313 int seg_eob, default_eob; 314 uint8_t token_cache[1024]; 315 const uint8_t * band_translate; 316 317 // Check for consistency of tx_size with mode info 318 assert((!type && !plane) || (type && plane)); 319 if (type == PLANE_TYPE_Y_WITH_DC) { 320 assert(xd->mode_info_context->mbmi.txfm_size == tx_size); 321 } else { 322 TX_SIZE tx_size_uv = get_uv_tx_size(mbmi); 323 assert(tx_size == tx_size_uv); 324 } 325 326 switch (tx_size) { 327 case TX_4X4: { 328 tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? 
329 get_tx_type_4x4(xd, block) : DCT_DCT; 330 above_ec = A[0] != 0; 331 left_ec = L[0] != 0; 332 seg_eob = 16; 333 scan = get_scan_4x4(tx_type); 334 band_translate = vp9_coefband_trans_4x4; 335 break; 336 } 337 case TX_8X8: { 338 const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; 339 const int sz = 1 + b_width_log2(sb_type); 340 const int x = block & ((1 << sz) - 1), y = block - x; 341 TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? 342 get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT; 343 above_ec = (A[0] + A[1]) != 0; 344 left_ec = (L[0] + L[1]) != 0; 345 scan = get_scan_8x8(tx_type); 346 seg_eob = 64; 347 band_translate = vp9_coefband_trans_8x8plus; 348 break; 349 } 350 case TX_16X16: { 351 const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; 352 const int sz = 2 + b_width_log2(sb_type); 353 const int x = block & ((1 << sz) - 1), y = block - x; 354 TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? 355 get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT; 356 scan = get_scan_16x16(tx_type); 357 seg_eob = 256; 358 above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; 359 left_ec = (L[0] + L[1] + L[2] + L[3]) != 0; 360 band_translate = vp9_coefband_trans_8x8plus; 361 break; 362 } 363 case TX_32X32: 364 scan = vp9_default_scan_32x32; 365 seg_eob = 1024; 366 above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0; 367 left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0; 368 band_translate = vp9_coefband_trans_8x8plus; 369 break; 370 default: 371 abort(); 372 break; 373 } 374 assert(eob <= seg_eob); 375 376 pt = combine_entropy_contexts(above_ec, left_ec); 377 nb = vp9_get_coef_neighbors_handle(scan, &pad); 378 default_eob = seg_eob; 379 380 if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) 381 seg_eob = 0; 382 383 /* sanity check to ensure that we do not have spurious non-zero q values */ 384 if (eob < seg_eob) 385 assert(qcoeff_ptr[scan[eob]] == 0); 386 387 { 388 for (c = 0; c < eob; c++) { 389 int v = 
qcoeff_ptr[scan[c]]; 390 int t = vp9_dct_value_tokens_ptr[v].token; 391 int band = get_coef_band(band_translate, c); 392 if (c) 393 pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob); 394 395 if (!c || token_cache[scan[c - 1]]) // do not skip eob 396 cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v]; 397 else 398 cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v]; 399 token_cache[scan[c]] = vp9_pt_energy_class[t]; 400 } 401 if (c < seg_eob) { 402 if (c) 403 pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob); 404 cost += mb->token_costs_noskip[tx_size][type][ref] 405 [get_coef_band(band_translate, c)] 406 [pt][DCT_EOB_TOKEN]; 407 } 408 } 409 410 // is eob first coefficient; 411 for (pt = 0; pt < (1 << tx_size); pt++) { 412 A[pt] = L[pt] = c > 0; 413 } 414 415 return cost; 416} 417 418static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, 419 int (*r)[2], int *rate, 420 int *d, int *distortion, 421 int *s, int *skip, 422 int64_t txfm_cache[NB_TXFM_MODES], 423 TX_SIZE max_txfm_size) { 424 VP9_COMMON *const cm = &cpi->common; 425 MACROBLOCKD *const xd = &x->e_mbd; 426 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; 427 vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP); 428 int64_t rd[TX_SIZE_MAX_SB][2]; 429 int n, m; 430 int s0, s1; 431 432 const vp9_prob *tx_probs = vp9_get_pred_probs(cm, xd, PRED_TX_SIZE); 433 434 for (n = TX_4X4; n <= max_txfm_size; n++) { 435 r[n][1] = r[n][0]; 436 for (m = 0; m <= n - (n == max_txfm_size); m++) { 437 if (m == n) 438 r[n][1] += vp9_cost_zero(tx_probs[m]); 439 else 440 r[n][1] += vp9_cost_one(tx_probs[m]); 441 } 442 } 443 444 assert(skip_prob > 0); 445 s0 = vp9_cost_bit(skip_prob, 0); 446 s1 = vp9_cost_bit(skip_prob, 1); 447 448 for (n = TX_4X4; n <= max_txfm_size; n++) { 449 if (s[n]) { 450 rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]); 451 } else { 452 rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]); 453 
rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); 454 } 455 } 456 457 if (max_txfm_size == TX_32X32 && 458 (cm->txfm_mode == ALLOW_32X32 || 459 (cm->txfm_mode == TX_MODE_SELECT && 460 rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && 461 rd[TX_32X32][1] < rd[TX_4X4][1]))) { 462 mbmi->txfm_size = TX_32X32; 463 } else if (max_txfm_size >= TX_16X16 && 464 (cm->txfm_mode == ALLOW_16X16 || 465 cm->txfm_mode == ALLOW_32X32 || 466 (cm->txfm_mode == TX_MODE_SELECT && 467 rd[TX_16X16][1] < rd[TX_8X8][1] && 468 rd[TX_16X16][1] < rd[TX_4X4][1]))) { 469 mbmi->txfm_size = TX_16X16; 470 } else if (cm->txfm_mode == ALLOW_8X8 || 471 cm->txfm_mode == ALLOW_16X16 || 472 cm->txfm_mode == ALLOW_32X32 || 473 (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) { 474 mbmi->txfm_size = TX_8X8; 475 } else { 476 mbmi->txfm_size = TX_4X4; 477 } 478 479 *distortion = d[mbmi->txfm_size]; 480 *rate = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT]; 481 *skip = s[mbmi->txfm_size]; 482 483 txfm_cache[ONLY_4X4] = rd[TX_4X4][0]; 484 txfm_cache[ALLOW_8X8] = rd[TX_8X8][0]; 485 txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0]; 486 txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0]; 487 if (max_txfm_size == TX_32X32 && 488 rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && 489 rd[TX_32X32][1] < rd[TX_4X4][1]) 490 txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1]; 491 else if (max_txfm_size >= TX_16X16 && 492 rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) 493 txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1]; 494 else 495 txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ? 
496 rd[TX_4X4][1] : rd[TX_8X8][1]; 497} 498 499static int block_error(int16_t *coeff, int16_t *dqcoeff, 500 int block_size, int shift) { 501 int i; 502 int64_t error = 0; 503 504 for (i = 0; i < block_size; i++) { 505 int this_diff = coeff[i] - dqcoeff[i]; 506 error += (unsigned)this_diff * this_diff; 507 } 508 error >>= shift; 509 510 return error > INT_MAX ? INT_MAX : (int)error; 511} 512 513static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) { 514 const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); 515 return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff, 516 16 << (bwl + bhl), shift); 517} 518 519static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) { 520 const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); 521 int64_t sum = 0; 522 int plane; 523 524 for (plane = 1; plane < MAX_MB_PLANE; plane++) { 525 const int subsampling = x->e_mbd.plane[plane].subsampling_x + 526 x->e_mbd.plane[plane].subsampling_y; 527 sum += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff, 528 16 << (bwl + bhl - subsampling), 0); 529 } 530 sum >>= shift; 531 return sum > INT_MAX ? 
INT_MAX : (int)sum; 532} 533 534struct rdcost_block_args { 535 VP9_COMMON *cm; 536 MACROBLOCK *x; 537 ENTROPY_CONTEXT t_above[16]; 538 ENTROPY_CONTEXT t_left[16]; 539 TX_SIZE tx_size; 540 int bw; 541 int bh; 542 int cost; 543}; 544 545static void rdcost_block(int plane, int block, BLOCK_SIZE_TYPE bsize, 546 int ss_txfrm_size, void *arg) { 547 struct rdcost_block_args* args = arg; 548 int x_idx, y_idx; 549 MACROBLOCKD * const xd = &args->x->e_mbd; 550 551 txfrm_block_to_raster_xy(xd, bsize, plane, block, args->tx_size * 2, &x_idx, 552 &y_idx); 553 554 args->cost += cost_coeffs(args->cm, args->x, plane, block, 555 xd->plane[plane].plane_type, args->t_above + x_idx, 556 args->t_left + y_idx, args->tx_size, 557 args->bw * args->bh); 558} 559 560static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane, 561 BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { 562 MACROBLOCKD * const xd = &x->e_mbd; 563 const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; 564 const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y; 565 const int bw = 1 << bwl, bh = 1 << bhl; 566 struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh, 0 }; 567 568 vpx_memcpy(&args.t_above, xd->plane[plane].above_context, 569 sizeof(ENTROPY_CONTEXT) * bw); 570 vpx_memcpy(&args.t_left, xd->plane[plane].left_context, 571 sizeof(ENTROPY_CONTEXT) * bh); 572 573 foreach_transformed_block_in_plane(xd, bsize, plane, rdcost_block, &args); 574 575 return args.cost; 576} 577 578static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x, 579 BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { 580 int cost = 0, plane; 581 582 for (plane = 1; plane < MAX_MB_PLANE; plane++) { 583 cost += rdcost_plane(cm, x, plane, bsize, tx_size); 584 } 585 return cost; 586} 587 588static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x, 589 int *rate, int *distortion, int *skippable, 590 BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { 591 MACROBLOCKD *const xd = &x->e_mbd; 592 
xd->mode_info_context->mbmi.txfm_size = tx_size; 593 594 if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) 595 vp9_encode_intra_block_y(cm, x, bsize); 596 else 597 vp9_xform_quant_sby(cm, x, bsize); 598 599 *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 0 : 2); 600 *rate = rdcost_plane(cm, x, 0, bsize, tx_size); 601 *skippable = vp9_sby_is_skippable(xd, bsize); 602} 603 604static void super_block_yrd(VP9_COMP *cpi, 605 MACROBLOCK *x, int *rate, int *distortion, 606 int *skip, BLOCK_SIZE_TYPE bs, 607 int64_t txfm_cache[NB_TXFM_MODES]) { 608 VP9_COMMON *const cm = &cpi->common; 609 int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB]; 610 MACROBLOCKD *xd = &x->e_mbd; 611 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; 612 613 assert(bs == mbmi->sb_type); 614 if (mbmi->ref_frame[0] > INTRA_FRAME) 615 vp9_subtract_sby(x, bs); 616 617 if (cpi->speed > 4) { 618 if (bs >= BLOCK_SIZE_SB32X32) { 619 mbmi->txfm_size = TX_32X32; 620 } else if (bs >= BLOCK_SIZE_MB16X16) { 621 mbmi->txfm_size = TX_16X16; 622 } else if (bs >= BLOCK_SIZE_SB8X8) { 623 mbmi->txfm_size = TX_8X8; 624 } else { 625 mbmi->txfm_size = TX_4X4; 626 } 627 vpx_memset(txfm_cache, 0, NB_TXFM_MODES * sizeof(int64_t)); 628 super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs, 629 mbmi->txfm_size); 630 return; 631 } 632 if (bs >= BLOCK_SIZE_SB32X32) 633 super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], 634 bs, TX_32X32); 635 if (bs >= BLOCK_SIZE_MB16X16) 636 super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], 637 bs, TX_16X16); 638 super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs, 639 TX_8X8); 640 super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs, 641 TX_4X4); 642 643 choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, 644 skip, txfm_cache, 645 TX_32X32 - (bs < BLOCK_SIZE_SB32X32) 646 - (bs < BLOCK_SIZE_MB16X16)); 647} 648 649static int64_t 
rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, 650 MB_PREDICTION_MODE *best_mode, 651 int *bmode_costs, 652 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, 653 int *bestrate, int *bestratey, 654 int *bestdistortion, 655 BLOCK_SIZE_TYPE bsize) { 656 MB_PREDICTION_MODE mode; 657 MACROBLOCKD *xd = &x->e_mbd; 658 int64_t best_rd = INT64_MAX; 659 int rate = 0; 660 int distortion; 661 VP9_COMMON *const cm = &cpi->common; 662 const int src_stride = x->plane[0].src.stride; 663 uint8_t *src, *dst; 664 int16_t *src_diff, *coeff; 665 666 ENTROPY_CONTEXT ta[2], tempa[2]; 667 ENTROPY_CONTEXT tl[2], templ[2]; 668 TX_TYPE tx_type = DCT_DCT; 669 TX_TYPE best_tx_type = DCT_DCT; 670 int bw = 1 << b_width_log2(bsize); 671 int bh = 1 << b_height_log2(bsize); 672 int idx, idy, block; 673 DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]); 674 675 assert(ib < 4); 676 677 vpx_memcpy(ta, a, sizeof(ta)); 678 vpx_memcpy(tl, l, sizeof(tl)); 679 xd->mode_info_context->mbmi.txfm_size = TX_4X4; 680 681 for (mode = DC_PRED; mode <= TM_PRED; ++mode) { 682 int64_t this_rd; 683 int ratey = 0; 684 685 rate = bmode_costs[mode]; 686 distortion = 0; 687 688 vpx_memcpy(tempa, ta, sizeof(ta)); 689 vpx_memcpy(templ, tl, sizeof(tl)); 690 691 for (idy = 0; idy < bh; ++idy) { 692 for (idx = 0; idx < bw; ++idx) { 693 block = ib + idy * 2 + idx; 694 xd->mode_info_context->bmi[block].as_mode.first = mode; 695 src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block, 696 x->plane[0].src.buf, src_stride); 697 src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block, 698 x->plane[0].src_diff); 699 coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16); 700 dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block, 701 xd->plane[0].dst.buf, 702 xd->plane[0].dst.stride); 703 vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, mode, 704 dst, xd->plane[0].dst.stride); 705 vp9_subtract_block(4, 4, src_diff, 8, 706 src, src_stride, 707 dst, xd->plane[0].dst.stride); 708 709 tx_type = 
get_tx_type_4x4(xd, block); 710 if (tx_type != DCT_DCT) { 711 vp9_short_fht4x4(src_diff, coeff, 8, tx_type); 712 x->quantize_b_4x4(x, block, tx_type, 16); 713 } else { 714 x->fwd_txm4x4(src_diff, coeff, 16); 715 x->quantize_b_4x4(x, block, tx_type, 16); 716 } 717 718 ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC, 719 tempa + idx, templ + idy, TX_4X4, 16); 720 distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, 721 block, 16), 16) >> 2; 722 723 if (best_tx_type != DCT_DCT) 724 vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16), 725 dst, xd->plane[0].dst.stride, best_tx_type); 726 else 727 xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16), 728 dst, xd->plane[0].dst.stride); 729 } 730 } 731 732 rate += ratey; 733 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); 734 735 if (this_rd < best_rd) { 736 *bestrate = rate; 737 *bestratey = ratey; 738 *bestdistortion = distortion; 739 best_rd = this_rd; 740 *best_mode = mode; 741 best_tx_type = tx_type; 742 vpx_memcpy(a, tempa, sizeof(tempa)); 743 vpx_memcpy(l, templ, sizeof(templ)); 744 for (idy = 0; idy < bh; ++idy) { 745 for (idx = 0; idx < bw; ++idx) { 746 block = ib + idy * 2 + idx; 747 vpx_memcpy(best_dqcoeff[idy * 2 + idx], 748 BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16), 749 sizeof(best_dqcoeff[0])); 750 } 751 } 752 } 753 } 754 755 for (idy = 0; idy < bh; ++idy) { 756 for (idx = 0; idx < bw; ++idx) { 757 block = ib + idy * 2 + idx; 758 xd->mode_info_context->bmi[block].as_mode.first = *best_mode; 759 dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block, 760 xd->plane[0].dst.buf, 761 xd->plane[0].dst.stride); 762 763 vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, *best_mode, 764 dst, xd->plane[0].dst.stride); 765 // inverse transform 766 if (best_tx_type != DCT_DCT) 767 vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst, 768 xd->plane[0].dst.stride, best_tx_type); 769 else 770 xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst, 
771 xd->plane[0].dst.stride); 772 } 773 } 774 775 return best_rd; 776} 777 778static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, 779 int *Rate, int *rate_y, 780 int *Distortion, int64_t best_rd) { 781 int i, j; 782 MACROBLOCKD *const xd = &mb->e_mbd; 783 BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; 784 int bw = 1 << b_width_log2(bsize); 785 int bh = 1 << b_height_log2(bsize); 786 int idx, idy; 787 int cost = 0; 788 int distortion = 0; 789 int tot_rate_y = 0; 790 int64_t total_rd = 0; 791 ENTROPY_CONTEXT t_above[4], t_left[4]; 792 int *bmode_costs; 793 MODE_INFO *const mic = xd->mode_info_context; 794 795 vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); 796 vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left)); 797 798 bmode_costs = mb->mbmode_cost; 799 800 for (idy = 0; idy < 2; idy += bh) { 801 for (idx = 0; idx < 2; idx += bw) { 802 const int mis = xd->mode_info_stride; 803 MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); 804 int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry); 805 int UNINITIALIZED_IS_SAFE(d); 806 i = idy * 2 + idx; 807 808 if (xd->frame_type == KEY_FRAME) { 809 const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis); 810 const MB_PREDICTION_MODE L = (xd->left_available || idx) ? 
811 left_block_mode(mic, i) : DC_PRED; 812 813 bmode_costs = mb->y_mode_costs[A][L]; 814 } 815 816 total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs, 817 t_above + idx, t_left + idy, 818 &r, &ry, &d, bsize); 819 cost += r; 820 distortion += d; 821 tot_rate_y += ry; 822 823 mic->bmi[i].as_mode.first = best_mode; 824 for (j = 1; j < bh; ++j) 825 mic->bmi[i + j * 2].as_mode.first = best_mode; 826 for (j = 1; j < bw; ++j) 827 mic->bmi[i + j].as_mode.first = best_mode; 828 829 if (total_rd >= best_rd) 830 break; 831 } 832 } 833 834 if (total_rd >= best_rd) 835 return INT64_MAX; 836 837 *Rate = cost; 838 *rate_y = tot_rate_y; 839 *Distortion = distortion; 840 xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode.first; 841 842 return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); 843} 844 845static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, 846 int *rate, int *rate_tokenonly, 847 int *distortion, int *skippable, 848 BLOCK_SIZE_TYPE bsize, 849 int64_t txfm_cache[NB_TXFM_MODES]) { 850 MB_PREDICTION_MODE mode; 851 MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); 852 MACROBLOCKD *const xd = &x->e_mbd; 853 int this_rate, this_rate_tokenonly; 854 int this_distortion, s; 855 int64_t best_rd = INT64_MAX, this_rd; 856 TX_SIZE UNINITIALIZED_IS_SAFE(best_tx); 857 int i; 858 int *bmode_costs = x->mbmode_cost; 859 860 if (bsize < BLOCK_SIZE_SB8X8) { 861 x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4; 862 return best_rd; 863 } 864 865 for (i = 0; i < NB_TXFM_MODES; i++) 866 txfm_cache[i] = INT64_MAX; 867 868 /* Y Search for 32x32 intra prediction mode */ 869 for (mode = DC_PRED; mode <= TM_PRED; mode++) { 870 int64_t local_txfm_cache[NB_TXFM_MODES]; 871 MODE_INFO *const mic = xd->mode_info_context; 872 const int mis = xd->mode_info_stride; 873 874 if (cpi->common.frame_type == KEY_FRAME) { 875 const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis); 876 const MB_PREDICTION_MODE L = xd->left_available ? 
877 left_block_mode(mic, 0) : DC_PRED; 878 879 bmode_costs = x->y_mode_costs[A][L]; 880 } 881 x->e_mbd.mode_info_context->mbmi.mode = mode; 882 883 super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, 884 bsize, local_txfm_cache); 885 886 this_rate = this_rate_tokenonly + bmode_costs[mode]; 887 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); 888 889 if (this_rd < best_rd) { 890 mode_selected = mode; 891 best_rd = this_rd; 892 best_tx = x->e_mbd.mode_info_context->mbmi.txfm_size; 893 *rate = this_rate; 894 *rate_tokenonly = this_rate_tokenonly; 895 *distortion = this_distortion; 896 *skippable = s; 897 } 898 899 for (i = 0; i < NB_TXFM_MODES; i++) { 900 int64_t adj_rd = this_rd + local_txfm_cache[i] - 901 local_txfm_cache[cpi->common.txfm_mode]; 902 if (adj_rd < txfm_cache[i]) { 903 txfm_cache[i] = adj_rd; 904 } 905 } 906 } 907 908 x->e_mbd.mode_info_context->mbmi.mode = mode_selected; 909 x->e_mbd.mode_info_context->mbmi.txfm_size = best_tx; 910 911 return best_rd; 912} 913 914static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x, 915 int *rate, int *distortion, 916 int *skippable, BLOCK_SIZE_TYPE bsize, 917 TX_SIZE uv_tx_size) { 918 MACROBLOCKD *const xd = &x->e_mbd; 919 if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) 920 vp9_encode_intra_block_uv(cm, x, bsize); 921 else 922 vp9_xform_quant_sbuv(cm, x, bsize); 923 924 *distortion = block_error_sbuv(x, bsize, uv_tx_size == TX_32X32 ? 
0 : 2); 925 *rate = rdcost_uv(cm, x, bsize, uv_tx_size); 926 *skippable = vp9_sbuv_is_skippable(xd, bsize); 927} 928 929static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, 930 int *rate, int *distortion, int *skippable, 931 BLOCK_SIZE_TYPE bsize) { 932 MACROBLOCKD *const xd = &x->e_mbd; 933 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; 934 935 if (mbmi->ref_frame[0] > INTRA_FRAME) 936 vp9_subtract_sbuv(x, bsize); 937 938 if (mbmi->txfm_size >= TX_32X32 && bsize >= BLOCK_SIZE_SB64X64) { 939 super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, 940 TX_32X32); 941 } else if (mbmi->txfm_size >= TX_16X16 && bsize >= BLOCK_SIZE_SB32X32) { 942 super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, 943 TX_16X16); 944 } else if (mbmi->txfm_size >= TX_8X8 && bsize >= BLOCK_SIZE_MB16X16) { 945 super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, 946 TX_8X8); 947 } else { 948 super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, 949 TX_4X4); 950 } 951} 952 953static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, 954 int *rate, int *rate_tokenonly, 955 int *distortion, int *skippable, 956 BLOCK_SIZE_TYPE bsize) { 957 MB_PREDICTION_MODE mode; 958 MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); 959 int64_t best_rd = INT64_MAX, this_rd; 960 int this_rate_tokenonly, this_rate; 961 int this_distortion, s; 962 963 for (mode = DC_PRED; mode <= TM_PRED; mode++) { 964 x->e_mbd.mode_info_context->mbmi.uv_mode = mode; 965 super_block_uvrd(&cpi->common, x, &this_rate_tokenonly, 966 &this_distortion, &s, bsize); 967 this_rate = this_rate_tokenonly + 968 x->intra_uv_mode_cost[x->e_mbd.frame_type][mode]; 969 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); 970 971 if (this_rd < best_rd) { 972 mode_selected = mode; 973 best_rd = this_rd; 974 *rate = this_rate; 975 *rate_tokenonly = this_rate_tokenonly; 976 *distortion = this_distortion; 977 *skippable = s; 978 } 979 } 
980 981 x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected; 982 983 return best_rd; 984} 985 986int vp9_cost_mv_ref(VP9_COMP *cpi, 987 MB_PREDICTION_MODE m, 988 const int mode_context) { 989 MACROBLOCKD *xd = &cpi->mb.e_mbd; 990 int segment_id = xd->mode_info_context->mbmi.segment_id; 991 992 // Dont account for mode here if segment skip is enabled. 993 if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { 994 VP9_COMMON *pc = &cpi->common; 995 assert(NEARESTMV <= m && m <= NEWMV); 996 return cost_token(vp9_sb_mv_ref_tree, 997 pc->fc.inter_mode_probs[mode_context], 998 vp9_sb_mv_ref_encoding_array - NEARESTMV + m); 999 } else 1000 return 0; 1001} 1002 1003void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) { 1004 x->e_mbd.mode_info_context->mbmi.mode = mb; 1005 x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int; 1006} 1007 1008static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, 1009 BLOCK_SIZE_TYPE bsize, 1010 int_mv *frame_mv, 1011 int mi_row, int mi_col, 1012 int_mv single_newmv[MAX_REF_FRAMES], 1013 int *rate_mv); 1014static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, 1015 BLOCK_SIZE_TYPE bsize, 1016 int mi_row, int mi_col, 1017 int_mv *tmp_mv, int *rate_mv); 1018 1019static int labels2mode(MACROBLOCK *x, int i, 1020 MB_PREDICTION_MODE this_mode, 1021 int_mv *this_mv, int_mv *this_second_mv, 1022 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], 1023 int_mv seg_mvs[MAX_REF_FRAMES], 1024 int_mv *best_ref_mv, 1025 int_mv *second_best_ref_mv, 1026 int *mvjcost, int *mvcost[2], VP9_COMP *cpi) { 1027 MACROBLOCKD *const xd = &x->e_mbd; 1028 MODE_INFO *const mic = xd->mode_info_context; 1029 MB_MODE_INFO * mbmi = &mic->mbmi; 1030 int cost = 0, thismvcost = 0; 1031 int idx, idy; 1032 int bw = 1 << b_width_log2(mbmi->sb_type); 1033 int bh = 1 << b_height_log2(mbmi->sb_type); 1034 1035 /* We have to be careful retrieving previously-encoded motion vectors. 
1036 Ones from this macroblock have to be pulled from the BLOCKD array 1037 as they have not yet made it to the bmi array in our MB_MODE_INFO. */ 1038 MB_PREDICTION_MODE m; 1039 1040 // the only time we should do costing for new motion vector or mode 1041 // is when we are on a new label (jbb May 08, 2007) 1042 switch (m = this_mode) { 1043 case NEWMV: 1044 this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int; 1045 thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost, 1046 102, xd->allow_high_precision_mv); 1047 if (mbmi->ref_frame[1] > 0) { 1048 this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int; 1049 thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv, 1050 mvjcost, mvcost, 102, 1051 xd->allow_high_precision_mv); 1052 } 1053 break; 1054 case NEARESTMV: 1055 this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int; 1056 if (mbmi->ref_frame[1] > 0) 1057 this_second_mv->as_int = 1058 frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int; 1059 break; 1060 case NEARMV: 1061 this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int; 1062 if (mbmi->ref_frame[1] > 0) 1063 this_second_mv->as_int = 1064 frame_mv[NEARMV][mbmi->ref_frame[1]].as_int; 1065 break; 1066 case ZEROMV: 1067 this_mv->as_int = 0; 1068 if (mbmi->ref_frame[1] > 0) 1069 this_second_mv->as_int = 0; 1070 break; 1071 default: 1072 break; 1073 } 1074 1075 cost = vp9_cost_mv_ref(cpi, this_mode, 1076 mbmi->mb_mode_context[mbmi->ref_frame[0]]); 1077 1078 mic->bmi[i].as_mv[0].as_int = this_mv->as_int; 1079 if (mbmi->ref_frame[1] > 0) 1080 mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int; 1081 1082 x->partition_info->bmi[i].mode = m; 1083 x->partition_info->bmi[i].mv.as_int = this_mv->as_int; 1084 if (mbmi->ref_frame[1] > 0) 1085 x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int; 1086 for (idy = 0; idy < bh; ++idy) { 1087 for (idx = 0; idx < bw; ++idx) { 1088 vpx_memcpy(&mic->bmi[i + idy * 2 + idx], 1089 &mic->bmi[i], sizeof(mic->bmi[i])); 1090 
vpx_memcpy(&x->partition_info->bmi[i + idy * 2 + idx], 1091 &x->partition_info->bmi[i], 1092 sizeof(x->partition_info->bmi[i])); 1093 } 1094 } 1095 1096 cost += thismvcost; 1097 return cost; 1098} 1099 1100static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, 1101 MACROBLOCK *x, 1102 int i, 1103 int *labelyrate, 1104 int *distortion, 1105 ENTROPY_CONTEXT *ta, 1106 ENTROPY_CONTEXT *tl) { 1107 int k; 1108 MACROBLOCKD *xd = &x->e_mbd; 1109 BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; 1110 int bwl = b_width_log2(bsize), bw = 1 << bwl; 1111 int bhl = b_height_log2(bsize), bh = 1 << bhl; 1112 int idx, idy; 1113 const int src_stride = x->plane[0].src.stride; 1114 uint8_t* const src = 1115 raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, 1116 x->plane[0].src.buf, src_stride); 1117 int16_t* src_diff = 1118 raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i, 1119 x->plane[0].src_diff); 1120 int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i); 1121 uint8_t* const pre = 1122 raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, 1123 xd->plane[0].pre[0].buf, 1124 xd->plane[0].pre[0].stride); 1125 uint8_t* const dst = 1126 raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, 1127 xd->plane[0].dst.buf, 1128 xd->plane[0].dst.stride); 1129 int thisdistortion = 0; 1130 int thisrate = 0; 1131 1132 *labelyrate = 0; 1133 *distortion = 0; 1134 1135 vp9_build_inter_predictor(pre, 1136 xd->plane[0].pre[0].stride, 1137 dst, 1138 xd->plane[0].dst.stride, 1139 &xd->mode_info_context->bmi[i].as_mv[0], 1140 &xd->scale_factor[0], 1141 4 * bw, 4 * bh, 0 /* no avg */, &xd->subpix); 1142 1143 // TODO(debargha): Make this work properly with the 1144 // implicit-compoundinter-weight experiment when implicit 1145 // weighting for splitmv modes is turned on. 
1146 if (xd->mode_info_context->mbmi.ref_frame[1] > 0) { 1147 uint8_t* const second_pre = 1148 raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, 1149 xd->plane[0].pre[1].buf, 1150 xd->plane[0].pre[1].stride); 1151 vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride, 1152 dst, xd->plane[0].dst.stride, 1153 &xd->mode_info_context->bmi[i].as_mv[1], 1154 &xd->scale_factor[1], 4 * bw, 4 * bh, 1, 1155 &xd->subpix); 1156 } 1157 1158 vp9_subtract_block(4 * bh, 4 * bw, src_diff, 8, 1159 src, src_stride, 1160 dst, xd->plane[0].dst.stride); 1161 1162 k = i; 1163 for (idy = 0; idy < bh; ++idy) { 1164 for (idx = 0; idx < bw; ++idx) { 1165 k += (idy * 2 + idx); 1166 src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k, 1167 x->plane[0].src_diff); 1168 coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k); 1169 x->fwd_txm4x4(src_diff, coeff, 16); 1170 x->quantize_b_4x4(x, k, DCT_DCT, 16); 1171 thisdistortion += vp9_block_error(coeff, 1172 BLOCK_OFFSET(xd->plane[0].dqcoeff, 1173 k, 16), 16); 1174 thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC, 1175 ta + (k & 1), 1176 tl + (k >> 1), TX_4X4, 16); 1177 } 1178 } 1179 *distortion += thisdistortion; 1180 *labelyrate += thisrate; 1181 1182 *distortion >>= 2; 1183 return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion); 1184} 1185 1186typedef struct { 1187 int_mv *ref_mv, *second_ref_mv; 1188 int_mv mvp; 1189 1190 int64_t segment_rd; 1191 int r; 1192 int d; 1193 int segment_yrate; 1194 MB_PREDICTION_MODE modes[4]; 1195 int_mv mvs[4], second_mvs[4]; 1196 int eobs[4]; 1197 int mvthresh; 1198} BEST_SEG_INFO; 1199 1200static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) { 1201 int r = 0; 1202 r |= (mv->as_mv.row >> 3) < x->mv_row_min; 1203 r |= (mv->as_mv.row >> 3) > x->mv_row_max; 1204 r |= (mv->as_mv.col >> 3) < x->mv_col_min; 1205 r |= (mv->as_mv.col >> 3) > x->mv_col_max; 1206 return r; 1207} 1208 1209static enum BlockSize get_block_size(int bw, int bh) { 1210 if (bw == 4 && bh == 4) 1211 
return BLOCK_4X4; 1212 1213 if (bw == 4 && bh == 8) 1214 return BLOCK_4X8; 1215 1216 if (bw == 8 && bh == 4) 1217 return BLOCK_8X4; 1218 1219 if (bw == 8 && bh == 8) 1220 return BLOCK_8X8; 1221 1222 if (bw == 8 && bh == 16) 1223 return BLOCK_8X16; 1224 1225 if (bw == 16 && bh == 8) 1226 return BLOCK_16X8; 1227 1228 if (bw == 16 && bh == 16) 1229 return BLOCK_16X16; 1230 1231 if (bw == 32 && bh == 32) 1232 return BLOCK_32X32; 1233 1234 if (bw == 32 && bh == 16) 1235 return BLOCK_32X16; 1236 1237 if (bw == 16 && bh == 32) 1238 return BLOCK_16X32; 1239 1240 if (bw == 64 && bh == 32) 1241 return BLOCK_64X32; 1242 1243 if (bw == 32 && bh == 64) 1244 return BLOCK_32X64; 1245 1246 if (bw == 64 && bh == 64) 1247 return BLOCK_64X64; 1248 1249 assert(0); 1250 return -1; 1251} 1252 1253static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { 1254 MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi; 1255 x->plane[0].src.buf = 1256 raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i, 1257 x->plane[0].src.buf, 1258 x->plane[0].src.stride); 1259 assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0); 1260 x->e_mbd.plane[0].pre[0].buf = 1261 raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i, 1262 x->e_mbd.plane[0].pre[0].buf, 1263 x->e_mbd.plane[0].pre[0].stride); 1264 if (mbmi->ref_frame[1]) 1265 x->e_mbd.plane[0].pre[1].buf = 1266 raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i, 1267 x->e_mbd.plane[0].pre[1].buf, 1268 x->e_mbd.plane[0].pre[1].stride); 1269} 1270 1271static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src, 1272 struct buf_2d orig_pre[2]) { 1273 MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi; 1274 x->plane[0].src = orig_src; 1275 x->e_mbd.plane[0].pre[0] = orig_pre[0]; 1276 if (mbmi->ref_frame[1]) 1277 x->e_mbd.plane[0].pre[1] = orig_pre[1]; 1278} 1279 1280static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, 1281 BEST_SEG_INFO *bsi, 1282 int_mv seg_mvs[4][MAX_REF_FRAMES], 1283 int mi_row, 
int mi_col) { 1284 int i, j; 1285 int br = 0, bd = 0; 1286 MB_PREDICTION_MODE this_mode; 1287 MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; 1288 const int label_count = 4; 1289 int64_t this_segment_rd = 0, other_segment_rd; 1290 int label_mv_thresh; 1291 int rate = 0; 1292 int sbr = 0, sbd = 0; 1293 int segmentyrate = 0; 1294 int best_eobs[4] = { 0 }; 1295 BLOCK_SIZE_TYPE bsize = mbmi->sb_type; 1296 int bwl = b_width_log2(bsize), bw = 1 << bwl; 1297 int bhl = b_height_log2(bsize), bh = 1 << bhl; 1298 int idx, idy; 1299 vp9_variance_fn_ptr_t *v_fn_ptr; 1300 ENTROPY_CONTEXT t_above[4], t_left[4]; 1301 ENTROPY_CONTEXT t_above_b[4], t_left_b[4]; 1302 1303 vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above)); 1304 vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left)); 1305 1306 v_fn_ptr = &cpi->fn_ptr[get_block_size(4 << bwl, 4 << bhl)]; 1307 1308 // 64 makes this threshold really big effectively 1309 // making it so that we very rarely check mvs on 1310 // segments. setting this to 1 would make mv thresh 1311 // roughly equal to what it is for macroblocks 1312 label_mv_thresh = 1 * bsi->mvthresh / label_count; 1313 1314 // Segmentation method overheads 1315 other_segment_rd = this_segment_rd; 1316 1317 for (idy = 0; idy < 2; idy += bh) { 1318 for (idx = 0; idx < 2; idx += bw) { 1319 // TODO(jingning,rbultje): rewrite the rate-distortion optimization 1320 // loop for 4x4/4x8/8x4 block coding. 
to be replaced with new rd loop 1321 int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT]; 1322 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; 1323 int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX; 1324 MB_PREDICTION_MODE mode_selected = ZEROMV; 1325 int bestlabelyrate = 0; 1326 i = idy * 2 + idx; 1327 1328 frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0; 1329 frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0; 1330 vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, 1331 &frame_mv[NEARESTMV][mbmi->ref_frame[0]], 1332 &frame_mv[NEARMV][mbmi->ref_frame[0]], 1333 i, 0); 1334 if (mbmi->ref_frame[1] > 0) 1335 vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, 1336 &frame_mv[NEARESTMV][mbmi->ref_frame[1]], 1337 &frame_mv[NEARMV][mbmi->ref_frame[1]], 1338 i, 1); 1339 1340 // search for the best motion vector on this segment 1341 for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { 1342 int64_t this_rd; 1343 int distortion; 1344 int labelyrate; 1345 ENTROPY_CONTEXT t_above_s[4], t_left_s[4]; 1346 const struct buf_2d orig_src = x->plane[0].src; 1347 struct buf_2d orig_pre[2]; 1348 1349 vpx_memcpy(orig_pre, x->e_mbd.plane[0].pre, sizeof(orig_pre)); 1350 1351 vpx_memcpy(t_above_s, t_above, sizeof(t_above_s)); 1352 vpx_memcpy(t_left_s, t_left, sizeof(t_left_s)); 1353 1354 // motion search for newmv (single predictor case only) 1355 if (mbmi->ref_frame[1] <= 0 && this_mode == NEWMV) { 1356 int step_param = 0; 1357 int further_steps; 1358 int thissme, bestsme = INT_MAX; 1359 int sadpb = x->sadperbit4; 1360 int_mv mvp_full; 1361 1362 /* Is the best so far sufficiently good that we cant justify doing 1363 * and new motion search. */ 1364 if (best_label_rd < label_mv_thresh) 1365 break; 1366 1367 if (cpi->compressor_speed) { 1368 // use previous block's result as next block's MV predictor. 
1369 if (i > 0) { 1370 bsi->mvp.as_int = 1371 x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int; 1372 if (i == 2) 1373 bsi->mvp.as_int = 1374 x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int; 1375 step_param = 2; 1376 } 1377 } 1378 1379 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; 1380 1381 mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3; 1382 mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; 1383 1384 // adjust src pointer for this block 1385 mi_buf_shift(x, i); 1386 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, 1387 sadpb, further_steps, 0, v_fn_ptr, 1388 bsi->ref_mv, &mode_mv[NEWMV]); 1389 1390 // Should we do a full search (best quality only) 1391 if (cpi->compressor_speed == 0) { 1392 /* Check if mvp_full is within the range. */ 1393 clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, 1394 x->mv_row_min, x->mv_row_max); 1395 1396 thissme = cpi->full_search_sad(x, &mvp_full, 1397 sadpb, 16, v_fn_ptr, 1398 x->nmvjointcost, x->mvcost, 1399 bsi->ref_mv, i); 1400 1401 if (thissme < bestsme) { 1402 bestsme = thissme; 1403 mode_mv[NEWMV].as_int = 1404 x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int; 1405 } else { 1406 /* The full search result is actually worse so re-instate the 1407 * previous best vector */ 1408 x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int = 1409 mode_mv[NEWMV].as_int; 1410 } 1411 } 1412 1413 if (bestsme < INT_MAX) { 1414 int distortion; 1415 unsigned int sse; 1416 cpi->find_fractional_mv_step(x, &mode_mv[NEWMV], 1417 bsi->ref_mv, x->errorperbit, v_fn_ptr, 1418 x->nmvjointcost, x->mvcost, 1419 &distortion, &sse); 1420 1421 // safe motion search result for use in compound prediction 1422 seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int; 1423 } 1424 1425 // restore src pointers 1426 mi_buf_restore(x, orig_src, orig_pre); 1427 } else if (mbmi->ref_frame[1] > 0 && this_mode == NEWMV) { 1428 if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV || 1429 seg_mvs[i][mbmi->ref_frame[0]].as_int == 
INVALID_MV) 1430 continue; 1431 1432 // adjust src pointers 1433 mi_buf_shift(x, i); 1434 if (cpi->sf.comp_inter_joint_search_thresh < bsize) { 1435 int rate_mv; 1436 joint_motion_search(cpi, x, bsize, frame_mv[this_mode], 1437 mi_row, mi_col, seg_mvs[i], 1438 &rate_mv); 1439 seg_mvs[i][mbmi->ref_frame[0]].as_int = 1440 frame_mv[this_mode][mbmi->ref_frame[0]].as_int; 1441 seg_mvs[i][mbmi->ref_frame[1]].as_int = 1442 frame_mv[this_mode][mbmi->ref_frame[1]].as_int; 1443 } 1444 // restore src pointers 1445 mi_buf_restore(x, orig_src, orig_pre); 1446 } 1447 1448 rate = labels2mode(x, i, this_mode, &mode_mv[this_mode], 1449 &second_mode_mv[this_mode], frame_mv, seg_mvs[i], 1450 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, 1451 x->mvcost, cpi); 1452 1453 // Trap vectors that reach beyond the UMV borders 1454 if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || 1455 ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || 1456 ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || 1457 ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) { 1458 continue; 1459 } 1460 if (mbmi->ref_frame[1] > 0 && 1461 mv_check_bounds(x, &second_mode_mv[this_mode])) 1462 continue; 1463 1464 this_rd = encode_inter_mb_segment(&cpi->common, 1465 x, i, &labelyrate, 1466 &distortion, t_above_s, t_left_s); 1467 this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); 1468 rate += labelyrate; 1469 1470 if (this_rd < best_label_rd) { 1471 sbr = rate; 1472 sbd = distortion; 1473 bestlabelyrate = labelyrate; 1474 mode_selected = this_mode; 1475 best_label_rd = this_rd; 1476 best_eobs[i] = x->e_mbd.plane[0].eobs[i]; 1477 vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s)); 1478 vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s)); 1479 } 1480 } /*for each 4x4 mode*/ 1481 1482 vpx_memcpy(t_above, t_above_b, sizeof(t_above)); 1483 vpx_memcpy(t_left, t_left_b, sizeof(t_left)); 1484 1485 labels2mode(x, i, mode_selected, &mode_mv[mode_selected], 1486 &second_mode_mv[mode_selected], frame_mv, 
seg_mvs[i], 1487 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, 1488 x->mvcost, cpi); 1489 1490 br += sbr; 1491 bd += sbd; 1492 segmentyrate += bestlabelyrate; 1493 this_segment_rd += best_label_rd; 1494 other_segment_rd += best_other_rd; 1495 1496 for (j = 1; j < bh; ++j) 1497 vpx_memcpy(&x->partition_info->bmi[i + j * 2], 1498 &x->partition_info->bmi[i], 1499 sizeof(x->partition_info->bmi[i])); 1500 for (j = 1; j < bw; ++j) 1501 vpx_memcpy(&x->partition_info->bmi[i + j], 1502 &x->partition_info->bmi[i], 1503 sizeof(x->partition_info->bmi[i])); 1504 } 1505 } /* for each label */ 1506 1507 if (this_segment_rd < bsi->segment_rd) { 1508 bsi->r = br; 1509 bsi->d = bd; 1510 bsi->segment_yrate = segmentyrate; 1511 bsi->segment_rd = this_segment_rd; 1512 1513 // store everything needed to come back to this!! 1514 for (i = 0; i < 4; i++) { 1515 bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv; 1516 if (mbmi->ref_frame[1] > 0) 1517 bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv; 1518 bsi->modes[i] = x->partition_info->bmi[i].mode; 1519 bsi->eobs[i] = best_eobs[i]; 1520 } 1521 } 1522} 1523 1524static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, 1525 int_mv *best_ref_mv, 1526 int_mv *second_best_ref_mv, 1527 int64_t best_rd, 1528 int *returntotrate, 1529 int *returnyrate, 1530 int *returndistortion, 1531 int *skippable, int mvthresh, 1532 int_mv seg_mvs[4][MAX_REF_FRAMES], 1533 int mi_row, int mi_col) { 1534 int i; 1535 BEST_SEG_INFO bsi; 1536 MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; 1537 1538 vpx_memset(&bsi, 0, sizeof(bsi)); 1539 1540 bsi.segment_rd = best_rd; 1541 bsi.ref_mv = best_ref_mv; 1542 bsi.second_ref_mv = second_best_ref_mv; 1543 bsi.mvp.as_int = best_ref_mv->as_int; 1544 bsi.mvthresh = mvthresh; 1545 1546 for (i = 0; i < 4; i++) 1547 bsi.modes[i] = ZEROMV; 1548 1549 rd_check_segment_txsize(cpi, x, &bsi, seg_mvs, mi_row, mi_col); 1550 1551 /* set it to the best */ 1552 for (i = 0; i < 4; i++) { 1553 
x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int = bsi.mvs[i].as_int; 1554 if (mbmi->ref_frame[1] > 0) 1555 x->e_mbd.mode_info_context->bmi[i].as_mv[1].as_int = 1556 bsi.second_mvs[i].as_int; 1557 x->e_mbd.plane[0].eobs[i] = bsi.eobs[i]; 1558 } 1559 1560 /* save partitions */ 1561 x->partition_info->count = 4; 1562 1563 for (i = 0; i < x->partition_info->count; i++) { 1564 x->partition_info->bmi[i].mode = bsi.modes[i]; 1565 x->partition_info->bmi[i].mv.as_mv = bsi.mvs[i].as_mv; 1566 if (mbmi->ref_frame[1] > 0) 1567 x->partition_info->bmi[i].second_mv.as_mv = bsi.second_mvs[i].as_mv; 1568 } 1569 /* 1570 * used to set mbmi->mv.as_int 1571 */ 1572 x->partition_info->bmi[3].mv.as_int = bsi.mvs[3].as_int; 1573 if (mbmi->ref_frame[1] > 0) 1574 x->partition_info->bmi[3].second_mv.as_int = bsi.second_mvs[3].as_int; 1575 1576 *returntotrate = bsi.r; 1577 *returndistortion = bsi.d; 1578 *returnyrate = bsi.segment_yrate; 1579 *skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_SB8X8); 1580 mbmi->mode = bsi.modes[3]; 1581 1582 return (int)(bsi.segment_rd); 1583} 1584 1585static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, 1586 uint8_t *ref_y_buffer, int ref_y_stride, 1587 int ref_frame, enum BlockSize block_size ) { 1588 MACROBLOCKD *xd = &x->e_mbd; 1589 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; 1590 int_mv this_mv; 1591 int i; 1592 int zero_seen = 0; 1593 int best_index = 0; 1594 int best_sad = INT_MAX; 1595 int this_sad = INT_MAX; 1596 1597 uint8_t *src_y_ptr = x->plane[0].src.buf; 1598 uint8_t *ref_y_ptr; 1599 int row_offset, col_offset; 1600 1601 // Get the sad for each candidate reference mv 1602 for (i = 0; i < MAX_MV_REF_CANDIDATES; i++) { 1603 this_mv.as_int = mbmi->ref_mvs[ref_frame][i].as_int; 1604 1605 // The list is at an end if we see 0 for a second time. 
1606 if (!this_mv.as_int && zero_seen) 1607 break; 1608 zero_seen = zero_seen || !this_mv.as_int; 1609 1610 row_offset = this_mv.as_mv.row >> 3; 1611 col_offset = this_mv.as_mv.col >> 3; 1612 ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset; 1613 1614 // Find sad for current vector. 1615 this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, 1616 ref_y_ptr, ref_y_stride, 1617 0x7fffffff); 1618 1619 // Note if it is the best so far. 1620 if (this_sad < best_sad) { 1621 best_sad = this_sad; 1622 best_index = i; 1623 } 1624 } 1625 1626 // Note the index of the mv that worked best in the reference list. 1627 x->mv_best_ref_index[ref_frame] = best_index; 1628} 1629 1630static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, 1631 unsigned int *ref_costs_single, 1632 unsigned int *ref_costs_comp, 1633 vp9_prob *comp_mode_p) { 1634 VP9_COMMON *const cm = &cpi->common; 1635 MACROBLOCKD *const xd = &cpi->mb.e_mbd; 1636 int seg_ref_active = vp9_segfeature_active(xd, segment_id, 1637 SEG_LVL_REF_FRAME); 1638 if (seg_ref_active) { 1639 vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single)); 1640 vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp)); 1641 *comp_mode_p = 128; 1642 } else { 1643 vp9_prob intra_inter_p = vp9_get_pred_prob(cm, xd, PRED_INTRA_INTER); 1644 vp9_prob comp_inter_p = 128; 1645 1646 if (cm->comp_pred_mode == HYBRID_PREDICTION) { 1647 comp_inter_p = vp9_get_pred_prob(cm, xd, PRED_COMP_INTER_INTER); 1648 *comp_mode_p = comp_inter_p; 1649 } else { 1650 *comp_mode_p = 128; 1651 } 1652 1653 ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0); 1654 1655 if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) { 1656 vp9_prob ref_single_p1 = vp9_get_pred_prob(cm, xd, PRED_SINGLE_REF_P1); 1657 vp9_prob ref_single_p2 = vp9_get_pred_prob(cm, xd, PRED_SINGLE_REF_P2); 1658 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1); 1659 1660 if (cm->comp_pred_mode == 
HYBRID_PREDICTION) 1661 base_cost += vp9_cost_bit(comp_inter_p, 0); 1662 1663 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] = 1664 ref_costs_single[ALTREF_FRAME] = base_cost; 1665 ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0); 1666 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1); 1667 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1); 1668 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0); 1669 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1); 1670 } else { 1671 ref_costs_single[LAST_FRAME] = 512; 1672 ref_costs_single[GOLDEN_FRAME] = 512; 1673 ref_costs_single[ALTREF_FRAME] = 512; 1674 } 1675 if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) { 1676 vp9_prob ref_comp_p = vp9_get_pred_prob(cm, xd, PRED_COMP_REF_P); 1677 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1); 1678 1679 if (cm->comp_pred_mode == HYBRID_PREDICTION) 1680 base_cost += vp9_cost_bit(comp_inter_p, 1); 1681 1682 ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0); 1683 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1); 1684 } else { 1685 ref_costs_comp[LAST_FRAME] = 512; 1686 ref_costs_comp[GOLDEN_FRAME] = 512; 1687 } 1688 } 1689} 1690 1691static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, 1692 int mode_index, 1693 PARTITION_INFO *partition, 1694 int_mv *ref_mv, 1695 int_mv *second_ref_mv, 1696 int64_t comp_pred_diff[NB_PREDICTION_TYPES], 1697 int64_t txfm_size_diff[NB_TXFM_MODES]) { 1698 MACROBLOCKD *const xd = &x->e_mbd; 1699 1700 // Take a snapshot of the coding context so it can be 1701 // restored if we decide to encode this way 1702 ctx->skip = x->skip; 1703 ctx->best_mode_index = mode_index; 1704 ctx->mic = *xd->mode_info_context; 1705 1706 if (partition) 1707 ctx->partition_info = *partition; 1708 1709 ctx->best_ref_mv.as_int = ref_mv->as_int; 1710 ctx->second_best_ref_mv.as_int = second_ref_mv->as_int; 1711 1712 
ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY]; 1713 ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY]; 1714 ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION]; 1715 1716 memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff)); 1717} 1718 1719static void setup_pred_block(const MACROBLOCKD *xd, 1720 struct buf_2d dst[MAX_MB_PLANE], 1721 const YV12_BUFFER_CONFIG *src, 1722 int mi_row, int mi_col, 1723 const struct scale_factors *scale, 1724 const struct scale_factors *scale_uv) { 1725 int i; 1726 1727 dst[0].buf = src->y_buffer; 1728 dst[0].stride = src->y_stride; 1729 dst[1].buf = src->u_buffer; 1730 dst[2].buf = src->v_buffer; 1731 dst[1].stride = dst[2].stride = src->uv_stride; 1732#if CONFIG_ALPHA 1733 dst[3].buf = src->alpha_buffer; 1734 dst[3].stride = src->alpha_stride; 1735#endif 1736 1737 // TODO(jkoleszar): Make scale factors per-plane data 1738 for (i = 0; i < MAX_MB_PLANE; i++) { 1739 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col, 1740 i ? 
scale_uv : scale, 1741 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y); 1742 } 1743} 1744 1745static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, 1746 int idx, MV_REFERENCE_FRAME frame_type, 1747 enum BlockSize block_size, 1748 int mi_row, int mi_col, 1749 int_mv frame_nearest_mv[MAX_REF_FRAMES], 1750 int_mv frame_near_mv[MAX_REF_FRAMES], 1751 struct buf_2d yv12_mb[4][MAX_MB_PLANE], 1752 struct scale_factors scale[MAX_REF_FRAMES]) { 1753 VP9_COMMON *cm = &cpi->common; 1754 YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]]; 1755 MACROBLOCKD *const xd = &x->e_mbd; 1756 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; 1757 1758 // set up scaling factors 1759 scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1]; 1760 1761 scale[frame_type].x_offset_q4 = 1762 ROUND_POWER_OF_TWO(mi_col * MI_SIZE * scale[frame_type].x_scale_fp, 1763 VP9_REF_SCALE_SHIFT) & 0xf; 1764 scale[frame_type].y_offset_q4 = 1765 ROUND_POWER_OF_TWO(mi_row * MI_SIZE * scale[frame_type].y_scale_fp, 1766 VP9_REF_SCALE_SHIFT) & 0xf; 1767 1768 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this 1769 // use the UV scaling factors. 1770 setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col, 1771 &scale[frame_type], &scale[frame_type]); 1772 1773 // Gets an initial list of candidate vectors from neighbours and orders them 1774 vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context, 1775 xd->prev_mode_info_context, 1776 frame_type, 1777 mbmi->ref_mvs[frame_type], 1778 cpi->common.ref_frame_sign_bias); 1779 1780 // Candidate refinement carried out at encoder and decoder 1781 vp9_find_best_ref_mvs(xd, 1782 mbmi->ref_mvs[frame_type], 1783 &frame_nearest_mv[frame_type], 1784 &frame_near_mv[frame_type]); 1785 1786 // Further refinement that is encode side only to test the top few candidates 1787 // in full and choose the best as the centre point for subsequent searches. 
1788 // The current implementation doesn't support scaling. 1789 if (scale[frame_type].x_scale_fp == (1 << VP9_REF_SCALE_SHIFT) && 1790 scale[frame_type].y_scale_fp == (1 << VP9_REF_SCALE_SHIFT)) 1791 mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride, 1792 frame_type, block_size); 1793} 1794 1795static YV12_BUFFER_CONFIG *get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) { 1796 YV12_BUFFER_CONFIG *scaled_ref_frame = NULL; 1797 int fb = get_ref_frame_idx(cpi, ref_frame); 1798 if (cpi->scaled_ref_idx[fb] != cpi->common.ref_frame_map[fb]) 1799 scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb]]; 1800 return scaled_ref_frame; 1801} 1802 1803static void model_rd_from_var_lapndz(int var, int n, int qstep, 1804 int *rate, int *dist) { 1805 // This function models the rate and distortion for a Laplacian 1806 // source with given variance when quantized with a uniform quantizer 1807 // with given stepsize. The closed form expressions are in: 1808 // Hang and Chen, "Source Model for transform video coder and its 1809 // application - Part I: Fundamental Theory", IEEE Trans. Circ. 1810 // Sys. for Video Tech., April 1997. 1811 // The function is implemented as piecewise approximation to the 1812 // exact computation. 
1813 // TODO(debargha): Implement the functions by interpolating from a 1814 // look-up table 1815 vp9_clear_system_state(); 1816 if (var == 0 || n == 0) { 1817 *rate = 0; 1818 *dist = 0; 1819 } else { 1820 double D, R; 1821 double s2 = (double) var / n; 1822 double s = sqrt(s2); 1823 double x = qstep / s; 1824 if (x > 1.0) { 1825 double y = exp(-x / 2); 1826 double y2 = y * y; 1827 D = 2.069981728764738 * y2 - 2.764286806516079 * y + 1.003956960819275; 1828 R = 0.924056758535089 * y2 + 2.738636469814024 * y - 0.005169662030017; 1829 } else { 1830 double x2 = x * x; 1831 D = 0.075303187668830 * x2 + 0.004296954321112 * x - 0.000413209252807; 1832 if (x > 0.125) 1833 R = 1 / (-0.03459733614226 * x2 + 0.36561675733603 * x + 1834 0.1626989668625); 1835 else 1836 R = -1.442252874826093 * log(x) + 1.944647760719664; 1837 } 1838 if (R < 0) { 1839 *rate = 0; 1840 *dist = var; 1841 } else { 1842 *rate = (n * R * 256 + 0.5); 1843 *dist = (n * D * s2 + 0.5); 1844 } 1845 } 1846 vp9_clear_system_state(); 1847} 1848 1849static enum BlockSize get_plane_block_size(BLOCK_SIZE_TYPE bsize, 1850 struct macroblockd_plane *pd) { 1851 return get_block_size(plane_block_width(bsize, pd), 1852 plane_block_height(bsize, pd)); 1853} 1854 1855static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize, 1856 MACROBLOCK *x, MACROBLOCKD *xd, 1857 int *out_rate_sum, int *out_dist_sum) { 1858 // Note our transform coeffs are 8 times an orthogonal transform. 1859 // Hence quantizer step is also 8 times. To get effective quantizer 1860 // we need to divide by 8 before sending to modeling function. 
1861 unsigned int sse, var; 1862 int i, rate_sum = 0, dist_sum = 0; 1863 1864 for (i = 0; i < MAX_MB_PLANE; ++i) { 1865 struct macroblock_plane *const p = &x->plane[i]; 1866 struct macroblockd_plane *const pd = &xd->plane[i]; 1867 1868 // TODO(dkovalev) the same code in get_plane_block_size 1869 const int bw = plane_block_width(bsize, pd); 1870 const int bh = plane_block_height(bsize, pd); 1871 const enum BlockSize bs = get_block_size(bw, bh); 1872 int rate, dist; 1873 var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, 1874 pd->dst.buf, pd->dst.stride, &sse); 1875 model_rd_from_var_lapndz(var, bw * bh, pd->dequant[1] >> 3, &rate, &dist); 1876 1877 rate_sum += rate; 1878 dist_sum += dist; 1879 } 1880 1881 *out_rate_sum = rate_sum; 1882 *out_dist_sum = dist_sum; 1883} 1884 1885static INLINE int get_switchable_rate(VP9_COMMON *cm, MACROBLOCK *x) { 1886 MACROBLOCKD *xd = &x->e_mbd; 1887 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; 1888 1889 const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP); 1890 const int m = vp9_switchable_interp_map[mbmi->interp_filter]; 1891 return SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m]; 1892} 1893 1894static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, 1895 BLOCK_SIZE_TYPE bsize, 1896 int mi_row, int mi_col, 1897 int_mv *tmp_mv, int *rate_mv) { 1898 MACROBLOCKD *xd = &x->e_mbd; 1899 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; 1900 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; 1901 int bestsme = INT_MAX; 1902 int further_steps, step_param = cpi->sf.first_step; 1903 int sadpb = x->sadperbit16; 1904 int_mv mvp_full; 1905 int ref = mbmi->ref_frame[0]; 1906 int_mv ref_mv = mbmi->ref_mvs[ref][0]; 1907 int sr = 0; 1908 const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]); 1909 1910 int tmp_col_min = x->mv_col_min; 1911 int tmp_col_max = x->mv_col_max; 1912 int tmp_row_min = x->mv_row_min; 1913 int tmp_row_max = x->mv_row_max; 1914 1915 YV12_BUFFER_CONFIG 
*scaled_ref_frame = get_scaled_ref_frame(cpi, ref); 1916 1917 if (scaled_ref_frame) { 1918 int i; 1919 // Swap out the reference frame for a version that's been scaled to 1920 // match the resolution of the current frame, allowing the existing 1921 // motion search code to be used without additional modifications. 1922 for (i = 0; i < MAX_MB_PLANE; i++) 1923 backup_yv12[i] = xd->plane[i].pre[0]; 1924 1925 setup_pre_planes(xd, scaled_ref_frame, NULL, mi_row, mi_col, 1926 NULL, NULL); 1927 } 1928 1929 vp9_clamp_mv_min_max(x, &ref_mv); 1930 1931 sr = vp9_init_search_range(cpi->common.width, cpi->common.height); 1932 1933 // mvp_full.as_int = ref_mv[0].as_int; 1934 mvp_full.as_int = 1935 mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int; 1936 1937 mvp_full.as_mv.col >>= 3; 1938 mvp_full.as_mv.row >>= 3; 1939 1940 // adjust search range according to sr from mv prediction 1941 step_param = MAX(step_param, sr); 1942 1943 // Further step/diamond searches as necessary 1944 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; 1945 1946 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, 1947 sadpb, further_steps, 1, 1948 &cpi->fn_ptr[block_size], 1949 &ref_mv, tmp_mv); 1950 1951 x->mv_col_min = tmp_col_min; 1952 x->mv_col_max = tmp_col_max; 1953 x->mv_row_min = tmp_row_min; 1954 x->mv_row_max = tmp_row_max; 1955 1956 if (bestsme < INT_MAX) { 1957 int dis; /* TODO: use dis in distortion calculation later. 
*/ 1958 unsigned int sse; 1959 cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv, 1960 x->errorperbit, 1961 &cpi->fn_ptr[block_size], 1962 x->nmvjointcost, x->mvcost, 1963 &dis, &sse); 1964 } 1965 *rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv, 1966 x->nmvjointcost, x->mvcost, 1967 96, xd->allow_high_precision_mv); 1968 if (scaled_ref_frame) { 1969 int i; 1970 for (i = 0; i < MAX_MB_PLANE; i++) 1971 xd->plane[i].pre[0] = backup_yv12[i]; 1972 } 1973} 1974 1975static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, 1976 BLOCK_SIZE_TYPE bsize, 1977 int_mv *frame_mv, 1978 int mi_row, int mi_col, 1979 int_mv single_newmv[MAX_REF_FRAMES], 1980 int *rate_mv) { 1981 int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize); 1982 MACROBLOCKD *xd = &x->e_mbd; 1983 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; 1984 int refs[2] = { mbmi->ref_frame[0], 1985 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; 1986 int_mv ref_mv[2]; 1987 const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]); 1988 int ite; 1989 // Prediction buffer from second frame. 1990 uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t)); 1991 1992 // Do joint motion search in compound mode to get more accurate mv. 
1993 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; 1994 struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}}; 1995 struct buf_2d scaled_first_yv12; 1996 int last_besterr[2] = {INT_MAX, INT_MAX}; 1997 YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL}; 1998 scaled_ref_frame[0] = get_scaled_ref_frame(cpi, mbmi->ref_frame[0]); 1999 scaled_ref_frame[1] = get_scaled_ref_frame(cpi, mbmi->ref_frame[1]); 2000 2001 ref_mv[0] = mbmi->ref_mvs[refs[0]][0]; 2002 ref_mv[1] = mbmi->ref_mvs[refs[1]][0]; 2003 2004 if (scaled_ref_frame[0]) { 2005 int i; 2006 // Swap out the reference frame for a version that's been scaled to 2007 // match the resolution of the current frame, allowing the existing 2008 // motion search code to be used without additional modifications. 2009 for (i = 0; i < MAX_MB_PLANE; i++) 2010 backup_yv12[i] = xd->plane[i].pre[0]; 2011 setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col, 2012 NULL, NULL); 2013 } 2014 2015 if (scaled_ref_frame[1]) { 2016 int i; 2017 for (i = 0; i < MAX_MB_PLANE; i++) 2018 backup_second_yv12[i] = xd->plane[i].pre[1]; 2019 2020 setup_pre_planes(xd, scaled_ref_frame[1], NULL, mi_row, mi_col, 2021 NULL, NULL); 2022 } 2023 2024 xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0], 2025 mi_row, mi_col); 2026 xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1], 2027 mi_row, mi_col); 2028 scaled_first_yv12 = xd->plane[0].pre[0]; 2029 2030 // Initialize mv using single prediction mode result. 2031 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; 2032 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; 2033 2034 // Allow joint search multiple times iteratively for each ref frame 2035 // and break out the search loop if it couldn't find better mv. 
2036 for (ite = 0; ite < 4; ite++) { 2037 struct buf_2d ref_yv12[2]; 2038 int bestsme = INT_MAX; 2039 int sadpb = x->sadperbit16; 2040 int_mv tmp_mv; 2041 int search_range = 3; 2042 2043 int tmp_col_min = x->mv_col_min; 2044 int tmp_col_max = x->mv_col_max; 2045 int tmp_row_min = x->mv_row_min; 2046 int tmp_row_max = x->mv_row_max; 2047 int id = ite % 2; 2048 2049 // Initialized here because of compiler problem in Visual Studio. 2050 ref_yv12[0] = xd->plane[0].pre[0]; 2051 ref_yv12[1] = xd->plane[0].pre[1]; 2052 2053 // Get pred block from second frame. 2054 vp9_build_inter_predictor(ref_yv12[!id].buf, 2055 ref_yv12[!id].stride, 2056 second_pred, pw, 2057 &frame_mv[refs[!id]], 2058 &xd->scale_factor[!id], 2059 pw, ph, 0, 2060 &xd->subpix); 2061 2062 // Compound motion search on first ref frame. 2063 if (id) 2064 xd->plane[0].pre[0] = ref_yv12[id]; 2065 vp9_clamp_mv_min_max(x, &ref_mv[id]); 2066 2067 // Use mv result from single mode as mvp. 2068 tmp_mv.as_int = frame_mv[refs[id]].as_int; 2069 2070 tmp_mv.as_mv.col >>= 3; 2071 tmp_mv.as_mv.row >>= 3; 2072 2073 // Small-range full-pixel motion search 2074 bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, 2075 search_range, 2076 &cpi->fn_ptr[block_size], 2077 x->nmvjointcost, x->mvcost, 2078 &ref_mv[id], second_pred, 2079 pw, ph); 2080 2081 x->mv_col_min = tmp_col_min; 2082 x->mv_col_max = tmp_col_max; 2083 x->mv_row_min = tmp_row_min; 2084 x->mv_row_max = tmp_row_max; 2085 2086 if (bestsme < INT_MAX) { 2087 int dis; /* TODO: use dis in distortion calculation later. 
*/ 2088 unsigned int sse; 2089 2090 bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv, 2091 &ref_mv[id], 2092 x->errorperbit, 2093 &cpi->fn_ptr[block_size], 2094 x->nmvjointcost, x->mvcost, 2095 &dis, &sse, second_pred, 2096 pw, ph); 2097 } 2098 2099 if (id) 2100 xd->plane[0].pre[0] = scaled_first_yv12; 2101 2102 if (bestsme < last_besterr[id]) { 2103 frame_mv[refs[id]].as_int = tmp_mv.as_int; 2104 last_besterr[id] = bestsme; 2105 } else { 2106 break; 2107 } 2108 } 2109 2110 // restore the predictor 2111 if (scaled_ref_frame[0]) { 2112 int i; 2113 for (i = 0; i < MAX_MB_PLANE; i++) 2114 xd->plane[i].pre[0] = backup_yv12[i]; 2115 } 2116 2117 if (scaled_ref_frame[1]) { 2118 int i; 2119 for (i = 0; i < MAX_MB_PLANE; i++) 2120 xd->plane[i].pre[1] = backup_second_yv12[i]; 2121 } 2122 *rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]], 2123 &mbmi->ref_mvs[refs[0]][0], 2124 x->nmvjointcost, x->mvcost, 96, 2125 x->e_mbd.allow_high_precision_mv); 2126 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]], 2127 &mbmi->ref_mvs[refs[1]][0], 2128 x->nmvjointcost, x->mvcost, 96, 2129 x->e_mbd.allow_high_precision_mv); 2130 2131 vpx_free(second_pred); 2132} 2133 2134static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, 2135 BLOCK_SIZE_TYPE bsize, 2136 int64_t txfm_cache[], 2137 int *rate2, int *distortion, int *skippable, 2138 int *rate_y, int *distortion_y, 2139 int *rate_uv, int *distortion_uv, 2140 int *mode_excluded, int *disable_skip, 2141 INTERPOLATIONFILTERTYPE *best_filter, 2142 int_mv *frame_mv, 2143 int mi_row, int mi_col, 2144 int_mv single_newmv[MAX_REF_FRAMES]) { 2145 const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize); 2146 2147 VP9_COMMON *cm = &cpi->common; 2148 MACROBLOCKD *xd = &x->e_mbd; 2149 const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]); 2150 const enum BlockSize uv_block_size = get_plane_block_size(bsize, 2151 &xd->plane[1]); 2152 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; 2153 const int is_comp_pred = 
(mbmi->ref_frame[1] > 0); 2154 const int num_refs = is_comp_pred ? 2 : 1; 2155 const int this_mode = mbmi->mode; 2156 int i; 2157 int refs[2] = { mbmi->ref_frame[0], 2158 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; 2159 int_mv cur_mv[2]; 2160 int64_t this_rd = 0; 2161 unsigned char tmp_buf[MAX_MB_PLANE][64 * 64]; 2162 int pred_exists = 0; 2163 int interpolating_intpel_seen = 0; 2164 int intpel_mv; 2165 int64_t rd, best_rd = INT64_MAX; 2166 2167 switch (this_mode) { 2168 int rate_mv; 2169 case NEWMV: 2170 if (is_comp_pred) { 2171 // Initialize mv using single prediction mode result. 2172 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; 2173 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; 2174 2175 if (cpi->sf.comp_inter_joint_search_thresh < bsize) { 2176 joint_motion_search(cpi, x, bsize, frame_mv, 2177 mi_row, mi_col, single_newmv, &rate_mv); 2178 } else { 2179 rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]], 2180 &mbmi->ref_mvs[refs[0]][0], 2181 x->nmvjointcost, x->mvcost, 96, 2182 x->e_mbd.allow_high_precision_mv); 2183 rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]], 2184 &mbmi->ref_mvs[refs[1]][0], 2185 x->nmvjointcost, x->mvcost, 96, 2186 x->e_mbd.allow_high_precision_mv); 2187 } 2188 if (frame_mv[refs[0]].as_int == INVALID_MV || 2189 frame_mv[refs[1]].as_int == INVALID_MV) 2190 return INT64_MAX; 2191 *rate2 += rate_mv; 2192 2193 } else { 2194 int_mv tmp_mv; 2195 single_motion_search(cpi, x, bsize, mi_row, mi_col, 2196 &tmp_mv, &rate_mv); 2197 *rate2 += rate_mv; 2198 frame_mv[refs[0]].as_int = 2199 xd->mode_info_context->bmi[0].as_mv[0].as_int = tmp_mv.as_int; 2200 single_newmv[refs[0]].as_int = tmp_mv.as_int; 2201 } 2202 break; 2203 case NEARMV: 2204 case NEARESTMV: 2205 case ZEROMV: 2206 default: 2207 break; 2208 } 2209 for (i = 0; i < num_refs; ++i) { 2210 cur_mv[i] = frame_mv[refs[i]]; 2211 // Clip "next_nearest" so that it does not extend to far out of image 2212 if (this_mode == NEWMV) 2213 assert(!clamp_mv2(&cur_mv[i], xd)); 2214 
else 2215 clamp_mv2(&cur_mv[i], xd); 2216 2217 if (mv_check_bounds(x, &cur_mv[i])) 2218 return INT64_MAX; 2219 mbmi->mv[i].as_int = cur_mv[i].as_int; 2220 } 2221 2222 /* We don't include the cost of the second reference here, because there 2223 * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other 2224 * words if you present them in that order, the second one is always known 2225 * if the first is known */ 2226 *rate2 += vp9_cost_mv_ref(cpi, this_mode, 2227 mbmi->mb_mode_context[mbmi->ref_frame[0]]); 2228 2229 pred_exists = 0; 2230 interpolating_intpel_seen = 0; 2231 // Are all MVs integer pel for Y and UV 2232 intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 && 2233 (mbmi->mv[0].as_mv.col & 15) == 0; 2234 if (is_comp_pred) 2235 intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 && 2236 (mbmi->mv[1].as_mv.col & 15) == 0; 2237 // Search for best switchable filter by checking the variance of 2238 // pred error irrespective of whether the filter will be used 2239 if (cpi->speed > 4) { 2240 *best_filter = EIGHTTAP; 2241 } else { 2242 int i, newbest; 2243 int tmp_rate_sum = 0, tmp_dist_sum = 0; 2244 for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { 2245 int rs = 0; 2246 const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i]; 2247 const int is_intpel_interp = intpel_mv && 2248 vp9_is_interpolating_filter[filter]; 2249 mbmi->interp_filter = filter; 2250 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); 2251 2252 if (cm->mcomp_filter_type == SWITCHABLE) 2253 rs = get_switchable_rate(cm, x); 2254 2255 if (interpolating_intpel_seen && is_intpel_interp) { 2256 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_sum, tmp_dist_sum); 2257 } else { 2258 int rate_sum = 0, dist_sum = 0; 2259 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); 2260 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum); 2261 rd = RDCOST(x->rdmult, x->rddiv, rs + rate_sum, dist_sum); 2262 if (!interpolating_intpel_seen && is_intpel_interp) { 2263 tmp_rate_sum = rate_sum; 
2264 tmp_dist_sum = dist_sum; 2265 } 2266 } 2267 newbest = i == 0 || rd < best_rd; 2268 2269 if (newbest) { 2270 best_rd = rd; 2271 *best_filter = mbmi->interp_filter; 2272 } 2273 2274 if ((cm->mcomp_filter_type == SWITCHABLE && newbest) || 2275 (cm->mcomp_filter_type != SWITCHABLE && 2276 cm->mcomp_filter_type == mbmi->interp_filter)) { 2277 int p; 2278 2279 for (p = 0; p < MAX_MB_PLANE; p++) { 2280 const int y = (MI_SIZE * bh) >> xd->plane[p].subsampling_y; 2281 const int x = (MI_SIZE * bw) >> xd->plane[p].subsampling_x; 2282 int i; 2283 2284 for (i = 0; i < y; i++) 2285 vpx_memcpy(&tmp_buf[p][64 * i], 2286 xd->plane[p].dst.buf + i * xd->plane[p].dst.stride, x); 2287 } 2288 pred_exists = 1; 2289 } 2290 interpolating_intpel_seen |= is_intpel_interp; 2291 } 2292 } 2293 2294 // Set the appripriate filter 2295 mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ? 2296 cm->mcomp_filter_type : *best_filter; 2297 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); 2298 2299 2300 if (pred_exists) { 2301 int p; 2302 2303 for (p = 0; p < MAX_MB_PLANE; p++) { 2304 const int y = (MI_SIZE * bh) >> xd->plane[p].subsampling_y; 2305 const int x = (MI_SIZE * bw) >> xd->plane[p].subsampling_x; 2306 int i; 2307 2308 for (i = 0; i < y; i++) 2309 vpx_memcpy(xd->plane[p].dst.buf + i * xd->plane[p].dst.stride, 2310 &tmp_buf[p][64 * i], x); 2311 } 2312 } else { 2313 // Handles the special case when a filter that is not in the 2314 // switchable list (ex. 
bilinear, 6-tap) is indicated at the frame level 2315 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); 2316 } 2317 2318 if (cpi->common.mcomp_filter_type == SWITCHABLE) 2319 *rate2 += get_switchable_rate(cm, x); 2320 2321 if (cpi->active_map_enabled && x->active_ptr[0] == 0) 2322 x->skip = 1; 2323 else if (x->encode_breakout) { 2324 unsigned int var, sse; 2325 int threshold = (xd->plane[0].dequant[1] 2326 * xd->plane[0].dequant[1] >> 4); 2327 2328 if (threshold < x->encode_breakout) 2329 threshold = x->encode_breakout; 2330 2331 var = cpi->fn_ptr[block_size].vf(x->plane[0].src.buf, 2332 x->plane[0].src.stride, 2333 xd->plane[0].dst.buf, 2334 xd->plane[0].dst.stride, 2335 &sse); 2336 2337 if ((int)sse < threshold) { 2338 unsigned int q2dc = xd->plane[0].dequant[0]; 2339 /* If there is no codeable 2nd order dc 2340 or a very small uniform pixel change change */ 2341 if ((sse - var < q2dc * q2dc >> 4) || 2342 (sse / 2 > var && sse - var < 64)) { 2343 // Check u and v to make sure skip is ok 2344 int sse2; 2345 unsigned int sse2u, sse2v; 2346 var = cpi->fn_ptr[uv_block_size].vf(x->plane[1].src.buf, 2347 x->plane[1].src.stride, 2348 xd->plane[1].dst.buf, 2349 xd->plane[1].dst.stride, &sse2u); 2350 var = cpi->fn_ptr[uv_block_size].vf(x->plane[2].src.buf, 2351 x->plane[1].src.stride, 2352 xd->plane[2].dst.buf, 2353 xd->plane[1].dst.stride, &sse2v); 2354 sse2 = sse2u + sse2v; 2355 2356 if (sse2 * 2 < threshold) { 2357 x->skip = 1; 2358 *distortion = sse + sse2; 2359 *rate2 = 500; 2360 2361 /* for best_yrd calculation */ 2362 *rate_uv = 0; 2363 *distortion_uv = sse2; 2364 2365 *disable_skip = 1; 2366 this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); 2367 } 2368 } 2369 } 2370 } 2371 2372 if (!x->skip) { 2373 int skippable_y, skippable_uv; 2374 2375 // Y cost and distortion 2376 super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, 2377 bsize, txfm_cache); 2378 2379 *rate2 += *rate_y; 2380 *distortion += *distortion_y; 2381 2382 super_block_uvrd(cm, 
x, rate_uv, distortion_uv, 2383 &skippable_uv, bsize); 2384 2385 *rate2 += *rate_uv; 2386 *distortion += *distortion_uv; 2387 *skippable = skippable_y && skippable_uv; 2388 } 2389 2390 if (!(*mode_excluded)) { 2391 if (is_comp_pred) { 2392 *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY); 2393 } else { 2394 *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY); 2395 } 2396 } 2397 2398 return this_rd; // if 0, this will be re-calculated by caller 2399} 2400 2401void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, 2402 int *returnrate, int *returndist, 2403 BLOCK_SIZE_TYPE bsize, 2404 PICK_MODE_CONTEXT *ctx) { 2405 VP9_COMMON *cm = &cpi->common; 2406 MACROBLOCKD *xd = &x->e_mbd; 2407 int rate_y = 0, rate_uv; 2408 int rate_y_tokenonly = 0, rate_uv_tokenonly; 2409 int dist_y = 0, dist_uv; 2410 int y_skip = 0, uv_skip; 2411 int64_t txfm_cache[NB_TXFM_MODES], err; 2412 MB_PREDICTION_MODE mode; 2413 TX_SIZE txfm_size; 2414 int rate4x4_y, rate4x4_y_tokenonly, dist4x4_y; 2415 int64_t err4x4 = INT64_MAX; 2416 int i; 2417 2418 vpx_memset(&txfm_cache,0,sizeof(txfm_cache)); 2419 ctx->skip = 0; 2420 xd->mode_info_context->mbmi.mode = DC_PRED; 2421 xd->mode_info_context->mbmi.ref_frame[0] = INTRA_FRAME; 2422 err = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, 2423 &dist_y, &y_skip, bsize, txfm_cache); 2424 mode = xd->mode_info_context->mbmi.mode; 2425 txfm_size = xd->mode_info_context->mbmi.txfm_size; 2426 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, 2427 &dist_uv, &uv_skip, 2428 (bsize < BLOCK_SIZE_SB8X8) ? 
BLOCK_SIZE_SB8X8 : 2429 bsize); 2430 if (bsize < BLOCK_SIZE_SB8X8) 2431 err4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4_y, 2432 &rate4x4_y_tokenonly, 2433 &dist4x4_y, err); 2434 2435 if (y_skip && uv_skip) { 2436 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + 2437 vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1); 2438 *returndist = dist_y + (dist_uv >> 2); 2439 memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff)); 2440 xd->mode_info_context->mbmi.mode = mode; 2441 xd->mode_info_context->mbmi.txfm_size = txfm_size; 2442 } else if (bsize < BLOCK_SIZE_SB8X8 && err4x4 < err) { 2443 *returnrate = rate4x4_y + rate_uv + 2444 vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); 2445 *returndist = dist4x4_y + (dist_uv >> 2); 2446 vpx_memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff)); 2447 xd->mode_info_context->mbmi.txfm_size = TX_4X4; 2448 } else { 2449 *returnrate = rate_y + rate_uv + 2450 vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); 2451 *returndist = dist_y + (dist_uv >> 2); 2452 for (i = 0; i < NB_TXFM_MODES; i++) { 2453 ctx->txfm_rd_diff[i] = txfm_cache[i] - txfm_cache[cm->txfm_mode]; 2454 } 2455 xd->mode_info_context->mbmi.txfm_size = txfm_size; 2456 xd->mode_info_context->mbmi.mode = mode; 2457 } 2458 2459 ctx->mic = *xd->mode_info_context; 2460} 2461 2462int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, 2463 int mi_row, int mi_col, 2464 int *returnrate, 2465 int *returndistortion, 2466 BLOCK_SIZE_TYPE bsize, 2467 PICK_MODE_CONTEXT *ctx) { 2468 VP9_COMMON *cm = &cpi->common; 2469 MACROBLOCKD *xd = &x->e_mbd; 2470 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; 2471 const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]); 2472 MB_PREDICTION_MODE this_mode; 2473 MB_PREDICTION_MODE best_mode = DC_PRED; 2474 MV_REFERENCE_FRAME ref_frame; 2475 unsigned char segment_id = xd->mode_info_context->mbmi.segment_id; 2476 int comp_pred, i; 2477 int_mv 
frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; 2478 struct buf_2d yv12_mb[4][MAX_MB_PLANE]; 2479 int_mv single_newmv[MAX_REF_FRAMES]; 2480 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, 2481 VP9_ALT_FLAG }; 2482 int idx_list[4] = {0, 2483 cpi->lst_fb_idx, 2484 cpi->gld_fb_idx, 2485 cpi->alt_fb_idx}; 2486 int64_t best_rd = INT64_MAX; 2487 int64_t best_txfm_rd[NB_TXFM_MODES]; 2488 int64_t best_txfm_diff[NB_TXFM_MODES]; 2489 int64_t best_pred_diff[NB_PREDICTION_TYPES]; 2490 int64_t best_pred_rd[NB_PREDICTION_TYPES]; 2491 MB_MODE_INFO best_mbmode; 2492 int j; 2493 int mode_index, best_mode_index = 0; 2494 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; 2495 vp9_prob comp_mode_p; 2496 int64_t best_overall_rd = INT64_MAX; 2497 INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE; 2498 INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE; 2499 int rate_uv_intra[TX_SIZE_MAX_SB], rate_uv_tokenonly[TX_SIZE_MAX_SB]; 2500 int dist_uv[TX_SIZE_MAX_SB], skip_uv[TX_SIZE_MAX_SB]; 2501 MB_PREDICTION_MODE mode_uv[TX_SIZE_MAX_SB]; 2502 struct scale_factors scale_factor[4]; 2503 unsigned int ref_frame_mask = 0; 2504 unsigned int mode_mask = 0; 2505 int64_t mode_distortions[MB_MODE_COUNT] = {-1}; 2506 int64_t frame_distortions[MAX_REF_FRAMES] = {-1}; 2507 int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex, 2508 cpi->common.y_dc_delta_q); 2509 int_mv seg_mvs[4][MAX_REF_FRAMES]; 2510 union b_mode_info best_bmodes[4]; 2511 PARTITION_INFO best_partition; 2512 int bwsl = b_width_log2(bsize); 2513 int bws = (1 << bwsl) / 4; // mode_info step for subsize 2514 int bhsl = b_height_log2(bsize); 2515 int bhs = (1 << bhsl) / 4; // mode_info step for subsize 2516 2517 for (i = 0; i < 4; i++) { 2518 int j; 2519 2520 for (j = 0; j < MAX_REF_FRAMES; j++) 2521 seg_mvs[i][j].as_int = INVALID_MV; 2522 } 2523 // Everywhere the flag is set the error is much higher than its neighbors. 
2524 ctx->frames_with_high_error = 0; 2525 ctx->modes_with_high_error = 0; 2526 2527 xd->mode_info_context->mbmi.segment_id = segment_id; 2528 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp, 2529 &comp_mode_p); 2530 vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); 2531 vpx_memset(&single_newmv, 0, sizeof(single_newmv)); 2532 2533 for (i = 0; i < NB_PREDICTION_TYPES; ++i) 2534 best_pred_rd[i] = INT64_MAX; 2535 for (i = 0; i < NB_TXFM_MODES; i++) 2536 best_txfm_rd[i] = INT64_MAX; 2537 2538 // Create a mask set to 1 for each frame used by a smaller resolution. 2539 if (cpi->speed > 0) { 2540 switch (block_size) { 2541 case BLOCK_64X64: 2542 for (i = 0; i < 4; i++) { 2543 for (j = 0; j < 4; j++) { 2544 ref_frame_mask |= x->mb_context[i][j].frames_with_high_error; 2545 mode_mask |= x->mb_context[i][j].modes_with_high_error; 2546 } 2547 } 2548 for (i = 0; i < 4; i++) { 2549 ref_frame_mask |= x->sb32_context[i].frames_with_high_error; 2550 mode_mask |= x->sb32_context[i].modes_with_high_error; 2551 } 2552 break; 2553 case BLOCK_32X32: 2554 for (i = 0; i < 4; i++) { 2555 ref_frame_mask |= 2556 x->mb_context[xd->sb_index][i].frames_with_high_error; 2557 mode_mask |= x->mb_context[xd->sb_index][i].modes_with_high_error; 2558 } 2559 break; 2560 default: 2561 // Until we handle all block sizes set it to present; 2562 ref_frame_mask = 0; 2563 mode_mask = 0; 2564 break; 2565 } 2566 ref_frame_mask = ~ref_frame_mask; 2567 mode_mask = ~mode_mask; 2568 } 2569 2570 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { 2571 if (cpi->ref_frame_flags & flag_list[ref_frame]) { 2572 setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size, 2573 mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], 2574 yv12_mb, scale_factor); 2575 } 2576 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; 2577 frame_mv[ZEROMV][ref_frame].as_int = 0; 2578 } 2579 if (cpi->speed == 0 2580 || (cpi->speed > 0 && (ref_frame_mask & (1 << INTRA_FRAME)))) { 
2581 mbmi->mode = DC_PRED; 2582 mbmi->ref_frame[0] = INTRA_FRAME; 2583 for (i = 0; i <= (bsize < BLOCK_SIZE_MB16X16 ? TX_4X4 : 2584 (bsize < BLOCK_SIZE_SB32X32 ? TX_8X8 : 2585 (bsize < BLOCK_SIZE_SB64X64 ? TX_16X16 : TX_32X32))); 2586 i++) { 2587 mbmi->txfm_size = i; 2588 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[i], &rate_uv_tokenonly[i], 2589 &dist_uv[i], &skip_uv[i], 2590 (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : 2591 bsize); 2592 mode_uv[i] = mbmi->uv_mode; 2593 } 2594 } 2595 2596 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { 2597 int mode_excluded = 0; 2598 int64_t this_rd = INT64_MAX; 2599 int disable_skip = 0; 2600 int compmode_cost = 0; 2601 int rate2 = 0, rate_y = 0, rate_uv = 0; 2602 int distortion2 = 0, distortion_y = 0, distortion_uv = 0; 2603 int skippable; 2604 int64_t txfm_cache[NB_TXFM_MODES]; 2605 int i; 2606 2607 for (i = 0; i < NB_TXFM_MODES; ++i) 2608 txfm_cache[i] = INT64_MAX; 2609 2610 // Test best rd so far against threshold for trying this mode. 2611 if ((best_rd < ((cpi->rd_threshes[bsize][mode_index] * 2612 cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 4)) || 2613 cpi->rd_threshes[bsize][mode_index] == INT_MAX) 2614 continue; 2615 2616 // Do not allow compound prediction if the segment level reference 2617 // frame feature is in use as in this case there can only be one reference. 
2618 if ((vp9_mode_order[mode_index].second_ref_frame > INTRA_FRAME) && 2619 vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME)) 2620 continue; 2621 2622 x->skip = 0; 2623 this_mode = vp9_mode_order[mode_index].mode; 2624 ref_frame = vp9_mode_order[mode_index].ref_frame; 2625 2626 if (cpi->speed > 0 && bsize >= BLOCK_SIZE_SB8X8) { 2627 if (!(ref_frame_mask & (1 << ref_frame))) { 2628 continue; 2629 } 2630 if (!(mode_mask & (1 << this_mode))) { 2631 continue; 2632 } 2633 if (vp9_mode_order[mode_index].second_ref_frame != NONE 2634 && !(ref_frame_mask 2635 & (1 << vp9_mode_order[mode_index].second_ref_frame))) { 2636 continue; 2637 } 2638 } 2639 2640 mbmi->ref_frame[0] = ref_frame; 2641 mbmi->ref_frame[1] = vp9_mode_order[mode_index].second_ref_frame; 2642 2643 if (!(ref_frame == INTRA_FRAME 2644 || (cpi->ref_frame_flags & flag_list[ref_frame]))) { 2645 continue; 2646 } 2647 if (!(mbmi->ref_frame[1] == NONE 2648 || (cpi->ref_frame_flags & flag_list[mbmi->ref_frame[1]]))) { 2649 continue; 2650 } 2651 2652 // TODO(jingning, jkoleszar): scaling reference frame not supported for 2653 // SPLITMV. 2654 if (mbmi->ref_frame[0] > 0 && 2655 (scale_factor[mbmi->ref_frame[0]].x_scale_fp != 2656 (1 << VP9_REF_SCALE_SHIFT) || 2657 scale_factor[mbmi->ref_frame[0]].y_scale_fp != 2658 (1 << VP9_REF_SCALE_SHIFT)) && 2659 this_mode == SPLITMV) 2660 continue; 2661 2662 if (mbmi->ref_frame[1] > 0 && 2663 (scale_factor[mbmi->ref_frame[1]].x_scale_fp != 2664 (1 << VP9_REF_SCALE_SHIFT) || 2665 scale_factor[mbmi->ref_frame[1]].y_scale_fp != 2666 (1 << VP9_REF_SCALE_SHIFT)) && 2667 this_mode == SPLITMV) 2668 continue; 2669 2670 set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1], 2671 scale_factor); 2672 comp_pred = mbmi->ref_frame[1] > INTRA_FRAME; 2673 mbmi->mode = this_mode; 2674 mbmi->uv_mode = DC_PRED; 2675 2676 // Evaluate all sub-pel filters irrespective of whether we can use 2677 // them for this frame. 
2678 mbmi->interp_filter = cm->mcomp_filter_type; 2679 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); 2680 2681 if (bsize >= BLOCK_SIZE_SB8X8 && 2682 (this_mode == I4X4_PRED || this_mode == SPLITMV)) 2683 continue; 2684 if (bsize < BLOCK_SIZE_SB8X8 && 2685 !(this_mode == I4X4_PRED || this_mode == SPLITMV)) 2686 continue; 2687 2688 if (comp_pred) { 2689 if (!(cpi->ref_frame_flags & flag_list[mbmi->ref_frame[1]])) 2690 continue; 2691 set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1], 2692 scale_factor); 2693 2694 mode_excluded = 2695 mode_excluded ? 2696 mode_excluded : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY; 2697 } else { 2698 // mbmi->ref_frame[1] = vp9_mode_order[mode_index].ref_frame[1]; 2699 if (ref_frame != INTRA_FRAME) { 2700 if (mbmi->ref_frame[1] != INTRA_FRAME) 2701 mode_excluded = 2702 mode_excluded ? 2703 mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY; 2704 } 2705 } 2706 2707 // Select predictors 2708 for (i = 0; i < MAX_MB_PLANE; i++) { 2709 xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; 2710 if (comp_pred) 2711 xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i]; 2712 } 2713 2714 // If the segment reference frame feature is enabled.... 2715 // then do nothing if the current ref frame is not allowed.. 2716 if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) && 2717 vp9_get_segdata(xd, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { 2718 continue; 2719 // If the segment skip feature is enabled.... 2720 // then do nothing if the current mode is not allowed.. 2721 } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) && 2722 (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) { 2723 continue; 2724 // Disable this drop out case if the ref frame 2725 // segment level feature is enabled for this segment. This is to 2726 // prevent the possibility that we end up unable to pick any mode. 
2727 } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME)) { 2728 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, 2729 // unless ARNR filtering is enabled in which case we want 2730 // an unfiltered alternative 2731 if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { 2732 if (this_mode != ZEROMV || ref_frame != ALTREF_FRAME) { 2733 continue; 2734 } 2735 } 2736 } 2737 // TODO(JBB): This is to make up for the fact that we don't have sad 2738 // functions that work when the block size reads outside the umv. We 2739 // should fix this either by making the motion search just work on 2740 // a representative block in the boundary ( first ) and then implement a 2741 // function that does sads when inside the border.. 2742 if (((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) && 2743 this_mode == NEWMV) { 2744 continue; 2745 } 2746 2747 if (this_mode == I4X4_PRED) { 2748 int rate; 2749 2750 mbmi->txfm_size = TX_4X4; 2751 rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, 2752 &distortion_y, INT64_MAX); 2753 rate2 += rate; 2754 rate2 += intra_cost_penalty; 2755 distortion2 += distortion_y; 2756 2757 rate2 += rate_uv_intra[TX_4X4]; 2758 rate_uv = rate_uv_intra[TX_4X4]; 2759 distortion2 += dist_uv[TX_4X4]; 2760 distortion_uv = dist_uv[TX_4X4]; 2761 mbmi->uv_mode = mode_uv[TX_4X4]; 2762 txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); 2763 for (i = 0; i < NB_TXFM_MODES; ++i) 2764 txfm_cache[i] = txfm_cache[ONLY_4X4]; 2765 } else if (ref_frame == INTRA_FRAME) { 2766 TX_SIZE uv_tx; 2767 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, 2768 bsize, txfm_cache); 2769 2770 uv_tx = mbmi->txfm_size; 2771 if (bsize < BLOCK_SIZE_MB16X16 && uv_tx == TX_8X8) 2772 uv_tx = TX_4X4; 2773 if (bsize < BLOCK_SIZE_SB32X32 && uv_tx == TX_16X16) 2774 uv_tx = TX_8X8; 2775 else if (bsize < BLOCK_SIZE_SB64X64 && uv_tx == TX_32X32) 2776 uv_tx = TX_16X16; 2777 2778 rate_uv = rate_uv_intra[uv_tx]; 2779 distortion_uv = 
dist_uv[uv_tx]; 2780 skippable = skippable && skip_uv[uv_tx]; 2781 mbmi->uv_mode = mode_uv[uv_tx]; 2782 2783 rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv; 2784 if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) 2785 rate2 += intra_cost_penalty; 2786 distortion2 = distortion_y + distortion_uv; 2787 } else if (this_mode == SPLITMV) { 2788 const int is_comp_pred = mbmi->ref_frame[1] > 0; 2789 int rate, distortion; 2790 int64_t this_rd_thresh; 2791 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX; 2792 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX; 2793 int tmp_best_distortion = INT_MAX, tmp_best_skippable = 0; 2794 int switchable_filter_index; 2795 int_mv *second_ref = is_comp_pred ? 2796 &mbmi->ref_mvs[mbmi->ref_frame[1]][0] : NULL; 2797 union b_mode_info tmp_best_bmodes[16]; 2798 MB_MODE_INFO tmp_best_mbmode; 2799 PARTITION_INFO tmp_best_partition; 2800 int pred_exists = 0; 2801 int uv_skippable; 2802 2803 this_rd_thresh = (mbmi->ref_frame[0] == LAST_FRAME) ? 2804 cpi->rd_threshes[bsize][THR_NEWMV] : 2805 cpi->rd_threshes[bsize][THR_NEWA]; 2806 this_rd_thresh = (mbmi->ref_frame[0] == GOLDEN_FRAME) ? 
2807 cpi->rd_threshes[bsize][THR_NEWG] : this_rd_thresh; 2808 xd->mode_info_context->mbmi.txfm_size = TX_4X4; 2809 2810 for (switchable_filter_index = 0; 2811 switchable_filter_index < VP9_SWITCHABLE_FILTERS; 2812 ++switchable_filter_index) { 2813 int newbest; 2814 mbmi->interp_filter = 2815 vp9_switchable_interp[switchable_filter_index]; 2816 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); 2817 2818 tmp_rd = rd_pick_best_mbsegmentation(cpi, x, 2819 &mbmi->ref_mvs[mbmi->ref_frame[0]][0], 2820 second_ref, INT64_MAX, 2821 &rate, &rate_y, &distortion, 2822 &skippable, 2823 (int)this_rd_thresh, seg_mvs, 2824 mi_row, mi_col); 2825 if (cpi->common.mcomp_filter_type == SWITCHABLE) { 2826 const int rs = get_switchable_rate(cm, x); 2827 tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0); 2828 } 2829 newbest = (tmp_rd < tmp_best_rd); 2830 if (newbest) { 2831 tmp_best_filter = mbmi->interp_filter; 2832 tmp_best_rd = tmp_rd; 2833 } 2834 if ((newbest && cm->mcomp_filter_type == SWITCHABLE) || 2835 (mbmi->interp_filter == cm->mcomp_filter_type && 2836 cm->mcomp_filter_type != SWITCHABLE)) { 2837 tmp_best_rdu = tmp_rd; 2838 tmp_best_rate = rate; 2839 tmp_best_ratey = rate_y; 2840 tmp_best_distortion = distortion; 2841 tmp_best_skippable = skippable; 2842 tmp_best_mbmode = *mbmi; 2843 tmp_best_partition = *x->partition_info; 2844 for (i = 0; i < 4; i++) 2845 tmp_best_bmodes[i] = xd->mode_info_context->bmi[i]; 2846 pred_exists = 1; 2847 } 2848 } // switchable_filter_index loop 2849 2850 mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ? 
2851 tmp_best_filter : cm->mcomp_filter_type); 2852 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); 2853 if (!pred_exists) { 2854 // Handles the special case when a filter that is not in the 2855 // switchable list (bilinear, 6-tap) is indicated at the frame level 2856 tmp_rd = rd_pick_best_mbsegmentation(cpi, x, 2857 &mbmi->ref_mvs[mbmi->ref_frame[0]][0], 2858 second_ref, INT64_MAX, 2859 &rate, &rate_y, &distortion, 2860 &skippable, 2861 (int)this_rd_thresh, seg_mvs, 2862 mi_row, mi_col); 2863 } else { 2864 if (cpi->common.mcomp_filter_type == SWITCHABLE) { 2865 int rs = get_switchable_rate(cm, x); 2866 tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0); 2867 } 2868 tmp_rd = tmp_best_rdu; 2869 rate = tmp_best_rate; 2870 rate_y = tmp_best_ratey; 2871 distortion = tmp_best_distortion; 2872 skippable = tmp_best_skippable; 2873 *mbmi = tmp_best_mbmode; 2874 *x->partition_info = tmp_best_partition; 2875 for (i = 0; i < 4; i++) 2876 xd->mode_info_context->bmi[i] = tmp_best_bmodes[i]; 2877 } 2878 2879 rate2 += rate; 2880 distortion2 += distortion; 2881 2882 if (cpi->common.mcomp_filter_type == SWITCHABLE) 2883 rate2 += get_switchable_rate(cm, x); 2884 2885 // If even the 'Y' rd value of split is higher than best so far 2886 // then dont bother looking at UV 2887 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, 2888 BLOCK_SIZE_SB8X8); 2889 vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8); 2890 super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv, 2891 &uv_skippable, BLOCK_SIZE_SB8X8, TX_4X4); 2892 rate2 += rate_uv; 2893 distortion2 += distortion_uv; 2894 skippable = skippable && uv_skippable; 2895 2896 txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); 2897 for (i = 0; i < NB_TXFM_MODES; ++i) 2898 txfm_cache[i] = txfm_cache[ONLY_4X4]; 2899 2900 if (!mode_excluded) { 2901 if (is_comp_pred) 2902 mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY; 2903 else 2904 mode_excluded = cpi->common.comp_pred_mode == 
COMP_PREDICTION_ONLY; 2905 } 2906 2907 compmode_cost = vp9_cost_bit(comp_mode_p, is_comp_pred); 2908 } else { 2909 compmode_cost = vp9_cost_bit(comp_mode_p, 2910 mbmi->ref_frame[1] > INTRA_FRAME); 2911 this_rd = handle_inter_mode(cpi, x, bsize, 2912 txfm_cache, 2913 &rate2, &distortion2, &skippable, 2914 &rate_y, &distortion_y, 2915 &rate_uv, &distortion_uv, 2916 &mode_excluded, &disable_skip, 2917 &tmp_best_filter, frame_mv[this_mode], 2918 mi_row, mi_col, 2919 single_newmv); 2920 if (this_rd == INT64_MAX) 2921 continue; 2922 } 2923 2924 if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { 2925 rate2 += compmode_cost; 2926 } 2927 2928 // Estimate the reference frame signaling cost and add it 2929 // to the rolling cost variable. 2930 if (mbmi->ref_frame[1] > INTRA_FRAME) { 2931 rate2 += ref_costs_comp[mbmi->ref_frame[0]]; 2932 } else { 2933 rate2 += ref_costs_single[mbmi->ref_frame[0]]; 2934 } 2935 2936 if (!disable_skip) { 2937 // Test for the condition where skip block will be activated 2938 // because there are no non zero coefficients and make any 2939 // necessary adjustment for rate. Ignore if skip is coded at 2940 // segment level as the cost wont have been added in. 2941 int mb_skip_allowed; 2942 2943 // Is Mb level skip allowed (i.e. not coded at segment level). 2944 mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); 2945 2946 if (skippable && bsize >= BLOCK_SIZE_SB8X8) { 2947 // Back out the coefficient coding costs 2948 rate2 -= (rate_y + rate_uv); 2949 // for best_yrd calculation 2950 rate_uv = 0; 2951 2952 if (mb_skip_allowed) { 2953 int prob_skip_cost; 2954 2955 // Cost the skip mb case 2956 vp9_prob skip_prob = 2957 vp9_get_pred_prob(cm, xd, PRED_MBSKIP); 2958 2959 if (skip_prob) { 2960 prob_skip_cost = vp9_cost_bit(skip_prob, 1); 2961 rate2 += prob_skip_cost; 2962 } 2963 } 2964 } else if (mb_skip_allowed) { 2965 // Add in the cost of the no skip flag. 
2966 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd, 2967 PRED_MBSKIP), 0); 2968 rate2 += prob_skip_cost; 2969 } 2970 2971 // Calculate the final RD estimate for this mode. 2972 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); 2973 } 2974 2975#if 0 2976 // Keep record of best intra distortion 2977 if ((xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) && 2978 (this_rd < best_intra_rd)) { 2979 best_intra_rd = this_rd; 2980 *returnintra = distortion2; 2981 } 2982#endif 2983 2984 if (!disable_skip && mbmi->ref_frame[0] == INTRA_FRAME) 2985 for (i = 0; i < NB_PREDICTION_TYPES; ++i) 2986 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd); 2987 2988 if (this_rd < best_overall_rd) { 2989 best_overall_rd = this_rd; 2990 best_filter = tmp_best_filter; 2991 best_mode = this_mode; 2992 } 2993 2994 if (this_mode != I4X4_PRED && this_mode != SPLITMV) { 2995 // Store the respective mode distortions for later use. 2996 if (mode_distortions[this_mode] == -1 2997 || distortion2 < mode_distortions[this_mode]) { 2998 mode_distortions[this_mode] = distortion2; 2999 } 3000 if (frame_distortions[mbmi->ref_frame[0]] == -1 3001 || distortion2 < frame_distortions[mbmi->ref_frame[0]]) { 3002 frame_distortions[mbmi->ref_frame[0]] = distortion2; 3003 } 3004 } 3005 3006 // Did this mode help.. i.e. 
is it the new best mode 3007 if (this_rd < best_rd || x->skip) { 3008 if (!mode_excluded) { 3009 // Note index of best mode so far 3010 best_mode_index = mode_index; 3011 3012 if (ref_frame == INTRA_FRAME) { 3013 /* required for left and above block mv */ 3014 mbmi->mv[0].as_int = 0; 3015 } 3016 3017 *returnrate = rate2; 3018 *returndistortion = distortion2; 3019 best_rd = this_rd; 3020 best_mbmode = *mbmi; 3021 best_partition = *x->partition_info; 3022 3023 if (this_mode == I4X4_PRED || this_mode == SPLITMV) 3024 for (i = 0; i < 4; i++) 3025 best_bmodes[i] = xd->mode_info_context->bmi[i]; 3026 } 3027#if 0 3028 // Testing this mode gave rise to an improvement in best error score. 3029 // Lower threshold a bit for next time 3030 cpi->rd_thresh_mult[mode_index] = 3031 (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? 3032 cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; 3033 cpi->rd_threshes[mode_index] = 3034 (cpi->rd_baseline_thresh[mode_index] >> 7) 3035 * cpi->rd_thresh_mult[mode_index]; 3036#endif 3037 } else { 3038 // If the mode did not help improve the best error case then 3039 // raise the threshold for testing that mode next time around. 
3040#if 0 3041 cpi->rd_thresh_mult[mode_index] += 4; 3042 3043 if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) 3044 cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; 3045 3046 cpi->rd_threshes[mode_index] = 3047 (cpi->rd_baseline_thresh[mode_index] >> 7) 3048 * cpi->rd_thresh_mult[mode_index]; 3049#endif 3050 } 3051 3052 /* keep record of best compound/single-only prediction */ 3053 if (!disable_skip && mbmi->ref_frame[0] != INTRA_FRAME) { 3054 int single_rd, hybrid_rd, single_rate, hybrid_rate; 3055 3056 if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { 3057 single_rate = rate2 - compmode_cost; 3058 hybrid_rate = rate2; 3059 } else { 3060 single_rate = rate2; 3061 hybrid_rate = rate2 + compmode_cost; 3062 } 3063 3064 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); 3065 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); 3066 3067 if (mbmi->ref_frame[1] <= INTRA_FRAME && 3068 single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) { 3069 best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd; 3070 } else if (mbmi->ref_frame[1] > INTRA_FRAME && 3071 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) { 3072 best_pred_rd[COMP_PREDICTION_ONLY] = single_rd; 3073 } 3074 if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION]) 3075 best_pred_rd[HYBRID_PREDICTION] = hybrid_rd; 3076 } 3077 3078 /* keep record of best txfm size */ 3079 if (bsize < BLOCK_SIZE_SB32X32) { 3080 if (bsize < BLOCK_SIZE_MB16X16) { 3081 if (this_mode == SPLITMV || this_mode == I4X4_PRED) 3082 txfm_cache[ALLOW_8X8] = txfm_cache[ONLY_4X4]; 3083 txfm_cache[ALLOW_16X16] = txfm_cache[ALLOW_8X8]; 3084 } 3085 txfm_cache[ALLOW_32X32] = txfm_cache[ALLOW_16X16]; 3086 } 3087 if (!mode_excluded && this_rd != INT64_MAX) { 3088 for (i = 0; i < NB_TXFM_MODES; i++) { 3089 int64_t adj_rd = INT64_MAX; 3090 if (this_mode != I4X4_PRED) { 3091 adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->txfm_mode]; 3092 } else { 3093 adj_rd = this_rd; 3094 } 3095 3096 if (adj_rd < best_txfm_rd[i]) 3097 
best_txfm_rd[i] = adj_rd; 3098 } 3099 } 3100 3101 if (x->skip && !mode_excluded) 3102 break; 3103 } 3104 // Flag all modes that have a distortion thats > 2x the best we found at 3105 // this level. 3106 for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) { 3107 if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV) 3108 continue; 3109 3110 if (mode_distortions[mode_index] > 2 * *returndistortion) { 3111 ctx->modes_with_high_error |= (1 << mode_index); 3112 } 3113 } 3114 3115 // Flag all ref frames that have a distortion thats > 2x the best we found at 3116 // this level. 3117 for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { 3118 if (frame_distortions[ref_frame] > 2 * *returndistortion) { 3119 ctx->frames_with_high_error |= (1 << ref_frame); 3120 } 3121 } 3122 3123 if (best_rd == INT64_MAX && bsize < BLOCK_SIZE_SB8X8) { 3124 *returnrate = INT_MAX; 3125 *returndistortion = INT_MAX; 3126 return best_rd; 3127 } 3128 3129 assert((cm->mcomp_filter_type == SWITCHABLE) || 3130 (cm->mcomp_filter_type == best_mbmode.interp_filter) || 3131 (best_mbmode.ref_frame[0] == INTRA_FRAME)); 3132 3133 // Accumulate filter usage stats 3134 // TODO(agrange): Use RD criteria to select interpolation filter mode. 3135 if (is_inter_mode(best_mode)) 3136 ++cpi->best_switchable_interp_count[vp9_switchable_interp_map[best_filter]]; 3137 3138 // Updating rd_thresh_freq_fact[] here means that the differnt 3139 // partition/block sizes are handled independently based on the best 3140 // choice for the current partition. It may well be better to keep a scaled 3141 // best rd so far value and update rd_thresh_freq_fact based on the mode/size 3142 // combination that wins out. 
3143 if (cpi->sf.adpative_rd_thresh) { 3144 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { 3145 if (mode_index == best_mode_index) { 3146 cpi->rd_thresh_freq_fact[bsize][mode_index] = BASE_RD_THRESH_FREQ_FACT; 3147 } else { 3148 cpi->rd_thresh_freq_fact[bsize][mode_index] += MAX_RD_THRESH_FREQ_INC; 3149 if (cpi->rd_thresh_freq_fact[bsize][mode_index] > 3150 (cpi->sf.adpative_rd_thresh * MAX_RD_THRESH_FREQ_FACT)) { 3151 cpi->rd_thresh_freq_fact[bsize][mode_index] = 3152 cpi->sf.adpative_rd_thresh * MAX_RD_THRESH_FREQ_FACT; 3153 } 3154 } 3155 } 3156 } 3157 3158 // TODO(rbultje) integrate with RD trd_thresh_freq_facthresholding 3159#if 0 3160 // Reduce the activation RD thresholds for the best choice mode 3161 if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && 3162 (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) { 3163 int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2); 3164 3165 cpi->rd_thresh_mult[best_mode_index] = 3166 (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ? 3167 cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT; 3168 cpi->rd_threshes[best_mode_index] = 3169 (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index]; 3170 } 3171#endif 3172 3173 // This code forces Altref,0,0 and skip for the frame that overlays a 3174 // an alrtef unless Altref is filtered. However, this is unsafe if 3175 // segment level coding of ref frame is enabled for this segment. 
3176 if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) && 3177 cpi->is_src_frame_alt_ref && 3178 (cpi->oxcf.arnr_max_frames == 0) && 3179 (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame[0] != ALTREF_FRAME) 3180 && bsize >= BLOCK_SIZE_SB8X8) { 3181 mbmi->mode = ZEROMV; 3182 mbmi->ref_frame[0] = ALTREF_FRAME; 3183 mbmi->ref_frame[1] = NONE; 3184 mbmi->mv[0].as_int = 0; 3185 mbmi->uv_mode = DC_PRED; 3186 mbmi->mb_skip_coeff = 1; 3187 if (cm->txfm_mode == TX_MODE_SELECT) { 3188 if (bsize >= BLOCK_SIZE_SB32X32) 3189 mbmi->txfm_size = TX_32X32; 3190 else if (bsize >= BLOCK_SIZE_MB16X16) 3191 mbmi->txfm_size = TX_16X16; 3192 else 3193 mbmi->txfm_size = TX_8X8; 3194 } 3195 3196 vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff)); 3197 vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff)); 3198 goto end; 3199 } 3200 3201 // macroblock modes 3202 *mbmi = best_mbmode; 3203 if (best_mbmode.ref_frame[0] == INTRA_FRAME && 3204 best_mbmode.sb_type < BLOCK_SIZE_SB8X8) { 3205 for (i = 0; i < 4; i++) 3206 xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode; 3207 } 3208 3209 if (best_mbmode.ref_frame[0] != INTRA_FRAME && 3210 best_mbmode.sb_type < BLOCK_SIZE_SB8X8) { 3211 for (i = 0; i < 4; i++) 3212 xd->mode_info_context->bmi[i].as_mv[0].as_int = 3213 best_bmodes[i].as_mv[0].as_int; 3214 3215 if (mbmi->ref_frame[1] > 0) 3216 for (i = 0; i < 4; i++) 3217 xd->mode_info_context->bmi[i].as_mv[1].as_int = 3218 best_bmodes[i].as_mv[1].as_int; 3219 3220 *x->partition_info = best_partition; 3221 3222 mbmi->mv[0].as_int = x->partition_info->bmi[3].mv.as_int; 3223 mbmi->mv[1].as_int = x->partition_info->bmi[3].second_mv.as_int; 3224 } 3225 3226 for (i = 0; i < NB_PREDICTION_TYPES; ++i) { 3227 if (best_pred_rd[i] == INT64_MAX) 3228 best_pred_diff[i] = INT_MIN; 3229 else 3230 best_pred_diff[i] = best_rd - best_pred_rd[i]; 3231 } 3232 3233 if (!x->skip) { 3234 for (i = 0; i < NB_TXFM_MODES; i++) { 3235 if (best_txfm_rd[i] == INT64_MAX) 3236 best_txfm_diff[i] = 0; 
3237 else 3238 best_txfm_diff[i] = best_rd - best_txfm_rd[i]; 3239 } 3240 } else { 3241 vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff)); 3242 } 3243 3244 end: 3245 set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1], 3246 scale_factor); 3247 store_coding_context(x, ctx, best_mode_index, 3248 &best_partition, 3249 &mbmi->ref_mvs[mbmi->ref_frame[0]][0], 3250 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 : 3251 mbmi->ref_frame[1]][0], 3252 best_pred_diff, best_txfm_diff); 3253 3254 return best_rd; 3255} 3256