1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include <limits.h> 12#include <math.h> 13#include <stdio.h> 14 15#include "./vp9_rtcd.h" 16#include "./vpx_dsp_rtcd.h" 17#include "./vpx_config.h" 18 19#include "vpx_dsp/vpx_dsp_common.h" 20#include "vpx_ports/mem.h" 21#include "vpx_ports/vpx_timer.h" 22#include "vpx_ports/system_state.h" 23 24#include "vp9/common/vp9_common.h" 25#include "vp9/common/vp9_entropy.h" 26#include "vp9/common/vp9_entropymode.h" 27#include "vp9/common/vp9_idct.h" 28#include "vp9/common/vp9_mvref_common.h" 29#include "vp9/common/vp9_pred_common.h" 30#include "vp9/common/vp9_quant_common.h" 31#include "vp9/common/vp9_reconintra.h" 32#include "vp9/common/vp9_reconinter.h" 33#include "vp9/common/vp9_seg_common.h" 34#include "vp9/common/vp9_tile_common.h" 35 36#include "vp9/encoder/vp9_aq_360.h" 37#include "vp9/encoder/vp9_aq_complexity.h" 38#include "vp9/encoder/vp9_aq_cyclicrefresh.h" 39#include "vp9/encoder/vp9_aq_variance.h" 40#include "vp9/encoder/vp9_encodeframe.h" 41#include "vp9/encoder/vp9_encodemb.h" 42#include "vp9/encoder/vp9_encodemv.h" 43#include "vp9/encoder/vp9_ethread.h" 44#include "vp9/encoder/vp9_extend.h" 45#include "vp9/encoder/vp9_pickmode.h" 46#include "vp9/encoder/vp9_rd.h" 47#include "vp9/encoder/vp9_rdopt.h" 48#include "vp9/encoder/vp9_segmentation.h" 49#include "vp9/encoder/vp9_tokenize.h" 50 51static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, 52 int output_enabled, int mi_row, int mi_col, 53 BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx); 54 55// Machine learning-based early termination parameters. 
// Per-feature offsets used by the early termination model. The table holds
// 24 entries, written here as three groups of eight; the last entry of each
// group is 0.0.
// NOTE(review): presumably the training-set means of the features -- confirm
// against the code that consumes this table.
static const double train_mean[24] = {
  // Entries 0-7.
  303501.697372, 3042630.372158, 24.694696, 1.392182,
  689.413511, 162.027012, 1.478213, 0.0,
  // Entries 8-15.
  135382.260230, 912738.513263, 28.845217, 1.515230,
  544.158492, 131.807995, 1.436863, 0.0,
  // Entries 16-23.
  43682.377587, 208131.711766, 28.084737, 1.356677,
  138.254122, 119.522553, 1.252322, 0.0
};

// Per-feature scale values, laid out like train_mean (three groups of eight).
// NOTE(review): presumably derived from the training-set standard deviations
// -- confirm against the code that consumes this table.
static const double train_stdm[24] = {
  // Entries 0-7.
  673689.212982, 5996652.516628, 0.024449, 1.989792,
  985.880847, 0.014638, 2.001898, 0.0,
  // Entries 8-15.
  208798.775332, 1812548.443284, 0.018693, 1.838009,
  396.986910, 0.015657, 1.332541, 0.0,
  // Entries 16-23.
  55888.847031, 448587.962714, 0.017900, 1.904776,
  98.652832, 0.016598, 1.320992, 0.0
};

// Classifier weights, 24 entries (three groups of eight).
// Error tolerance: 0.01%-0.05%-0.1%
static const double classifiers[24] = {
  // Entries 0-7.
  0.111736, 0.289977, 0.042219, 0.204765,
  0.120410, -0.143863, 0.282376, 0.847811,
  // Entries 8-15.
  0.637161, 0.131570, 0.018636, 0.202134,
  0.112797, 0.028162, 0.182450, 1.124367,
  // Entries 16-23.
  0.386133, 0.083700, 0.050028, 0.150873,
  0.061119, 0.109318, 0.127255, 0.625211
};

// This is used as a reference when computing the source variance for the
// purpose of activity masking.
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
86static const uint8_t VP9_VAR_OFFS[64] = { 87 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 88 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 89 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 90 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 91 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 92}; 93 94#if CONFIG_VP9_HIGHBITDEPTH 95static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = { 96 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 97 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 98 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 99 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 100 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 101}; 102 103static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = { 104 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 105 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 106 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 107 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 108 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 109 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 110 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 111 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4 112}; 113 114static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = { 115 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 116 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 117 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 118 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 119 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 120 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 121 128 * 16, 128 
* 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 122 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 123 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 124 128 * 16 125}; 126#endif // CONFIG_VP9_HIGHBITDEPTH 127 128unsigned int vp9_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref, 129 BLOCK_SIZE bs) { 130 unsigned int sse; 131 const unsigned int var = 132 cpi->fn_ptr[bs].vf(ref->buf, ref->stride, VP9_VAR_OFFS, 0, &sse); 133 return var; 134} 135 136#if CONFIG_VP9_HIGHBITDEPTH 137unsigned int vp9_high_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref, 138 BLOCK_SIZE bs, int bd) { 139 unsigned int var, sse; 140 switch (bd) { 141 case 10: 142 var = 143 cpi->fn_ptr[bs].vf(ref->buf, ref->stride, 144 CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10), 0, &sse); 145 break; 146 case 12: 147 var = 148 cpi->fn_ptr[bs].vf(ref->buf, ref->stride, 149 CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12), 0, &sse); 150 break; 151 case 8: 152 default: 153 var = 154 cpi->fn_ptr[bs].vf(ref->buf, ref->stride, 155 CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8), 0, &sse); 156 break; 157 } 158 return var; 159} 160#endif // CONFIG_VP9_HIGHBITDEPTH 161 162unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi, 163 const struct buf_2d *ref, 164 BLOCK_SIZE bs) { 165 return ROUND_POWER_OF_TWO(vp9_get_sby_variance(cpi, ref, bs), 166 num_pels_log2_lookup[bs]); 167} 168 169#if CONFIG_VP9_HIGHBITDEPTH 170unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi, 171 const struct buf_2d *ref, 172 BLOCK_SIZE bs, int bd) { 173 return (unsigned int)ROUND64_POWER_OF_TWO( 174 (int64_t)vp9_high_get_sby_variance(cpi, ref, bs, bd), 175 num_pels_log2_lookup[bs]); 176} 177#endif // CONFIG_VP9_HIGHBITDEPTH 178 179static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi, 180 const struct buf_2d *ref, 181 int mi_row, int mi_col, 182 BLOCK_SIZE bs) { 183 unsigned int sse, var; 184 uint8_t *last_y; 185 const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, 
LAST_FRAME); 186 187 assert(last != NULL); 188 last_y = 189 &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE]; 190 var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse); 191 return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); 192} 193 194static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x, 195 int mi_row, int mi_col) { 196 unsigned int var = get_sby_perpixel_diff_variance( 197 cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64); 198 if (var < 8) 199 return BLOCK_64X64; 200 else if (var < 128) 201 return BLOCK_32X32; 202 else if (var < 2048) 203 return BLOCK_16X16; 204 else 205 return BLOCK_8X8; 206} 207 208// Lighter version of set_offsets that only sets the mode info 209// pointers. 210static INLINE void set_mode_info_offsets(VP9_COMMON *const cm, 211 MACROBLOCK *const x, 212 MACROBLOCKD *const xd, int mi_row, 213 int mi_col) { 214 const int idx_str = xd->mi_stride * mi_row + mi_col; 215 xd->mi = cm->mi_grid_visible + idx_str; 216 xd->mi[0] = cm->mi + idx_str; 217 x->mbmi_ext = x->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col); 218} 219 220static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, 221 MACROBLOCK *const x, int mi_row, int mi_col, 222 BLOCK_SIZE bsize) { 223 VP9_COMMON *const cm = &cpi->common; 224 MACROBLOCKD *const xd = &x->e_mbd; 225 MODE_INFO *mi; 226 const int mi_width = num_8x8_blocks_wide_lookup[bsize]; 227 const int mi_height = num_8x8_blocks_high_lookup[bsize]; 228 const struct segmentation *const seg = &cm->seg; 229 MvLimits *const mv_limits = &x->mv_limits; 230 231 set_skip_context(xd, mi_row, mi_col); 232 233 set_mode_info_offsets(cm, x, xd, mi_row, mi_col); 234 235 mi = xd->mi[0]; 236 237 // Set up destination pointers. 238 vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); 239 240 // Set up limit values for MV components. 241 // Mv beyond the range do not produce new/different prediction block. 
242 mv_limits->row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND); 243 mv_limits->col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND); 244 mv_limits->row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND; 245 mv_limits->col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND; 246 247 // Set up distance of MB to edge of frame in 1/8th pel units. 248 assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1))); 249 set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows, 250 cm->mi_cols); 251 252 // Set up source buffers. 253 vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); 254 255 // R/D setup. 256 x->rddiv = cpi->rd.RDDIV; 257 x->rdmult = cpi->rd.RDMULT; 258 259 // Setup segment ID. 260 if (seg->enabled) { 261 if (cpi->oxcf.aq_mode != VARIANCE_AQ && cpi->oxcf.aq_mode != LOOKAHEAD_AQ && 262 cpi->oxcf.aq_mode != EQUATOR360_AQ) { 263 const uint8_t *const map = 264 seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; 265 mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); 266 } 267 vp9_init_plane_quantizers(cpi, x); 268 269 x->encode_breakout = cpi->segment_encode_breakout[mi->segment_id]; 270 } else { 271 mi->segment_id = 0; 272 x->encode_breakout = cpi->encode_breakout; 273 } 274 275 // required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs() 276 xd->tile = *tile; 277} 278 279static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd, 280 int mi_row, int mi_col, 281 BLOCK_SIZE bsize) { 282 const int block_width = 283 VPXMIN(num_8x8_blocks_wide_lookup[bsize], cm->mi_cols - mi_col); 284 const int block_height = 285 VPXMIN(num_8x8_blocks_high_lookup[bsize], cm->mi_rows - mi_row); 286 const int mi_stride = xd->mi_stride; 287 MODE_INFO *const src_mi = xd->mi[0]; 288 int i, j; 289 290 for (j = 0; j < block_height; ++j) 291 for (i = 0; i < block_width; ++i) xd->mi[j * mi_stride + i] = src_mi; 292} 293 294static void set_block_size(VP9_COMP *const cpi, 
MACROBLOCK *const x, 295 MACROBLOCKD *const xd, int mi_row, int mi_col, 296 BLOCK_SIZE bsize) { 297 if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) { 298 set_mode_info_offsets(&cpi->common, x, xd, mi_row, mi_col); 299 xd->mi[0]->sb_type = bsize; 300 } 301} 302 303typedef struct { 304 // This struct is used for computing variance in choose_partitioning(), where 305 // the max number of samples within a superblock is 16x16 (with 4x4 avg). Even 306 // in high bitdepth, uint32_t is enough for sum_square_error (2^12 * 2^12 * 16 307 // * 16 = 2^32). 308 uint32_t sum_square_error; 309 int32_t sum_error; 310 int log2_count; 311 int variance; 312} var; 313 314typedef struct { 315 var none; 316 var horz[2]; 317 var vert[2]; 318} partition_variance; 319 320typedef struct { 321 partition_variance part_variances; 322 var split[4]; 323} v4x4; 324 325typedef struct { 326 partition_variance part_variances; 327 v4x4 split[4]; 328} v8x8; 329 330typedef struct { 331 partition_variance part_variances; 332 v8x8 split[4]; 333} v16x16; 334 335typedef struct { 336 partition_variance part_variances; 337 v16x16 split[4]; 338} v32x32; 339 340typedef struct { 341 partition_variance part_variances; 342 v32x32 split[4]; 343} v64x64; 344 345typedef struct { 346 partition_variance *part_variances; 347 var *split[4]; 348} variance_node; 349 350typedef enum { 351 V16X16, 352 V32X32, 353 V64X64, 354} TREE_LEVEL; 355 356static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) { 357 int i; 358 node->part_variances = NULL; 359 switch (bsize) { 360 case BLOCK_64X64: { 361 v64x64 *vt = (v64x64 *)data; 362 node->part_variances = &vt->part_variances; 363 for (i = 0; i < 4; i++) 364 node->split[i] = &vt->split[i].part_variances.none; 365 break; 366 } 367 case BLOCK_32X32: { 368 v32x32 *vt = (v32x32 *)data; 369 node->part_variances = &vt->part_variances; 370 for (i = 0; i < 4; i++) 371 node->split[i] = &vt->split[i].part_variances.none; 372 break; 373 } 374 case 
BLOCK_16X16: { 375 v16x16 *vt = (v16x16 *)data; 376 node->part_variances = &vt->part_variances; 377 for (i = 0; i < 4; i++) 378 node->split[i] = &vt->split[i].part_variances.none; 379 break; 380 } 381 case BLOCK_8X8: { 382 v8x8 *vt = (v8x8 *)data; 383 node->part_variances = &vt->part_variances; 384 for (i = 0; i < 4; i++) 385 node->split[i] = &vt->split[i].part_variances.none; 386 break; 387 } 388 case BLOCK_4X4: { 389 v4x4 *vt = (v4x4 *)data; 390 node->part_variances = &vt->part_variances; 391 for (i = 0; i < 4; i++) node->split[i] = &vt->split[i]; 392 break; 393 } 394 default: { 395 assert(0); 396 break; 397 } 398 } 399} 400 401// Set variance values given sum square error, sum error, count. 402static void fill_variance(uint32_t s2, int32_t s, int c, var *v) { 403 v->sum_square_error = s2; 404 v->sum_error = s; 405 v->log2_count = c; 406} 407 408static void get_variance(var *v) { 409 v->variance = 410 (int)(256 * (v->sum_square_error - 411 ((v->sum_error * v->sum_error) >> v->log2_count)) >> 412 v->log2_count); 413} 414 415static void sum_2_variances(const var *a, const var *b, var *r) { 416 assert(a->log2_count == b->log2_count); 417 fill_variance(a->sum_square_error + b->sum_square_error, 418 a->sum_error + b->sum_error, a->log2_count + 1, r); 419} 420 421static void fill_variance_tree(void *data, BLOCK_SIZE bsize) { 422 variance_node node; 423 memset(&node, 0, sizeof(node)); 424 tree_to_node(data, bsize, &node); 425 sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]); 426 sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]); 427 sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]); 428 sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]); 429 sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1], 430 &node.part_variances->none); 431} 432 433static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x, 434 MACROBLOCKD *const xd, void 
*data, 435 BLOCK_SIZE bsize, int mi_row, int mi_col, 436 int64_t threshold, BLOCK_SIZE bsize_min, 437 int force_split) { 438 VP9_COMMON *const cm = &cpi->common; 439 variance_node vt; 440 const int block_width = num_8x8_blocks_wide_lookup[bsize]; 441 const int block_height = num_8x8_blocks_high_lookup[bsize]; 442 443 assert(block_height == block_width); 444 tree_to_node(data, bsize, &vt); 445 446 if (force_split == 1) return 0; 447 448 // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if 449 // variance is below threshold, otherwise split will be selected. 450 // No check for vert/horiz split as too few samples for variance. 451 if (bsize == bsize_min) { 452 // Variance already computed to set the force_split. 453 if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none); 454 if (mi_col + block_width / 2 < cm->mi_cols && 455 mi_row + block_height / 2 < cm->mi_rows && 456 vt.part_variances->none.variance < threshold) { 457 set_block_size(cpi, x, xd, mi_row, mi_col, bsize); 458 return 1; 459 } 460 return 0; 461 } else if (bsize > bsize_min) { 462 // Variance already computed to set the force_split. 463 if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none); 464 // For key frame: take split for bsize above 32X32 or very high variance. 465 if (cm->frame_type == KEY_FRAME && 466 (bsize > BLOCK_32X32 || 467 vt.part_variances->none.variance > (threshold << 4))) { 468 return 0; 469 } 470 // If variance is low, take the bsize (no split). 471 if (mi_col + block_width / 2 < cm->mi_cols && 472 mi_row + block_height / 2 < cm->mi_rows && 473 vt.part_variances->none.variance < threshold) { 474 set_block_size(cpi, x, xd, mi_row, mi_col, bsize); 475 return 1; 476 } 477 478 // Check vertical split. 
479 if (mi_row + block_height / 2 < cm->mi_rows) { 480 BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT); 481 get_variance(&vt.part_variances->vert[0]); 482 get_variance(&vt.part_variances->vert[1]); 483 if (vt.part_variances->vert[0].variance < threshold && 484 vt.part_variances->vert[1].variance < threshold && 485 get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) { 486 set_block_size(cpi, x, xd, mi_row, mi_col, subsize); 487 set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize); 488 return 1; 489 } 490 } 491 // Check horizontal split. 492 if (mi_col + block_width / 2 < cm->mi_cols) { 493 BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ); 494 get_variance(&vt.part_variances->horz[0]); 495 get_variance(&vt.part_variances->horz[1]); 496 if (vt.part_variances->horz[0].variance < threshold && 497 vt.part_variances->horz[1].variance < threshold && 498 get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) { 499 set_block_size(cpi, x, xd, mi_row, mi_col, subsize); 500 set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize); 501 return 1; 502 } 503 } 504 505 return 0; 506 } 507 return 0; 508} 509 510static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed, 511 int width, int height, 512 int content_state) { 513 if (speed >= 8) { 514 if (width <= 640 && height <= 480) 515 return (5 * threshold_base) >> 2; 516 else if ((content_state == kLowSadLowSumdiff) || 517 (content_state == kHighSadLowSumdiff) || 518 (content_state == kLowVarHighSumdiff)) 519 return (5 * threshold_base) >> 2; 520 } else if (speed == 7) { 521 if ((content_state == kLowSadLowSumdiff) || 522 (content_state == kHighSadLowSumdiff) || 523 (content_state == kLowVarHighSumdiff)) { 524 return (5 * threshold_base) >> 2; 525 } 526 } 527 return threshold_base; 528} 529 530// Set the variance split thresholds for following the block sizes: 531// 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16, 532// 3 - 
vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is 533// currently only used on key frame. 534static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q, 535 int content_state) { 536 VP9_COMMON *const cm = &cpi->common; 537 const int is_key_frame = (cm->frame_type == KEY_FRAME); 538 const int threshold_multiplier = is_key_frame ? 20 : 1; 539 int64_t threshold_base = 540 (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]); 541 542 if (is_key_frame) { 543 thresholds[0] = threshold_base; 544 thresholds[1] = threshold_base >> 2; 545 thresholds[2] = threshold_base >> 2; 546 thresholds[3] = threshold_base << 2; 547 } else { 548 // Increase base variance threshold based on estimated noise level. 549 if (cpi->noise_estimate.enabled && cm->width >= 640 && cm->height >= 480) { 550 NOISE_LEVEL noise_level = 551 vp9_noise_estimate_extract_level(&cpi->noise_estimate); 552 if (noise_level == kHigh) 553 threshold_base = 3 * threshold_base; 554 else if (noise_level == kMedium) 555 threshold_base = threshold_base << 1; 556 else if (noise_level < kLow) 557 threshold_base = (7 * threshold_base) >> 3; 558 } 559#if CONFIG_VP9_TEMPORAL_DENOISING 560 if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && 561 cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow) 562 threshold_base = 563 vp9_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level, 564 content_state, cpi->svc.temporal_layer_id); 565 else 566 threshold_base = 567 scale_part_thresh_sumdiff(threshold_base, cpi->oxcf.speed, cm->width, 568 cm->height, content_state); 569#else 570 // Increase base variance threshold based on content_state/sum_diff level. 
571 threshold_base = scale_part_thresh_sumdiff( 572 threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state); 573#endif 574 thresholds[0] = threshold_base; 575 thresholds[2] = threshold_base << cpi->oxcf.speed; 576 if (cm->width >= 1280 && cm->height >= 720 && cpi->oxcf.speed < 7) 577 thresholds[2] = thresholds[2] << 1; 578 if (cm->width <= 352 && cm->height <= 288) { 579 thresholds[0] = threshold_base >> 3; 580 thresholds[1] = threshold_base >> 1; 581 thresholds[2] = threshold_base << 3; 582 } else if (cm->width < 1280 && cm->height < 720) { 583 thresholds[1] = (5 * threshold_base) >> 2; 584 } else if (cm->width < 1920 && cm->height < 1080) { 585 thresholds[1] = threshold_base << 1; 586 } else { 587 thresholds[1] = (5 * threshold_base) >> 1; 588 } 589 } 590} 591 592void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q, 593 int content_state) { 594 VP9_COMMON *const cm = &cpi->common; 595 SPEED_FEATURES *const sf = &cpi->sf; 596 const int is_key_frame = (cm->frame_type == KEY_FRAME); 597 if (sf->partition_search_type != VAR_BASED_PARTITION && 598 sf->partition_search_type != REFERENCE_PARTITION) { 599 return; 600 } else { 601 set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state); 602 // The thresholds below are not changed locally. 603 if (is_key_frame) { 604 cpi->vbp_threshold_sad = 0; 605 cpi->vbp_threshold_copy = 0; 606 cpi->vbp_bsize_min = BLOCK_8X8; 607 } else { 608 if (cm->width <= 352 && cm->height <= 288) 609 cpi->vbp_threshold_sad = 10; 610 else 611 cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000 612 ? (cpi->y_dequant[q][1] << 1) 613 : 1000; 614 cpi->vbp_bsize_min = BLOCK_16X16; 615 if (cm->width <= 352 && cm->height <= 288) 616 cpi->vbp_threshold_copy = 4000; 617 else if (cm->width <= 640 && cm->height <= 360) 618 cpi->vbp_threshold_copy = 8000; 619 else 620 cpi->vbp_threshold_copy = (cpi->y_dequant[q][1] << 3) > 8000 621 ? 
(cpi->y_dequant[q][1] << 3) 622 : 8000; 623 } 624 cpi->vbp_threshold_minmax = 15 + (q >> 3); 625 } 626} 627 628// Compute the minmax over the 8x8 subblocks. 629static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d, 630 int dp, int x16_idx, int y16_idx, 631#if CONFIG_VP9_HIGHBITDEPTH 632 int highbd_flag, 633#endif 634 int pixels_wide, int pixels_high) { 635 int k; 636 int minmax_max = 0; 637 int minmax_min = 255; 638 // Loop over the 4 8x8 subblocks. 639 for (k = 0; k < 4; k++) { 640 int x8_idx = x16_idx + ((k & 1) << 3); 641 int y8_idx = y16_idx + ((k >> 1) << 3); 642 int min = 0; 643 int max = 0; 644 if (x8_idx < pixels_wide && y8_idx < pixels_high) { 645#if CONFIG_VP9_HIGHBITDEPTH 646 if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { 647 vpx_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp, 648 d + y8_idx * dp + x8_idx, dp, &min, &max); 649 } else { 650 vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, 651 dp, &min, &max); 652 } 653#else 654 vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp, 655 &min, &max); 656#endif 657 if ((max - min) > minmax_max) minmax_max = (max - min); 658 if ((max - min) < minmax_min) minmax_min = (max - min); 659 } 660 } 661 return (minmax_max - minmax_min); 662} 663 664static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d, 665 int dp, int x8_idx, int y8_idx, v8x8 *vst, 666#if CONFIG_VP9_HIGHBITDEPTH 667 int highbd_flag, 668#endif 669 int pixels_wide, int pixels_high, 670 int is_key_frame) { 671 int k; 672 for (k = 0; k < 4; k++) { 673 int x4_idx = x8_idx + ((k & 1) << 2); 674 int y4_idx = y8_idx + ((k >> 1) << 2); 675 unsigned int sse = 0; 676 int sum = 0; 677 if (x4_idx < pixels_wide && y4_idx < pixels_high) { 678 int s_avg; 679 int d_avg = 128; 680#if CONFIG_VP9_HIGHBITDEPTH 681 if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { 682 s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp); 683 if (!is_key_frame) 684 d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + 
x4_idx, dp); 685 } else { 686 s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp); 687 if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp); 688 } 689#else 690 s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp); 691 if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp); 692#endif 693 sum = s_avg - d_avg; 694 sse = sum * sum; 695 } 696 fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); 697 } 698} 699 700static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d, 701 int dp, int x16_idx, int y16_idx, v16x16 *vst, 702#if CONFIG_VP9_HIGHBITDEPTH 703 int highbd_flag, 704#endif 705 int pixels_wide, int pixels_high, 706 int is_key_frame) { 707 int k; 708 for (k = 0; k < 4; k++) { 709 int x8_idx = x16_idx + ((k & 1) << 3); 710 int y8_idx = y16_idx + ((k >> 1) << 3); 711 unsigned int sse = 0; 712 int sum = 0; 713 if (x8_idx < pixels_wide && y8_idx < pixels_high) { 714 int s_avg; 715 int d_avg = 128; 716#if CONFIG_VP9_HIGHBITDEPTH 717 if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { 718 s_avg = vpx_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp); 719 if (!is_key_frame) 720 d_avg = vpx_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp); 721 } else { 722 s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp); 723 if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp); 724 } 725#else 726 s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp); 727 if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp); 728#endif 729 sum = s_avg - d_avg; 730 sse = sum * sum; 731 } 732 fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); 733 } 734} 735 736// Check if most of the superblock is skin content, and if so, force split to 737// 32x32, and set x->sb_is_skin for use in mode selection. 
738static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res, 739 int mi_row, int mi_col, int *force_split) { 740 VP9_COMMON *const cm = &cpi->common; 741#if CONFIG_VP9_HIGHBITDEPTH 742 if (cm->use_highbitdepth) return 0; 743#endif 744 // Avoid checking superblocks on/near boundary and avoid low resolutions. 745 // Note superblock may still pick 64X64 if y_sad is very small 746 // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is. 747 if (!low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 && 748 mi_row + 8 < cm->mi_rows)) { 749 int num_16x16_skin = 0; 750 int num_16x16_nonskin = 0; 751 uint8_t *ysignal = x->plane[0].src.buf; 752 uint8_t *usignal = x->plane[1].src.buf; 753 uint8_t *vsignal = x->plane[2].src.buf; 754 int sp = x->plane[0].src.stride; 755 int spuv = x->plane[1].src.stride; 756 const int block_index = mi_row * cm->mi_cols + mi_col; 757 const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; 758 const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; 759 const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); 760 const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); 761 // Loop through the 16x16 sub-blocks. 762 int i, j; 763 for (i = 0; i < ymis; i += 2) { 764 for (j = 0; j < xmis; j += 2) { 765 int bl_index = block_index + i * cm->mi_cols + j; 766 int is_skin = cpi->skin_map[bl_index]; 767 num_16x16_skin += is_skin; 768 num_16x16_nonskin += (1 - is_skin); 769 if (num_16x16_nonskin > 3) { 770 // Exit loop if at least 4 of the 16x16 blocks are not skin. 
771 i = ymis; 772 break; 773 } 774 ysignal += 16; 775 usignal += 8; 776 vsignal += 8; 777 } 778 ysignal += (sp << 4) - 64; 779 usignal += (spuv << 3) - 32; 780 vsignal += (spuv << 3) - 32; 781 } 782 if (num_16x16_skin > 12) { 783 *force_split = 1; 784 return 1; 785 } 786 } 787 return 0; 788} 789 790static void set_low_temp_var_flag(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, 791 v64x64 *vt, int64_t thresholds[], 792 MV_REFERENCE_FRAME ref_frame_partition, 793 int mi_col, int mi_row) { 794 int i, j; 795 VP9_COMMON *const cm = &cpi->common; 796 const int mv_thr = cm->width > 640 ? 8 : 4; 797 // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected and 798 // int_pro mv is small. If the temporal variance is small set the flag 799 // variance_low for the block. The variance threshold can be adjusted, the 800 // higher the more aggressive. 801 if (ref_frame_partition == LAST_FRAME && 802 (cpi->sf.short_circuit_low_temp_var == 1 || 803 (xd->mi[0]->mv[0].as_mv.col < mv_thr && 804 xd->mi[0]->mv[0].as_mv.col > -mv_thr && 805 xd->mi[0]->mv[0].as_mv.row < mv_thr && 806 xd->mi[0]->mv[0].as_mv.row > -mv_thr))) { 807 if (xd->mi[0]->sb_type == BLOCK_64X64) { 808 if ((vt->part_variances).none.variance < (thresholds[0] >> 1)) 809 x->variance_low[0] = 1; 810 } else if (xd->mi[0]->sb_type == BLOCK_64X32) { 811 for (i = 0; i < 2; i++) { 812 if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2)) 813 x->variance_low[i + 1] = 1; 814 } 815 } else if (xd->mi[0]->sb_type == BLOCK_32X64) { 816 for (i = 0; i < 2; i++) { 817 if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2)) 818 x->variance_low[i + 3] = 1; 819 } 820 } else { 821 for (i = 0; i < 4; i++) { 822 const int idx[4][2] = { { 0, 0 }, { 0, 4 }, { 4, 0 }, { 4, 4 } }; 823 const int idx_str = 824 cm->mi_stride * (mi_row + idx[i][0]) + mi_col + idx[i][1]; 825 MODE_INFO **this_mi = cm->mi_grid_visible + idx_str; 826 827 if (cm->mi_cols <= mi_col + idx[i][1] || 828 cm->mi_rows <= mi_row + idx[i][0]) 
829 continue; 830 831 if ((*this_mi)->sb_type == BLOCK_32X32) { 832 int64_t threshold_32x32 = (cpi->sf.short_circuit_low_temp_var == 1 || 833 cpi->sf.short_circuit_low_temp_var == 3) 834 ? ((5 * thresholds[1]) >> 3) 835 : (thresholds[1] >> 1); 836 if (vt->split[i].part_variances.none.variance < threshold_32x32) 837 x->variance_low[i + 5] = 1; 838 } else if (cpi->sf.short_circuit_low_temp_var >= 2) { 839 // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block 840 // inside. 841 if ((*this_mi)->sb_type == BLOCK_16X16 || 842 (*this_mi)->sb_type == BLOCK_32X16 || 843 (*this_mi)->sb_type == BLOCK_16X32) { 844 for (j = 0; j < 4; j++) { 845 if (vt->split[i].split[j].part_variances.none.variance < 846 (thresholds[2] >> 8)) 847 x->variance_low[(i << 2) + j + 9] = 1; 848 } 849 } 850 } 851 } 852 } 853 } 854} 855 856static void copy_partitioning_helper(VP9_COMP *cpi, MACROBLOCK *x, 857 MACROBLOCKD *xd, BLOCK_SIZE bsize, 858 int mi_row, int mi_col) { 859 VP9_COMMON *const cm = &cpi->common; 860 BLOCK_SIZE *prev_part = cpi->prev_partition; 861 int start_pos = mi_row * cm->mi_stride + mi_col; 862 863 const int bsl = b_width_log2_lookup[bsize]; 864 const int bs = (1 << bsl) >> 2; 865 BLOCK_SIZE subsize; 866 PARTITION_TYPE partition; 867 868 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; 869 870 partition = partition_lookup[bsl][prev_part[start_pos]]; 871 subsize = get_subsize(bsize, partition); 872 873 if (subsize < BLOCK_8X8) { 874 set_block_size(cpi, x, xd, mi_row, mi_col, bsize); 875 } else { 876 switch (partition) { 877 case PARTITION_NONE: 878 set_block_size(cpi, x, xd, mi_row, mi_col, bsize); 879 break; 880 case PARTITION_HORZ: 881 set_block_size(cpi, x, xd, mi_row, mi_col, subsize); 882 set_block_size(cpi, x, xd, mi_row + bs, mi_col, subsize); 883 break; 884 case PARTITION_VERT: 885 set_block_size(cpi, x, xd, mi_row, mi_col, subsize); 886 set_block_size(cpi, x, xd, mi_row, mi_col + bs, subsize); 887 break; 888 case PARTITION_SPLIT: 889 
copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col); 890 copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col); 891 copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col + bs); 892 copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col + bs); 893 break; 894 default: assert(0); 895 } 896 } 897} 898 899static int copy_partitioning(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, 900 int mi_row, int mi_col, int segment_id, 901 int sb_offset) { 902 int svc_copy_allowed = 1; 903 int frames_since_key_thresh = 1; 904 if (cpi->use_svc) { 905 // For SVC, don't allow copy if base spatial layer is key frame, or if 906 // frame is not a temporal enhancement layer frame. 907 int layer = LAYER_IDS_TO_IDX(0, cpi->svc.temporal_layer_id, 908 cpi->svc.number_temporal_layers); 909 const LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; 910 if (lc->is_key_frame || !cpi->svc.non_reference_frame) svc_copy_allowed = 0; 911 frames_since_key_thresh = cpi->svc.number_spatial_layers << 1; 912 } 913 if (cpi->rc.frames_since_key > frames_since_key_thresh && svc_copy_allowed && 914 !cpi->resize_pending && segment_id == CR_SEGMENT_ID_BASE && 915 cpi->prev_segment_id[sb_offset] == CR_SEGMENT_ID_BASE && 916 cpi->copied_frame_cnt[sb_offset] < cpi->max_copied_frame) { 917 if (cpi->prev_partition != NULL) { 918 copy_partitioning_helper(cpi, x, xd, BLOCK_64X64, mi_row, mi_col); 919 cpi->copied_frame_cnt[sb_offset] += 1; 920 memcpy(x->variance_low, &(cpi->prev_variance_low[sb_offset * 25]), 921 sizeof(x->variance_low)); 922 return 1; 923 } 924 } 925 926 return 0; 927} 928 929static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, 930 BLOCK_SIZE bsize, int mi_row, int mi_col, 931 int mi_row_high, int mi_col_high) { 932 VP9_COMMON *const cm = &cpi->common; 933 SVC *const svc = &cpi->svc; 934 BLOCK_SIZE *prev_part = svc->prev_partition_svc; 935 // Variables with _high are for higher resolution. 
936 int bsize_high = 0; 937 int subsize_high = 0; 938 const int bsl_high = b_width_log2_lookup[bsize]; 939 const int bs_high = (1 << bsl_high) >> 2; 940 const int has_rows = (mi_row_high + bs_high) < cm->mi_rows; 941 const int has_cols = (mi_col_high + bs_high) < cm->mi_cols; 942 943 const int row_boundary_block_scale_factor[BLOCK_SIZES] = { 944 13, 13, 13, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0 945 }; 946 const int col_boundary_block_scale_factor[BLOCK_SIZES] = { 947 13, 13, 13, 2, 2, 0, 2, 2, 0, 2, 2, 0, 0 948 }; 949 int start_pos; 950 BLOCK_SIZE bsize_low; 951 PARTITION_TYPE partition_high; 952 953 if (mi_row_high >= cm->mi_rows || mi_col_high >= cm->mi_cols) return 0; 954 if (mi_row >= (cm->mi_rows >> 1) || mi_col >= (cm->mi_cols >> 1)) return 0; 955 956 // Find corresponding (mi_col/mi_row) block down-scaled by 2x2. 957 start_pos = mi_row * (svc->mi_stride[svc->spatial_layer_id - 1]) + mi_col; 958 bsize_low = prev_part[start_pos]; 959 // The block size is too big for boundaries. Do variance based partitioning. 960 if ((!has_rows || !has_cols) && bsize_low > BLOCK_16X16) return 1; 961 962 // For reference frames: return 1 (do variance-based partitioning) if the 963 // superblock is not low source sad and lower-resoln bsize is below 32x32. 964 if (!cpi->svc.non_reference_frame && !x->skip_low_source_sad && 965 bsize_low < BLOCK_32X32) 966 return 1; 967 968 // Scale up block size by 2x2. Force 64x64 for size larger than 32x32. 969 if (bsize_low < BLOCK_32X32) { 970 bsize_high = bsize_low + 3; 971 } else if (bsize_low >= BLOCK_32X32) { 972 bsize_high = BLOCK_64X64; 973 } 974 // Scale up blocks on boundary. 
975 if (!has_cols && has_rows) { 976 bsize_high = bsize_low + row_boundary_block_scale_factor[bsize_low]; 977 } else if (has_cols && !has_rows) { 978 bsize_high = bsize_low + col_boundary_block_scale_factor[bsize_low]; 979 } else if (!has_cols && !has_rows) { 980 bsize_high = bsize_low; 981 } 982 983 partition_high = partition_lookup[bsl_high][bsize_high]; 984 subsize_high = get_subsize(bsize, partition_high); 985 986 if (subsize_high < BLOCK_8X8) { 987 set_block_size(cpi, x, xd, mi_row_high, mi_col_high, bsize_high); 988 } else { 989 const int bsl = b_width_log2_lookup[bsize]; 990 const int bs = (1 << bsl) >> 2; 991 switch (partition_high) { 992 case PARTITION_NONE: 993 set_block_size(cpi, x, xd, mi_row_high, mi_col_high, bsize_high); 994 break; 995 case PARTITION_HORZ: 996 set_block_size(cpi, x, xd, mi_row_high, mi_col_high, subsize_high); 997 if (subsize_high < BLOCK_64X64) 998 set_block_size(cpi, x, xd, mi_row_high + bs_high, mi_col_high, 999 subsize_high); 1000 break; 1001 case PARTITION_VERT: 1002 set_block_size(cpi, x, xd, mi_row_high, mi_col_high, subsize_high); 1003 if (subsize_high < BLOCK_64X64) 1004 set_block_size(cpi, x, xd, mi_row_high, mi_col_high + bs_high, 1005 subsize_high); 1006 break; 1007 case PARTITION_SPLIT: 1008 if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row, mi_col, 1009 mi_row_high, mi_col_high)) 1010 return 1; 1011 if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row + (bs >> 1), 1012 mi_col, mi_row_high + bs_high, mi_col_high)) 1013 return 1; 1014 if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row, 1015 mi_col + (bs >> 1), mi_row_high, 1016 mi_col_high + bs_high)) 1017 return 1; 1018 if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row + (bs >> 1), 1019 mi_col + (bs >> 1), mi_row_high + bs_high, 1020 mi_col_high + bs_high)) 1021 return 1; 1022 break; 1023 default: assert(0); 1024 } 1025 } 1026 1027 return 0; 1028} 1029 1030static void update_partition_svc(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, 
1031 int mi_col) { 1032 VP9_COMMON *const cm = &cpi->common; 1033 BLOCK_SIZE *prev_part = cpi->svc.prev_partition_svc; 1034 int start_pos = mi_row * cm->mi_stride + mi_col; 1035 const int bsl = b_width_log2_lookup[bsize]; 1036 const int bs = (1 << bsl) >> 2; 1037 BLOCK_SIZE subsize; 1038 PARTITION_TYPE partition; 1039 const MODE_INFO *mi = NULL; 1040 int xx, yy; 1041 1042 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; 1043 1044 mi = cm->mi_grid_visible[start_pos]; 1045 partition = partition_lookup[bsl][mi->sb_type]; 1046 subsize = get_subsize(bsize, partition); 1047 if (subsize < BLOCK_8X8) { 1048 prev_part[start_pos] = bsize; 1049 } else { 1050 switch (partition) { 1051 case PARTITION_NONE: 1052 prev_part[start_pos] = bsize; 1053 if (bsize == BLOCK_64X64) { 1054 for (xx = 0; xx < 8; xx += 4) 1055 for (yy = 0; yy < 8; yy += 4) { 1056 if ((mi_row + xx < cm->mi_rows) && (mi_col + yy < cm->mi_cols)) 1057 prev_part[start_pos + xx * cm->mi_stride + yy] = bsize; 1058 } 1059 } 1060 break; 1061 case PARTITION_HORZ: 1062 prev_part[start_pos] = subsize; 1063 if (mi_row + bs < cm->mi_rows) 1064 prev_part[start_pos + bs * cm->mi_stride] = subsize; 1065 break; 1066 case PARTITION_VERT: 1067 prev_part[start_pos] = subsize; 1068 if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize; 1069 break; 1070 case PARTITION_SPLIT: 1071 update_partition_svc(cpi, subsize, mi_row, mi_col); 1072 update_partition_svc(cpi, subsize, mi_row + bs, mi_col); 1073 update_partition_svc(cpi, subsize, mi_row, mi_col + bs); 1074 update_partition_svc(cpi, subsize, mi_row + bs, mi_col + bs); 1075 break; 1076 default: assert(0); 1077 } 1078 } 1079} 1080 1081static void update_prev_partition_helper(VP9_COMP *cpi, BLOCK_SIZE bsize, 1082 int mi_row, int mi_col) { 1083 VP9_COMMON *const cm = &cpi->common; 1084 BLOCK_SIZE *prev_part = cpi->prev_partition; 1085 int start_pos = mi_row * cm->mi_stride + mi_col; 1086 const int bsl = b_width_log2_lookup[bsize]; 1087 const int bs = (1 << bsl) 
>> 2; 1088 BLOCK_SIZE subsize; 1089 PARTITION_TYPE partition; 1090 const MODE_INFO *mi = NULL; 1091 1092 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; 1093 1094 mi = cm->mi_grid_visible[start_pos]; 1095 partition = partition_lookup[bsl][mi->sb_type]; 1096 subsize = get_subsize(bsize, partition); 1097 if (subsize < BLOCK_8X8) { 1098 prev_part[start_pos] = bsize; 1099 } else { 1100 switch (partition) { 1101 case PARTITION_NONE: prev_part[start_pos] = bsize; break; 1102 case PARTITION_HORZ: 1103 prev_part[start_pos] = subsize; 1104 if (mi_row + bs < cm->mi_rows) 1105 prev_part[start_pos + bs * cm->mi_stride] = subsize; 1106 break; 1107 case PARTITION_VERT: 1108 prev_part[start_pos] = subsize; 1109 if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize; 1110 break; 1111 case PARTITION_SPLIT: 1112 update_prev_partition_helper(cpi, subsize, mi_row, mi_col); 1113 update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col); 1114 update_prev_partition_helper(cpi, subsize, mi_row, mi_col + bs); 1115 update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col + bs); 1116 break; 1117 default: assert(0); 1118 } 1119 } 1120} 1121 1122static void update_prev_partition(VP9_COMP *cpi, MACROBLOCK *x, int segment_id, 1123 int mi_row, int mi_col, int sb_offset) { 1124 update_prev_partition_helper(cpi, BLOCK_64X64, mi_row, mi_col); 1125 cpi->prev_segment_id[sb_offset] = segment_id; 1126 memcpy(&(cpi->prev_variance_low[sb_offset * 25]), x->variance_low, 1127 sizeof(x->variance_low)); 1128 // Reset the counter for copy partitioning 1129 cpi->copied_frame_cnt[sb_offset] = 0; 1130} 1131 1132static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize, 1133 unsigned int y_sad, int is_key_frame) { 1134 int i; 1135 MACROBLOCKD *xd = &x->e_mbd; 1136 1137 if (is_key_frame) return; 1138 1139 // For speed >= 8, avoid the chroma check if y_sad is above threshold. 
1140 if (cpi->oxcf.speed >= 8) { 1141 if (y_sad > cpi->vbp_thresholds[1] && 1142 (!cpi->noise_estimate.enabled || 1143 vp9_noise_estimate_extract_level(&cpi->noise_estimate) < kMedium)) 1144 return; 1145 } 1146 1147 for (i = 1; i <= 2; ++i) { 1148 unsigned int uv_sad = UINT_MAX; 1149 struct macroblock_plane *p = &x->plane[i]; 1150 struct macroblockd_plane *pd = &xd->plane[i]; 1151 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); 1152 1153 if (bs != BLOCK_INVALID) 1154 uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, pd->dst.buf, 1155 pd->dst.stride); 1156 1157 // TODO(marpan): Investigate if we should lower this threshold if 1158 // superblock is detected as skin. 1159 x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2); 1160 } 1161} 1162 1163static uint64_t avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift, 1164 int sb_offset) { 1165 unsigned int tmp_sse; 1166 uint64_t tmp_sad; 1167 unsigned int tmp_variance; 1168 const BLOCK_SIZE bsize = BLOCK_64X64; 1169 uint8_t *src_y = cpi->Source->y_buffer; 1170 int src_ystride = cpi->Source->y_stride; 1171 uint8_t *last_src_y = cpi->Last_Source->y_buffer; 1172 int last_src_ystride = cpi->Last_Source->y_stride; 1173 uint64_t avg_source_sad_threshold = 10000; 1174 uint64_t avg_source_sad_threshold2 = 12000; 1175#if CONFIG_VP9_HIGHBITDEPTH 1176 if (cpi->common.use_highbitdepth) return 0; 1177#endif 1178 src_y += shift; 1179 last_src_y += shift; 1180 tmp_sad = 1181 cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, last_src_ystride); 1182 tmp_variance = vpx_variance64x64(src_y, src_ystride, last_src_y, 1183 last_src_ystride, &tmp_sse); 1184 // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12) 1185 if (tmp_sad < avg_source_sad_threshold) 1186 x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kLowSadLowSumdiff 1187 : kLowSadHighSumdiff; 1188 else 1189 x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kHighSadLowSumdiff 1190 : kHighSadHighSumdiff; 1191 1192 // Detect large lighting change. 
1193 if (cpi->oxcf.content != VP9E_CONTENT_SCREEN && 1194 cpi->oxcf.rc_mode == VPX_CBR && tmp_variance < (tmp_sse >> 3) && 1195 (tmp_sse - tmp_variance) > 10000) 1196 x->content_state_sb = kLowVarHighSumdiff; 1197 else if (tmp_sad > (avg_source_sad_threshold << 1)) 1198 x->content_state_sb = kVeryHighSad; 1199 1200 if (cpi->content_state_sb_fd != NULL) { 1201 if (tmp_sad < avg_source_sad_threshold2) { 1202 // Cap the increment to 255. 1203 if (cpi->content_state_sb_fd[sb_offset] < 255) 1204 cpi->content_state_sb_fd[sb_offset]++; 1205 } else { 1206 cpi->content_state_sb_fd[sb_offset] = 0; 1207 } 1208 } 1209 return tmp_sad; 1210} 1211 1212// This function chooses partitioning based on the variance between source and 1213// reconstructed last, where variance is computed for down-sampled inputs. 1214static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, 1215 MACROBLOCK *x, int mi_row, int mi_col) { 1216 VP9_COMMON *const cm = &cpi->common; 1217 MACROBLOCKD *xd = &x->e_mbd; 1218 int i, j, k, m; 1219 v64x64 vt; 1220 v16x16 *vt2 = NULL; 1221 int force_split[21]; 1222 int avg_32x32; 1223 int max_var_32x32 = 0; 1224 int min_var_32x32 = INT_MAX; 1225 int var_32x32; 1226 int avg_16x16[4]; 1227 int maxvar_16x16[4]; 1228 int minvar_16x16[4]; 1229 int64_t threshold_4x4avg; 1230 NOISE_LEVEL noise_level = kLow; 1231 int content_state = 0; 1232 uint8_t *s; 1233 const uint8_t *d; 1234 int sp; 1235 int dp; 1236 int compute_minmax_variance = 1; 1237 unsigned int y_sad = UINT_MAX; 1238 BLOCK_SIZE bsize = BLOCK_64X64; 1239 // Ref frame used in partitioning. 1240 MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME; 1241 int pixels_wide = 64, pixels_high = 64; 1242 int64_t thresholds[4] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], 1243 cpi->vbp_thresholds[2], cpi->vbp_thresholds[3] }; 1244 1245 // For the variance computation under SVC mode, we treat the frame as key if 1246 // the reference (base layer frame) is key frame (i.e., is_key_frame == 1). 
  const int is_key_frame =
      (cm->frame_type == KEY_FRAME ||
       (is_one_pass_cbr_svc(cpi) &&
        cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));
  // Always use 4x4 partition for key frame.
  const int use_4x4_partition = cm->frame_type == KEY_FRAME;
  const int low_res = (cm->width <= 352 && cm->height <= 288);
  // Per 16x16 block: 1 when its variance was computed from 4x4 averages.
  int variance4x4downsample[16];
  int segment_id;
  // Index of this superblock in the per-superblock arrays used by the
  // copy-partition logic (prev_segment_id, copied_frame_cnt,
  // prev_variance_low).
  int sb_offset = (cm->mi_stride >> 3) * (mi_row >> 3) + (mi_col >> 3);

  set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
  segment_id = xd->mi[0]->segment_id;

  // The 8x8 min/max check is skipped at speed >= 8 and for SVC non-reference
  // frames.
  if (cpi->oxcf.speed >= 8 || (cpi->use_svc && cpi->svc.non_reference_frame))
    compute_minmax_variance = 0;

  memset(x->variance_low, 0, sizeof(x->variance_low));

  if (cpi->sf.use_source_sad && !is_key_frame) {
    // Superblock index for content_state_sb_fd; note the stride differs from
    // the one used for sb_offset.
    int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
    content_state = x->content_state_sb;
    x->skip_low_source_sad = (content_state == kLowSadLowSumdiff ||
                              content_state == kLowSadHighSumdiff)
                                 ? 1
                                 : 0;
    x->lowvar_highsumdiff = (content_state == kLowVarHighSumdiff) ? 1 : 0;
    if (cpi->content_state_sb_fd != NULL)
      x->last_sb_high_content = cpi->content_state_sb_fd[sb_offset2];

    // For SVC on top spatial layer: use/scale the partition from
    // the lower spatial resolution if svc_use_lowres_part is enabled.
    if (cpi->sf.svc_use_lowres_part &&
        cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 &&
        cpi->svc.prev_partition_svc != NULL && content_state != kVeryHighSad) {
      // A zero return means the scaled partition was accepted; a non-zero
      // return falls through to the variance-based partitioning below.
      if (!scale_partitioning_svc(cpi, x, xd, BLOCK_64X64, mi_row >> 1,
                                  mi_col >> 1, mi_row, mi_col)) {
        if (cpi->sf.copy_partition_flag) {
          update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset);
        }
        return 0;
      }
    }
    // If source_sad is low copy the partition without computing the y_sad.
    if (x->skip_low_source_sad && cpi->sf.copy_partition_flag &&
        copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) {
      x->sb_use_mv_part = 1;
      if (cpi->sf.svc_use_lowres_part &&
          cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
        update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
      return 0;
    }
  }

  // Recompute the split thresholds; a boosted cyclic-refresh segment uses its
  // own segment q index instead of the frame's base q index.
  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
      cyclic_refresh_segment_id_boosted(segment_id)) {
    int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
    set_vbp_thresholds(cpi, thresholds, q, content_state);
  } else {
    set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state);
  }

  // For non keyframes, disable 4x4 average for low resolution when speed = 8
  threshold_4x4avg = (cpi->oxcf.speed < 8) ? thresholds[1] << 1 : INT64_MAX;

  // Shrink the processed area when the superblock extends past the right or
  // bottom frame edge.
  if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
  if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);

  s = x->plane[0].src.buf;
  sp = x->plane[0].src.stride;

  // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
  // 5-20 for the 16x16 blocks.
  force_split[0] = 0;

  if (!is_key_frame) {
    // In the case of spatial/temporal scalable coding, the assumption here is
    // that the temporal reference frame will always be of type LAST_FRAME.
    // TODO(marpan): If that assumption is broken, we need to revisit this code.
    MODE_INFO *mi = xd->mi[0];
    YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);

    const YV12_BUFFER_CONFIG *yv12_g = NULL;
    unsigned int y_sad_g, y_sad_thr, y_sad_last;
    // Block size used for the SAD computations: the base size is bumped when
    // the right half and/or the bottom half of the superblock is inside the
    // frame (relies on the BLOCK_SIZE enum order 32X32, 32X64, 64X32, 64X64).
    bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 +
            (mi_row + 4 < cm->mi_rows);

    assert(yv12 != NULL);

    if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id)) {
      // For now, GOLDEN will not be used for non-zero spatial layers, since
      // it may not be a temporal reference.
      yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
    }

    // Only compute y_sad_g (sad for golden reference) for speed < 8.
    if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 &&
        (cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
      vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                           &cm->frame_refs[GOLDEN_FRAME - 1].sf);
      y_sad_g = cpi->fn_ptr[bsize].sdf(
          x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
          xd->plane[0].pre[0].stride);
    } else {
      // UINT_MAX guarantees the golden reference is never selected below.
      y_sad_g = UINT_MAX;
    }

    if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
        cpi->rc.is_src_frame_alt_ref) {
      yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME);
      vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                           &cm->frame_refs[ALTREF_FRAME - 1].sf);
      mi->ref_frame[0] = ALTREF_FRAME;
      y_sad_g = UINT_MAX;
    } else {
      vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                           &cm->frame_refs[LAST_FRAME - 1].sf);
      mi->ref_frame[0] = LAST_FRAME;
    }
    mi->ref_frame[1] = NONE;
    mi->sb_type = BLOCK_64X64;
    mi->mv[0].as_int = 0;
    mi->interp_filter = BILINEAR;

    // At speed >= 8 (except for low resolution or very high source SAD) use
    // the zero-motion SAD; otherwise run the integral-projection motion
    // search and remember its motion vector for this superblock.
    if (cpi->oxcf.speed >= 8 && !low_res &&
        x->content_state_sb != kVeryHighSad) {
      y_sad = cpi->fn_ptr[bsize].sdf(
          x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
          xd->plane[0].pre[0].stride);
    } else {
      y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
      x->sb_use_mv_part = 1;
      x->sb_mvcol_part = mi->mv[0].as_mv.col;
      x->sb_mvrow_part = mi->mv[0].as_mv.row;
    }

    y_sad_last = y_sad;
    // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad
    // are close if short_circuit_low_temp_var is on.
    y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
    if (y_sad_g < y_sad_thr) {
      vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                           &cm->frame_refs[GOLDEN_FRAME - 1].sf);
      mi->ref_frame[0] = GOLDEN_FRAME;
      mi->mv[0].as_int = 0;
      y_sad = y_sad_g;
      ref_frame_partition = GOLDEN_FRAME;
    } else {
      x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
      ref_frame_partition = LAST_FRAME;
    }

    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);

    if (cpi->use_skin_detection)
      x->sb_is_skin =
          skin_sb_split(cpi, x, low_res, mi_row, mi_col, force_split);

    // The variance below is measured between the source and this prediction.
    d = xd->plane[0].dst.buf;
    dp = xd->plane[0].dst.stride;

    // If the y_sad is very small, take 64x64 as partition and exit.
    // Don't check on boosted segment for now, as 64x64 is suppressed there.
    if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) {
      const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64];
      const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64];
      if (mi_col + block_width / 2 < cm->mi_cols &&
          mi_row + block_height / 2 < cm->mi_rows) {
        set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_64X64);
        x->variance_low[0] = 1;
        chroma_check(cpi, x, bsize, y_sad, is_key_frame);
        if (cpi->sf.svc_use_lowres_part &&
            cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
          update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
        if (cpi->sf.copy_partition_flag) {
          update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset);
        }
        return 0;
      }
    }

    // If the y_sad is small enough, copy the partition of the superblock in the
    // last frame to current frame only if the last frame is not a keyframe.
    // Stop the copy every cpi->max_copied_frame to refresh the partition.
    // TODO(jianj) : tune the threshold.
    if (cpi->sf.copy_partition_flag && y_sad_last < cpi->vbp_threshold_copy &&
        copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) {
      chroma_check(cpi, x, bsize, y_sad, is_key_frame);
      if (cpi->sf.svc_use_lowres_part &&
          cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
        update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
      return 0;
    }
  } else {
    // Key frame: there is no prediction, so the source is compared against a
    // fixed reference buffer read with zero stride.
    d = VP9_VAR_OFFS;
    dp = 0;
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      switch (xd->bd) {
        case 10: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10); break;
        case 12: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12); break;
        case 8:
        default: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8); break;
      }
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  // vt2 holds one 4x4-downsampled variance tree per 16x16 block; on non-key
  // frames it is the storage used when a 16x16 block goes down to 4x4
  // averages. It is released at the end of this function, and no return
  // statement lies between this allocation and that release.
  if (low_res && threshold_4x4avg < INT64_MAX)
    CHECK_MEM_ERROR(cm, vt2, vpx_calloc(16, sizeof(*vt2)));
  // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
  // for splits.
  for (i = 0; i < 4; i++) {
    // Pixel offset of the i-th 32x32 block inside the superblock.
    const int x32_idx = ((i & 1) << 5);
    const int y32_idx = ((i >> 1) << 5);
    const int i2 = i << 2;
    force_split[i + 1] = 0;
    avg_16x16[i] = 0;
    maxvar_16x16[i] = 0;
    minvar_16x16[i] = INT_MAX;
    for (j = 0; j < 4; j++) {
      // Pixel offset of the j-th 16x16 block inside the superblock.
      const int x16_idx = x32_idx + ((j & 1) << 4);
      const int y16_idx = y32_idx + ((j >> 1) << 4);
      const int split_index = 5 + i2 + j;
      v16x16 *vst = &vt.split[i].split[j];
      force_split[split_index] = 0;
      variance4x4downsample[i2 + j] = 0;
      if (!is_key_frame) {
        fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst,
#if CONFIG_VP9_HIGHBITDEPTH
                             xd->cur_buf->flags,
#endif
                             pixels_wide, pixels_high, is_key_frame);
        fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
        get_variance(&vt.split[i].split[j].part_variances.none);
        // Track sum, min and max of the 16x16 variances of this 32x32 block.
        avg_16x16[i] += vt.split[i].split[j].part_variances.none.variance;
        if (vt.split[i].split[j].part_variances.none.variance < minvar_16x16[i])
          minvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance;
        if (vt.split[i].split[j].part_variances.none.variance > maxvar_16x16[i])
          maxvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance;
        if (vt.split[i].split[j].part_variances.none.variance > thresholds[2]) {
          // 16X16 variance is above threshold for split, so force split to 8x8
          // for this 16x16 block (this also forces splits for upper levels).
          force_split[split_index] = 1;
          force_split[i + 1] = 1;
          force_split[0] = 1;
        } else if (compute_minmax_variance &&
                   vt.split[i].split[j].part_variances.none.variance >
                       thresholds[1] &&
                   !cyclic_refresh_segment_id_boosted(segment_id)) {
          // We have some nominal amount of 16x16 variance (based on average),
          // compute the minmax over the 8x8 sub-blocks, and if above threshold,
          // force split to 8x8 block for this 16x16 block.
          int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx,
#if CONFIG_VP9_HIGHBITDEPTH
                                          xd->cur_buf->flags,
#endif
                                          pixels_wide, pixels_high);
          int thresh_minmax = (int)cpi->vbp_threshold_minmax;
          // Very high source SAD doubles the min/max threshold.
          if (x->content_state_sb == kVeryHighSad)
            thresh_minmax = thresh_minmax << 1;
          if (minmax > thresh_minmax) {
            force_split[split_index] = 1;
            force_split[i + 1] = 1;
            force_split[0] = 1;
          }
        }
      }
      if (is_key_frame || (low_res &&
                           vt.split[i].split[j].part_variances.none.variance >
                               threshold_4x4avg)) {
        force_split[split_index] = 0;
        // Go down to 4x4 down-sampling for variance.
        variance4x4downsample[i2 + j] = 1;
        for (k = 0; k < 4; k++) {
          int x8_idx = x16_idx + ((k & 1) << 3);
          int y8_idx = y16_idx + ((k >> 1) << 3);
          // Key frames write into the main tree, other frames into vt2.
          v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k];
          fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2,
#if CONFIG_VP9_HIGHBITDEPTH
                               xd->cur_buf->flags,
#endif
                               pixels_wide, pixels_high, is_key_frame);
        }
      }
    }
  }
  if (cpi->noise_estimate.enabled)
    noise_level = vp9_noise_estimate_extract_level(&cpi->noise_estimate);
  // Fill the rest of the variance tree by summing split partition values.
  avg_32x32 = 0;
  for (i = 0; i < 4; i++) {
    const int i2 = i << 2;
    for (j = 0; j < 4; j++) {
      if (variance4x4downsample[i2 + j] == 1) {
        v16x16 *vtemp = (!is_key_frame) ? &vt2[i2 + j] : &vt.split[i].split[j];
        for (m = 0; m < 4; m++) fill_variance_tree(&vtemp->split[m], BLOCK_8X8);
        fill_variance_tree(vtemp, BLOCK_16X16);
        // If variance of this 16x16 block is above the threshold, force block
        // to split. This also forces a split on the upper levels.
        get_variance(&vtemp->part_variances.none);
        if (vtemp->part_variances.none.variance > thresholds[2]) {
          force_split[5 + i2 + j] = 1;
          force_split[i + 1] = 1;
          force_split[0] = 1;
        }
      }
    }
    fill_variance_tree(&vt.split[i], BLOCK_32X32);
    // If variance of this 32x32 block is above the threshold, or if its above
    // (some threshold of) the average variance over the sub-16x16 blocks, then
    // force this block to split. This also forces a split on the upper
    // (64x64) level.
    if (!force_split[i + 1]) {
      get_variance(&vt.split[i].part_variances.none);
      var_32x32 = vt.split[i].part_variances.none.variance;
      max_var_32x32 = VPXMAX(var_32x32, max_var_32x32);
      min_var_32x32 = VPXMIN(var_32x32, min_var_32x32);
      if (vt.split[i].part_variances.none.variance > thresholds[1] ||
          (!is_key_frame &&
           vt.split[i].part_variances.none.variance > (thresholds[1] >> 1) &&
           vt.split[i].part_variances.none.variance > (avg_16x16[i] >> 1))) {
        force_split[i + 1] = 1;
        force_split[0] = 1;
      } else if (!is_key_frame && noise_level < kLow && cm->height <= 360 &&
                 (maxvar_16x16[i] - minvar_16x16[i]) > (thresholds[1] >> 1) &&
                 maxvar_16x16[i] > thresholds[1]) {
        // Large spread between the 16x16 variances: split as well.
        force_split[i + 1] = 1;
        force_split[0] = 1;
      }
      // Only 32x32 blocks that were not already force-split contribute here.
      avg_32x32 += var_32x32;
    }
  }
  if (!force_split[0]) {
    fill_variance_tree(&vt, BLOCK_64X64);
    get_variance(&vt.part_variances.none);
    // If variance of this 64x64 block is above (some threshold of) the average
    // variance over the sub-32x32 blocks, then force this block to split.
    // Only checking this for noise level >= medium for now.
    if (!is_key_frame && noise_level >= kMedium &&
        vt.part_variances.none.variance > (9 * avg_32x32) >> 5)
      force_split[0] = 1;
    // Else if the maximum 32x32 variance minus the minimum 32x32 variance in
    // a 64x64 block is greater than threshold and the maximum 32x32 variance is
    // above a minimum threshold, then force the split of a 64x64 block
    // Only check this for low noise.
    else if (!is_key_frame && noise_level < kMedium &&
             (max_var_32x32 - min_var_32x32) > 3 * (thresholds[0] >> 3) &&
             max_var_32x32 > thresholds[0] >> 1)
      force_split[0] = 1;
  }

  // Now go through the entire structure, splitting every block size until
  // we get to one that's got a variance lower than our threshold.
  if (mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
      !set_vt_partitioning(cpi, x, xd, &vt, BLOCK_64X64, mi_row, mi_col,
                           thresholds[0], BLOCK_16X16, force_split[0])) {
    for (i = 0; i < 4; ++i) {
      // From here on the offsets are in mode-info units, not pixels.
      const int x32_idx = ((i & 1) << 2);
      const int y32_idx = ((i >> 1) << 2);
      const int i2 = i << 2;
      if (!set_vt_partitioning(cpi, x, xd, &vt.split[i], BLOCK_32X32,
                               (mi_row + y32_idx), (mi_col + x32_idx),
                               thresholds[1], BLOCK_16X16,
                               force_split[i + 1])) {
        for (j = 0; j < 4; ++j) {
          const int x16_idx = ((j & 1) << 1);
          const int y16_idx = ((j >> 1) << 1);
          // For inter frames: if variance4x4downsample[] == 1 for this 16x16
          // block, then the variance is based on 4x4 down-sampling, so use vt2
          // in set_vt_partioning(), otherwise use vt.
          v16x16 *vtemp = (!is_key_frame && variance4x4downsample[i2 + j] == 1)
                              ? &vt2[i2 + j]
                              : &vt.split[i].split[j];
          if (!set_vt_partitioning(
                  cpi, x, xd, vtemp, BLOCK_16X16, mi_row + y32_idx + y16_idx,
                  mi_col + x32_idx + x16_idx, thresholds[2], cpi->vbp_bsize_min,
                  force_split[5 + i2 + j])) {
            for (k = 0; k < 4; ++k) {
              const int x8_idx = (k & 1);
              const int y8_idx = (k >> 1);
              if (use_4x4_partition) {
                if (!set_vt_partitioning(cpi, x, xd, &vtemp->split[k],
                                         BLOCK_8X8,
                                         mi_row + y32_idx + y16_idx + y8_idx,
                                         mi_col + x32_idx + x16_idx + x8_idx,
                                         thresholds[3], BLOCK_8X8, 0)) {
                  set_block_size(
                      cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx),
                      (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_4X4);
                }
              } else {
                set_block_size(
                    cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx),
                    (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_8X8);
              }
            }
          }
        }
      }
    }
  }

  // Save the chosen partition for copy_partitioning() on later frames.
  if (cm->frame_type != KEY_FRAME && cpi->sf.copy_partition_flag) {
    update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset);
  }

  // On the second-highest spatial layer, save the partition so the top layer
  // can scale it up.
  if (cm->frame_type != KEY_FRAME && cpi->sf.svc_use_lowres_part &&
      cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
    update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);

  if (cpi->sf.short_circuit_low_temp_var) {
    set_low_temp_var_flag(cpi, x, xd, &vt, thresholds, ref_frame_partition,
                          mi_col, mi_row);
  }

  chroma_check(cpi, x, bsize, y_sad, is_key_frame);
  if (vt2) vpx_free(vt2);
  return 0;
}

// Copies the mode decision held in |ctx| into the encoder state for the block
// of size |bsize| at (mi_row, mi_col).
static void update_state(VP9_COMP *cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx,
                         int mi_row, int mi_col, BLOCK_SIZE bsize,
                         int output_enabled) {
  int i, x_idx, y;
  VP9_COMMON *const cm = &cpi->common;
  RD_COUNTS *const rdc = &td->rd_counts;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  MODE_INFO
*mi = &ctx->mic; 1680 MODE_INFO *const xdmi = xd->mi[0]; 1681 MODE_INFO *mi_addr = xd->mi[0]; 1682 const struct segmentation *const seg = &cm->seg; 1683 const int bw = num_8x8_blocks_wide_lookup[mi->sb_type]; 1684 const int bh = num_8x8_blocks_high_lookup[mi->sb_type]; 1685 const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); 1686 const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); 1687 MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; 1688 int w, h; 1689 1690 const int mis = cm->mi_stride; 1691 const int mi_width = num_8x8_blocks_wide_lookup[bsize]; 1692 const int mi_height = num_8x8_blocks_high_lookup[bsize]; 1693 int max_plane; 1694 1695 assert(mi->sb_type == bsize); 1696 1697 *mi_addr = *mi; 1698 *x->mbmi_ext = ctx->mbmi_ext; 1699 1700 // If segmentation in use 1701 if (seg->enabled) { 1702 // For in frame complexity AQ copy the segment id from the segment map. 1703 if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { 1704 const uint8_t *const map = 1705 seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; 1706 mi_addr->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); 1707 } 1708 // Else for cyclic refresh mode update the segment map, set the segment id 1709 // and then update the quantizer. 1710 if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { 1711 vp9_cyclic_refresh_update_segment(cpi, xd->mi[0], mi_row, mi_col, bsize, 1712 ctx->rate, ctx->dist, x->skip, p); 1713 } 1714 } 1715 1716 max_plane = is_inter_block(xdmi) ? 
MAX_MB_PLANE : 1; 1717 for (i = 0; i < max_plane; ++i) { 1718 p[i].coeff = ctx->coeff_pbuf[i][1]; 1719 p[i].qcoeff = ctx->qcoeff_pbuf[i][1]; 1720 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; 1721 p[i].eobs = ctx->eobs_pbuf[i][1]; 1722 } 1723 1724 for (i = max_plane; i < MAX_MB_PLANE; ++i) { 1725 p[i].coeff = ctx->coeff_pbuf[i][2]; 1726 p[i].qcoeff = ctx->qcoeff_pbuf[i][2]; 1727 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2]; 1728 p[i].eobs = ctx->eobs_pbuf[i][2]; 1729 } 1730 1731 // Restore the coding context of the MB to that that was in place 1732 // when the mode was picked for it 1733 for (y = 0; y < mi_height; y++) 1734 for (x_idx = 0; x_idx < mi_width; x_idx++) 1735 if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx && 1736 (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) { 1737 xd->mi[x_idx + y * mis] = mi_addr; 1738 } 1739 1740 if (cpi->oxcf.aq_mode != NO_AQ) vp9_init_plane_quantizers(cpi, x); 1741 1742 if (is_inter_block(xdmi) && xdmi->sb_type < BLOCK_8X8) { 1743 xdmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; 1744 xdmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; 1745 } 1746 1747 x->skip = ctx->skip; 1748 memcpy(x->zcoeff_blk[xdmi->tx_size], ctx->zcoeff_blk, 1749 sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); 1750 1751 if (!output_enabled) return; 1752 1753#if CONFIG_INTERNAL_STATS 1754 if (frame_is_intra_only(cm)) { 1755 static const int kf_mode_index[] = { 1756 THR_DC /*DC_PRED*/, THR_V_PRED /*V_PRED*/, 1757 THR_H_PRED /*H_PRED*/, THR_D45_PRED /*D45_PRED*/, 1758 THR_D135_PRED /*D135_PRED*/, THR_D117_PRED /*D117_PRED*/, 1759 THR_D153_PRED /*D153_PRED*/, THR_D207_PRED /*D207_PRED*/, 1760 THR_D63_PRED /*D63_PRED*/, THR_TM /*TM_PRED*/, 1761 }; 1762 ++cpi->mode_chosen_counts[kf_mode_index[xdmi->mode]]; 1763 } else { 1764 // Note how often each mode chosen as best 1765 ++cpi->mode_chosen_counts[ctx->best_mode_index]; 1766 } 1767#endif 1768 if (!frame_is_intra_only(cm)) { 1769 if (is_inter_block(xdmi)) { 1770 vp9_update_mv_count(td); 1771 
// Count the interpolation filter used by this inter block.
      if (cm->interp_filter == SWITCHABLE) {
        const int ctx = get_pred_context_switchable_interp(xd);
        ++td->counts->switchable_interp[ctx][xdmi->interp_filter];
      }
    }

    // Add the prediction-type differences stored in the pick-mode context
    // to the running totals in rdc.
    rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
    rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
    rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;

    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
      rdc->filter_diff[i] += ctx->best_filter_diff[i];
  }

  // Copy the block's reference frames and motion vectors into each of the
  // y_mis x x_mis MV_REF entries starting at frame_mvs.
  for (h = 0; h < y_mis; ++h) {
    MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
    for (w = 0; w < x_mis; ++w) {
      MV_REF *const mv = frame_mv + w;
      mv->ref_frame[0] = mi->ref_frame[0];
      mv->ref_frame[1] = mi->ref_frame[1];
      mv->mv[0].as_int = mi->mv[0].as_int;
      mv->mv[1].as_int = mi->mv[1].as_int;
    }
  }
}

// Records src as the current buffer of x->e_mbd and calls setup_pred_plane()
// on the source buffer of every plane of x, passing that plane's buffer and
// stride from src (Y uses y_stride, U and V share uv_stride), the position
// (mi_row, mi_col) and the plane's subsampling factors.
void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
                          int mi_row, int mi_col) {
  uint8_t *const buffers[3] = { src->y_buffer, src->u_buffer, src->v_buffer };
  const int strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
  int i;

  // Set current frame pointer.
  x->e_mbd.cur_buf = src;

  for (i = 0; i < MAX_MB_PLANE; i++)
    setup_pred_plane(&x->plane[i].src, buffers[i], strides[i], mi_row, mi_col,
                     NULL, x->e_mbd.plane[i].subsampling_x,
                     x->e_mbd.plane[i].subsampling_y);
}

// Fills the current mode info (xd->mi[0]) with a skipped ZEROMV block of size
// bsize that references LAST_FRAME with a zero motion vector, marks x as
// skipped, and initialises *rd_cost with vp9_rd_cost_init().
//   tx_mode: used to cap the transform size chosen for the block.
static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode,
                                   RD_COST *rd_cost, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  INTERP_FILTER filter_ref;

  // Take the filter from the prediction context; when that value equals
  // SWITCHABLE_FILTERS fall back to EIGHTTAP.
  filter_ref = get_pred_context_switchable_interp(xd);
  if (filter_ref == SWITCHABLE_FILTERS) filter_ref = EIGHTTAP;

  mi->sb_type = bsize;
  mi->mode = ZEROMV;
  // Largest transform allowed by both the block size and the frame tx_mode.
  mi->tx_size =
      VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[tx_mode]);
  mi->skip = 1;
  mi->uv_mode = DC_PRED;
  mi->ref_frame[0] = LAST_FRAME;
  mi->ref_frame[1] = NONE;
  mi->mv[0].as_int = 0;
  mi->interp_filter = filter_ref;

  xd->mi[0]->bmi[0].as_mv[0].as_int = 0;
  x->skip = 1;

  vp9_rd_cost_init(rd_cost);
}

// Re-initialises the plane quantizers of x and returns the RD multiplier
// computed by vp9_compute_rd_mult() for the qindex of segment_id (derived
// from cm->base_qindex) plus cm->y_dc_delta_q.
static int set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x,
                              int8_t segment_id) {
  int segment_qindex;
  VP9_COMMON *const cm = &cpi->common;
  vp9_init_plane_quantizers(cpi, x);
  vpx_clear_system_state();
  segment_qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
  return vp9_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
}

// Runs the RD mode search for the block of size bsize at (mi_row, mi_col).
//   rd_cost: receives the result of the search; when the returned rate is
//            INT_MAX the rdcost is forced to INT64_MAX.
//   ctx:     pick-mode context whose buffer set 0 is attached to the planes
//            for the search; its rate and dist are set from rd_cost on exit.
//   best_rd: passed through unchanged to the mode pickers.
// x->rdmult may be replaced by a per-segment value during the search, but is
// restored to its entry value before returning.
static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
                             MACROBLOCK *const x, int mi_row, int mi_col,
                             RD_COST *rd_cost, BLOCK_SIZE bsize,
                             PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
  int i, orig_rdmult;

  vpx_clear_system_state();

  // Use the lower precision, but faster, 32x32 fdct for mode selection.
  x->use_lp32x32fdct = 1;

  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
  mi = xd->mi[0];
  mi->sb_type = bsize;

  // Point the coefficient buffers of every plane at set 0 of ctx.
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    p[i].coeff = ctx->coeff_pbuf[i][0];
    p[i].qcoeff = ctx->qcoeff_pbuf[i][0];
    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
    p[i].eobs = ctx->eobs_pbuf[i][0];
  }
  ctx->is_coded = 0;
  ctx->skippable = 0;
  ctx->pred_pixel_ready = 0;
  x->skip_recode = 0;

  // Set to zero to make sure we do not use the previous encoded frame stats
  mi->skip = 0;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    x->source_variance = vp9_high_get_sby_perpixel_variance(
        cpi, &x->plane[0].src, bsize, xd->bd);
  } else {
    x->source_variance =
        vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
  }
#else
  x->source_variance =
      vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Save rdmult before it might be changed, so it can be restored later.
  orig_rdmult = x->rdmult;

  if ((cpi->sf.tx_domain_thresh > 0.0) || (cpi->sf.quant_opt_thresh > 0.0)) {
    double logvar = vp9_log_block_var(cpi, x, bsize);
    // Check block complexity as part of decision on using pixel or transform
    // domain distortion in rd tests.
    x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion &&
                         (logvar >= cpi->sf.tx_domain_thresh);

    // Check block complexity as part of decision on using quantized
    // coefficient optimisation inside the rd loop.
    x->block_qcoeff_opt =
        cpi->sf.allow_quant_coeff_opt && (logvar <= cpi->sf.quant_opt_thresh);
  } else {
    x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion;
    x->block_qcoeff_opt = cpi->sf.allow_quant_coeff_opt;
  }

  // Per-AQ-mode choice of segment id and, for most modes, of the rdmult used
  // during the search.
  if (aq_mode == VARIANCE_AQ) {
    const int energy =
        bsize <= BLOCK_16X16 ? x->mb_energy : vp9_block_energy(cpi, x, bsize);

    if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
        cpi->force_update_segmentation ||
        (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
      mi->segment_id = vp9_vaq_segment_id(energy);
    } else {
      const uint8_t *const map =
          cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
    }
    x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id);
  } else if (aq_mode == LOOKAHEAD_AQ) {
    const uint8_t *const map = cpi->segmentation_map;

    // I do not change rdmult here consciously.
    mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
  } else if (aq_mode == EQUATOR360_AQ) {
    if (cm->frame_type == KEY_FRAME || cpi->force_update_segmentation) {
      mi->segment_id = vp9_360aq_segment_id(mi_row, cm->mi_rows);
    } else {
      const uint8_t *const map =
          cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
    }
    x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id);
  } else if (aq_mode == COMPLEXITY_AQ) {
    x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id);
  } else if (aq_mode == CYCLIC_REFRESH_AQ) {
    const uint8_t *const map =
        cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
    // If segment is boosted, use rdmult for that segment.
    if (cyclic_refresh_segment_id_boosted(
            get_segment_id(cm, map, bsize, mi_row, mi_col)))
      x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
  }

  // Find best coding mode & reconstruct the MB so it is available
  // as a predictor for MBs that follow in the SB
  if (frame_is_intra_only(cm)) {
    vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd);
  } else {
    if (bsize >= BLOCK_8X8) {
      if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP))
        vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize,
                                           ctx, best_rd);
      else
        vp9_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                                  bsize, ctx, best_rd);
    } else {
      vp9_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                                    bsize, ctx, best_rd);
    }
  }

  // Examine the resulting rate and for AQ mode 2 make a segment choice.
  if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) &&
      (bsize >= BLOCK_16X16) &&
      (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
       (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) {
    vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
  }

  x->rdmult = orig_rdmult;

  // TODO(jingning) The rate-distortion optimization flow needs to be
  // refactored to provide proper exit/return handle.
  if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX;

  ctx->rate = rd_cost->rate;
  ctx->dist = rd_cost->dist;
}

// Updates td->counts with the intra/inter, reference-frame and inter-mode
// choices of the current block (xd->mi[0] of td->mb). Nothing is counted on
// intra-only frames.
static void update_stats(VP9_COMMON *cm, ThreadData *td) {
  const MACROBLOCK *x = &td->mb;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MODE_INFO *const mi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const BLOCK_SIZE bsize = mi->sb_type;

  if (!frame_is_intra_only(cm)) {
    FRAME_COUNTS *const counts = td->counts;
    const int inter_block = is_inter_block(mi);
    const int seg_ref_active =
        segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_REF_FRAME);
    if (!seg_ref_active) {
      counts->intra_inter[get_intra_inter_context(xd)][inter_block]++;
      // If the segment reference feature is enabled we have only a single
      // reference frame allowed for the segment so exclude it from
      // the reference frame counts used to work out probabilities.
      if (inter_block) {
        const MV_REFERENCE_FRAME ref0 = mi->ref_frame[0];
        if (cm->reference_mode == REFERENCE_MODE_SELECT)
          counts->comp_inter[vp9_get_reference_mode_context(cm, xd)]
                            [has_second_ref(mi)]++;

        if (has_second_ref(mi)) {
          counts->comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)]
                          [ref0 == GOLDEN_FRAME]++;
        } else {
          // Single reference: first count LAST vs. not-LAST, then, for
          // not-LAST, GOLDEN vs. not-GOLDEN.
          counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0]
                            [ref0 != LAST_FRAME]++;
          if (ref0 != LAST_FRAME)
            counts->single_ref[vp9_get_pred_context_single_ref_p2(xd)][1]
                              [ref0 != GOLDEN_FRAME]++;
        }
      }
    }
    if (inter_block &&
        !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) {
      const int mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]];
      if (bsize >= BLOCK_8X8) {
        const PREDICTION_MODE mode = mi->mode;
        ++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)];
      } else {
        // Below 8x8 each sub-block carries its own mode in mi->bmi[]; step
        // through the 2x2 grid by the sub-block size so each one is counted
        // once.
        const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
        const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
        int idx, idy;
        for (idy = 0; idy < 2; idy += num_4x4_h) {
          for (idx = 0; idx < 2; idx += num_4x4_w) {
            const int j = idy * 2 + idx;
            const PREDICTION_MODE b_mode = mi->bmi[j].as_mode;
            ++counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)];
          }
        }
      }
    }
  }
}

// Copies previously saved entropy contexts (a: above, l: left) and partition
// contexts (sa: above, sl: left) back into xd for the block of size bsize at
// (mi_row, mi_col). The entries of plane p start at a + num_4x4_blocks_wide * p
// and l + num_4x4_blocks_high * p; offsets and byte counts are shifted right
// by the plane's subsampling.
static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col,
                            ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
                            ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
                            PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
                            BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int p;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int mi_width = num_8x8_blocks_wide_lookup[bsize];
  int mi_height = num_8x8_blocks_high_lookup[bsize];
  for (p = 0; p < MAX_MB_PLANE; p++) {
    memcpy(xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
           a + num_4x4_blocks_wide * p,
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
               xd->plane[p].subsampling_x);
    memcpy(xd->left_context[p] +
               ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
           l + num_4x4_blocks_high * p,
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
               xd->plane[p].subsampling_y);
  }
  memcpy(xd->above_seg_context + mi_col, sa,
         sizeof(*xd->above_seg_context) * mi_width);
  memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl,
         sizeof(xd->left_seg_context[0]) * mi_height);
}

// Copies the entropy contexts and partition contexts of xd that cover the
// block of size bsize at (mi_row, mi_col) into a/l and sa/sl, using the same
// layout that restore_context() reads back.
static void save_context(MACROBLOCK *const x, int mi_row, int mi_col,
                         ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
                         ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
                         PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
                         BLOCK_SIZE bsize) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  int p;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int mi_width = num_8x8_blocks_wide_lookup[bsize];
  int mi_height = num_8x8_blocks_high_lookup[bsize];

  // buffer the above/left context information of the block in search.
  for (p = 0; p < MAX_MB_PLANE; ++p) {
    memcpy(a + num_4x4_blocks_wide * p,
           xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
               xd->plane[p].subsampling_x);
    memcpy(l + num_4x4_blocks_high * p,
           xd->left_context[p] +
               ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
               xd->plane[p].subsampling_y);
  }
  memcpy(sa, xd->above_seg_context + mi_col,
         sizeof(*xd->above_seg_context) * mi_width);
  memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK),
         sizeof(xd->left_seg_context[0]) * mi_height);
}

// Encodes one block at (mi_row, mi_col): sets the offsets, applies the mode
// decision held in ctx via update_state() and calls encode_superblock().
// Only when output_enabled is non-zero are the statistics updated and an
// EOSB_TOKEN appended at *tp (which is then advanced by one).
static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, ThreadData *td,
                     TOKENEXTRA **tp, int mi_row, int mi_col,
                     int output_enabled, BLOCK_SIZE bsize,
                     PICK_MODE_CONTEXT *ctx) {
  MACROBLOCK *const x = &td->mb;
  set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
  update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled);
  encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);

  if (output_enabled) {
    update_stats(&cpi->common, td);

    (*tp)->token = EOSB_TOKEN;
    (*tp)++;
  }
}

// Encodes the block of size bsize at (mi_row, mi_col) according to the
// partitioning stored in pc_tree, recursing into pc_tree->split[] for
// PARTITION_SPLIT above 8x8. Returns immediately when the position lies
// outside the frame. When output_enabled is set, the partition choice is
// counted in td->counts->partition.
static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile,
                      TOKENEXTRA **tp, int mi_row, int mi_col,
                      int output_enabled, BLOCK_SIZE bsize, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;

  // hbs is the offset added to mi_row / mi_col to reach the second half of
  // the block.
  const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  int ctx;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize = bsize;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  if (bsize >= BLOCK_8X8) {
    ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
    subsize = get_subsize(bsize, pc_tree->partitioning);
  } else {
    ctx = 0;
    subsize = BLOCK_4X4;
  }

  partition = partition_lookup[bsl][subsize];
  if (output_enabled && bsize != BLOCK_4X4)
    td->counts->partition[ctx][partition]++;

  switch (partition) {
    case PARTITION_NONE:
      encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
               &pc_tree->none);
      break;
    case PARTITION_VERT:
      encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
               &pc_tree->vertical[0]);
      // The second half is coded only when it starts inside the frame and
      // the parent block is larger than 8x8.
      if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
        encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled,
                 subsize, &pc_tree->vertical[1]);
      }
      break;
    case PARTITION_HORZ:
      encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
               &pc_tree->horizontal[0]);
      if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
        encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled,
                 subsize, &pc_tree->horizontal[1]);
      }
      break;
    case PARTITION_SPLIT:
      if (bsize == BLOCK_8X8) {
        encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
                 pc_tree->leaf_split[0]);
      } else {
        encode_sb(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                  pc_tree->split[0]);
        encode_sb(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
                  subsize, pc_tree->split[1]);
        encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
                  subsize, pc_tree->split[2]);
        encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled,
                  subsize, pc_tree->split[3]);
      }
      break;
    default: assert(0 && "Invalid partition type."); break;
  }

  // For a split above 8x8 the recursive calls have already updated the
  // partition context of their own sub-blocks.
  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}

// Check to see if the given partition size is allowed for a specified number
// of 8x8 block rows and columns remaining in the image.
// If not then return the largest allowed partition size
//
// *bh and *bw receive the height and width, in 8x8 units, of the sizes tried;
// they are left untouched when rows_left or cols_left is not positive.
// NOTE(review): stepping bsize down by 3 presumably moves to the next smaller
// square size in the BLOCK_SIZE enum - confirm against the enum definition.
static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left,
                                      int cols_left, int *bh, int *bw) {
  if (rows_left <= 0 || cols_left <= 0) {
    return VPXMIN(bsize, BLOCK_8X8);
  } else {
    for (; bsize > 0; bsize -= 3) {
      *bh = num_8x8_blocks_high_lookup[bsize];
      *bw = num_8x8_blocks_wide_lookup[bsize];
      if ((*bh <= rows_left) && (*bw <= cols_left)) {
        break;
      }
    }
  }
  return bsize;
}

// Walks an SB64 that is only partly available, starting with steps of
// bh_in x bw_in, points each visited mi_8x8 entry at the matching element of
// mi and assigns it the size returned by find_partition_size() for the rows
// and columns remaining at that position. find_partition_size() may update
// bh and bw, which are then used as the loop strides.
static void set_partial_b64x64_partition(MODE_INFO *mi, int mis, int bh_in,
                                         int bw_in, int row8x8_remaining,
                                         int col8x8_remaining, BLOCK_SIZE bsize,
                                         MODE_INFO **mi_8x8) {
  int bh = bh_in;
  int r, c;
  for (r = 0; r < MI_BLOCK_SIZE; r += bh) {
    int bw = bw_in;
    for (c = 0; c < MI_BLOCK_SIZE; c += bw) {
      const int index = r * mis + c;
      mi_8x8[index] = mi + index;
      mi_8x8[index]->sb_type = find_partition_size(
          bsize, row8x8_remaining - r, col8x8_remaining - c, &bh, &bw);
    }
  }
}

// This function attempts to set all mode info entries in a given SB64
// to the same block partition size.
// However, at the bottom and right borders of the image the requested size
// may not be allowed in which case this code attempts to choose the largest
// allowable partition.
static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
                                   MODE_INFO **mi_8x8, int mi_row, int mi_col,
                                   BLOCK_SIZE bsize) {
  VP9_COMMON *const cm = &cpi->common;
  const int mis = cm->mi_stride;
  // Rows / columns left between this SB64 and the end of the tile.
  const int row8x8_remaining = tile->mi_row_end - mi_row;
  const int col8x8_remaining = tile->mi_col_end - mi_col;
  int block_row, block_col;
  MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;
  int bh = num_8x8_blocks_high_lookup[bsize];
  int bw = num_8x8_blocks_wide_lookup[bsize];

  assert((row8x8_remaining > 0) && (col8x8_remaining > 0));

  // Apply the requested partition size to the SB64 if it is all "in image"
  if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
      (row8x8_remaining >= MI_BLOCK_SIZE)) {
    for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) {
      for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
        int index = block_row * mis + block_col;
        mi_8x8[index] = mi_upper_left + index;
        mi_8x8[index]->sb_type = bsize;
      }
    }
  } else {
    // Else this is a partial SB64.
    set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining,
                                 col8x8_remaining, bsize, mi_8x8);
  }
}

// (row, col) offsets, relative to the top-left of an SB64, of its sixteen
// 16x16 blocks. Entries are grouped four at a time, one group per 32x32
// quadrant.
static const struct {
  int row;
  int col;
} coord_lookup[16] = {
  // 32x32 index = 0
  { 0, 0 },
  { 0, 2 },
  { 2, 0 },
  { 2, 2 },
  // 32x32 index = 1
  { 0, 4 },
  { 0, 6 },
  { 2, 4 },
  { 2, 6 },
  // 32x32 index = 2
  { 4, 0 },
  { 4, 2 },
  { 6, 0 },
  { 6, 2 },
  // 32x32 index = 3
  { 4, 4 },
  { 4, 6 },
  { 6, 4 },
  { 6, 6 },
};

// Chooses the partitioning of the SB64 at (mi_row, mi_col) from the entries
// of cpi->source_diff_var. Every 16x16 block starts as BLOCK_16X16; a 32x32
// quadrant is merged when all four of its 16x16 var values are below
// cpi->source_var_thresh, and the whole SB64 is merged to BLOCK_64X64 when
// all four quadrants were merged and each quadrant's var is below twice that
// threshold. An SB64 that is only partly inside the tile is handed to
// set_partial_b64x64_partition() with BLOCK_16X16.
static void set_source_var_based_partition(VP9_COMP *cpi,
                                           const TileInfo *const tile,
                                           MACROBLOCK *const x,
                                           MODE_INFO **mi_8x8, int mi_row,
                                           int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  const int mis = cm->mi_stride;
  const int row8x8_remaining = tile->mi_row_end - mi_row;
  const int col8x8_remaining = tile->mi_col_end - mi_col;
  MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;

  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);

  assert((row8x8_remaining > 0) && (col8x8_remaining > 0));

  // In-image SB64
  if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
      (row8x8_remaining >= MI_BLOCK_SIZE)) {
    int i, j;
    int index;
    diff d32[4];
    // Index of this SB64's first entry in cpi->source_diff_var, which is
    // addressed with half the mi row/column and a stride of cm->mb_cols.
    const int offset = (mi_row >> 1) * cm->mb_cols + (mi_col >> 1);
    int is_larger_better = 0;
    int use32x32 = 0;
    unsigned int thr = cpi->source_var_thresh;

    memset(d32, 0, 4 * sizeof(diff));

    // i walks the four 32x32 quadrants, j the four 16x16 blocks inside each.
    for (i = 0; i < 4; i++) {
      diff *d16[4];

      for (j = 0; j < 4; j++) {
        int b_mi_row = coord_lookup[i * 4 + j].row;
        int b_mi_col = coord_lookup[i * 4 + j].col;
        int boffset = b_mi_row / 2 * cm->mb_cols + b_mi_col / 2;

        d16[j] = cpi->source_diff_var + offset + boffset;

        index = b_mi_row * mis + b_mi_col;
        mi_8x8[index] = mi_upper_left + index;
        mi_8x8[index]->sb_type = BLOCK_16X16;

        // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition
        // size to further improve quality.
      }

      is_larger_better = (d16[0]->var < thr) && (d16[1]->var < thr) &&
                         (d16[2]->var < thr) && (d16[3]->var < thr);

      // Use 32x32 partition
      if (is_larger_better) {
        use32x32 += 1;

        for (j = 0; j < 4; j++) {
          d32[i].sse += d16[j]->sse;
          d32[i].sum += d16[j]->sum;
        }

        // var = sse - sum^2 / 1024 (the shift by 10 divides by 1024).
        d32[i].var =
            (unsigned int)(d32[i].sse -
                           (unsigned int)(((int64_t)d32[i].sum * d32[i].sum) >>
                                          10));

        index = coord_lookup[i * 4].row * mis + coord_lookup[i * 4].col;
        mi_8x8[index] = mi_upper_left + index;
        mi_8x8[index]->sb_type = BLOCK_32X32;
      }
    }

    if (use32x32 == 4) {
      // The 64x64 test uses twice the 16x16 threshold.
      thr <<= 1;
      is_larger_better = (d32[0].var < thr) && (d32[1].var < thr) &&
                         (d32[2].var < thr) && (d32[3].var < thr);

      // Use 64x64 partition
      if (is_larger_better) {
        mi_8x8[0] = mi_upper_left;
        mi_8x8[0]->sb_type = BLOCK_64X64;
      }
    }
  } else {  // partial in-image SB64
    int bh = num_8x8_blocks_high_lookup[BLOCK_16X16];
    int bw = num_8x8_blocks_wide_lookup[BLOCK_16X16];
    set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining,
                                 col8x8_remaining, BLOCK_16X16, mi_8x8);
  }
}

// Copies the mode decision held in ctx into the current block (xd->mi[0] and
// x->mbmi_ext), refreshes the segment id and plane quantizers when
// segmentation and AQ are both enabled, updates the motion-vector and
// interpolation-filter counts for inter blocks, optionally stores the block's
// reference frames and motion vectors in cm->cur_frame->mvs, and finally sets
// x->skip and x->skip_txfm[0].
// Note that bw/bh (and so x_mis/y_mis) are derived from mi->sb_type as it is
// before ctx->mic is copied over *mi.
static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
                            PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
                            int bsize) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  struct macroblock_plane *const p = x->plane;
  const struct segmentation *const seg = &cm->seg;
  const int bw = num_8x8_blocks_wide_lookup[mi->sb_type];
  const int bh = num_8x8_blocks_high_lookup[mi->sb_type];
  // Clamp the block extent to the part that lies inside the frame.
  const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
  const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);

  *(xd->mi[0]) = ctx->mic;
  *(x->mbmi_ext) = ctx->mbmi_ext;

  if (seg->enabled && cpi->oxcf.aq_mode != NO_AQ) {
    // For in frame complexity AQ or variance AQ, copy segment_id from
    // segmentation_map.
    if (cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ) {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
    } else {
      // Setting segmentation map for cyclic_refresh.
      vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize,
                                        ctx->rate, ctx->dist, x->skip, p);
    }
    vp9_init_plane_quantizers(cpi, x);
  }

  if (is_inter_block(mi)) {
    vp9_update_mv_count(td);
    if (cm->interp_filter == SWITCHABLE) {
      const int pred_ctx = get_pred_context_switchable_interp(xd);
      ++td->counts->switchable_interp[pred_ctx][mi->interp_filter];
    }

    // For sub-8x8 blocks the block-level motion vectors are taken from the
    // last sub-block (bmi[3]).
    if (mi->sb_type < BLOCK_8X8) {
      mi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
      mi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
    }
  }

  if (cm->use_prev_frame_mvs || !cm->error_resilient_mode ||
      (cpi->svc.use_base_mv && cpi->svc.number_spatial_layers > 1 &&
       cpi->svc.spatial_layer_id != cpi->svc.number_spatial_layers - 1)) {
    MV_REF *const frame_mvs =
        cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
    int w, h;

    // Store the block's reference frames and motion vectors for every
    // position it covers inside the frame.
    for (h = 0; h < y_mis; ++h) {
      MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
      for (w = 0; w < x_mis; ++w) {
        MV_REF *const mv = frame_mv + w;
        mv->ref_frame[0] = mi->ref_frame[0];
        mv->ref_frame[1] = mi->ref_frame[1];
        mv->mv[0].as_int = mi->mv[0].as_int;
        mv->mv[1].as_int = mi->mv[1].as_int;
      }
    }
  }

  x->skip = ctx->skip;
  // Blocks with a non-zero segment id never reuse the stored skip_txfm flag.
  x->skip_txfm[0] = mi->segment_id ? 0 : ctx->skip_txfm[0];
}

// Encodes one block at (mi_row, mi_col) using update_state_rt() to apply the
// decision held in ctx. Unlike encode_b(), the statistics update and the
// EOSB_TOKEN written at *tp happen regardless of output_enabled, which is
// only forwarded to encode_superblock().
static void encode_b_rt(VP9_COMP *cpi, ThreadData *td,
                        const TileInfo *const tile, TOKENEXTRA **tp, int mi_row,
                        int mi_col, int output_enabled, BLOCK_SIZE bsize,
                        PICK_MODE_CONTEXT *ctx) {
  MACROBLOCK *const x = &td->mb;
  set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
  update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize);

  encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
  update_stats(&cpi->common, td);

  (*tp)->token = EOSB_TOKEN;
  (*tp)++;
}

// Encodes the block of size bsize at (mi_row, mi_col), recursing into
// pc_tree->split[] for PARTITION_SPLIT. For blocks of 8x8 and larger the
// partition is derived from the sb_type stored in cm->mi_grid_visible at this
// position rather than from pc_tree->partitioning. Returns immediately when
// the position lies outside the frame.
static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td,
                         const TileInfo *const tile, TOKENEXTRA **tp,
                         int mi_row, int mi_col, int output_enabled,
                         BLOCK_SIZE bsize, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;

  // hbs is the offset added to mi_row / mi_col to reach the second half of
  // the block.
  const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  int ctx;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  if (bsize >= BLOCK_8X8) {
    const int idx_str = xd->mi_stride * mi_row + mi_col;
    MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
    ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
    subsize = mi_8x8[0]->sb_type;
  } else {
    ctx = 0;
    subsize = BLOCK_4X4;
  }

  partition = partition_lookup[bsl][subsize];
  if (output_enabled && bsize != BLOCK_4X4)
    td->counts->partition[ctx][partition]++;

  switch (partition) {
    case PARTITION_NONE:
      encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                  &pc_tree->none);
      break;
    case PARTITION_VERT:
      encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                  &pc_tree->vertical[0]);
      // The second half is coded only when it starts inside the frame and
      // the parent block is larger than 8x8.
      if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
        encode_b_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
                    subsize, &pc_tree->vertical[1]);
      }
      break;
    case PARTITION_HORZ:
      encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                  &pc_tree->horizontal[0]);
      if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
        encode_b_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
                    subsize, &pc_tree->horizontal[1]);
      }
      break;
    case PARTITION_SPLIT:
      // The stored sb_type only identified the split; recurse with the
      // quarter-size block.
      subsize = get_subsize(bsize, PARTITION_SPLIT);
      encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                   pc_tree->split[0]);
      encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
                   subsize, pc_tree->split[1]);
      encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
                   subsize, pc_tree->split[2]);
      encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs,
                   output_enabled, subsize, pc_tree->split[3]);
      break;
    default: assert(0 && "Invalid partition type."); break;
  }

  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}

static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
                             TileDataEnc *tile_data, MODE_INFO **mi_8x8,
                             TOKENEXTRA **tp, int mi_row, int mi_col,
                             BLOCK_SIZE bsize, int *rate, int64_t *dist,
                             int do_recon, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mis = cm->mi_stride;
  const int bsl = b_width_log2_lookup[bsize];
  const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2;
  const int bss = (1 << bsl) / 4;
  int i, pl;
  PARTITION_TYPE partition = PARTITION_NONE;
  BLOCK_SIZE subsize;
  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
  PARTITION_CONTEXT sl[8], sa[8];
  RD_COST last_part_rdc, none_rdc, chosen_rdc;
BLOCK_SIZE sub_subsize = BLOCK_4X4; 2550 int splits_below = 0; 2551 BLOCK_SIZE bs_type = mi_8x8[0]->sb_type; 2552 int do_partition_search = 1; 2553 PICK_MODE_CONTEXT *ctx = &pc_tree->none; 2554 2555 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; 2556 2557 assert(num_4x4_blocks_wide_lookup[bsize] == 2558 num_4x4_blocks_high_lookup[bsize]); 2559 2560 vp9_rd_cost_reset(&last_part_rdc); 2561 vp9_rd_cost_reset(&none_rdc); 2562 vp9_rd_cost_reset(&chosen_rdc); 2563 2564 partition = partition_lookup[bsl][bs_type]; 2565 subsize = get_subsize(bsize, partition); 2566 2567 pc_tree->partitioning = partition; 2568 save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 2569 2570 if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ) { 2571 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); 2572 x->mb_energy = vp9_block_energy(cpi, x, bsize); 2573 } 2574 2575 if (do_partition_search && 2576 cpi->sf.partition_search_type == SEARCH_PARTITION && 2577 cpi->sf.adjust_partitioning_from_last_frame) { 2578 // Check if any of the sub blocks are further split. 2579 if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) { 2580 sub_subsize = get_subsize(subsize, PARTITION_SPLIT); 2581 splits_below = 1; 2582 for (i = 0; i < 4; i++) { 2583 int jj = i >> 1, ii = i & 0x01; 2584 MODE_INFO *this_mi = mi_8x8[jj * bss * mis + ii * bss]; 2585 if (this_mi && this_mi->sb_type >= sub_subsize) { 2586 splits_below = 0; 2587 } 2588 } 2589 } 2590 2591 // If partition is not none try none unless each of the 4 splits are split 2592 // even further.. 
2593 if (partition != PARTITION_NONE && !splits_below && 2594 mi_row + (mi_step >> 1) < cm->mi_rows && 2595 mi_col + (mi_step >> 1) < cm->mi_cols) { 2596 pc_tree->partitioning = PARTITION_NONE; 2597 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize, ctx, 2598 INT64_MAX); 2599 2600 pl = partition_plane_context(xd, mi_row, mi_col, bsize); 2601 2602 if (none_rdc.rate < INT_MAX) { 2603 none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; 2604 none_rdc.rdcost = 2605 RDCOST(x->rdmult, x->rddiv, none_rdc.rate, none_rdc.dist); 2606 } 2607 2608 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 2609 mi_8x8[0]->sb_type = bs_type; 2610 pc_tree->partitioning = partition; 2611 } 2612 } 2613 2614 switch (partition) { 2615 case PARTITION_NONE: 2616 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, bsize, 2617 ctx, INT64_MAX); 2618 break; 2619 case PARTITION_HORZ: 2620 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, 2621 subsize, &pc_tree->horizontal[0], INT64_MAX); 2622 if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && 2623 mi_row + (mi_step >> 1) < cm->mi_rows) { 2624 RD_COST tmp_rdc; 2625 PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; 2626 vp9_rd_cost_init(&tmp_rdc); 2627 update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); 2628 encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); 2629 rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col, 2630 &tmp_rdc, subsize, &pc_tree->horizontal[1], INT64_MAX); 2631 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { 2632 vp9_rd_cost_reset(&last_part_rdc); 2633 break; 2634 } 2635 last_part_rdc.rate += tmp_rdc.rate; 2636 last_part_rdc.dist += tmp_rdc.dist; 2637 last_part_rdc.rdcost += tmp_rdc.rdcost; 2638 } 2639 break; 2640 case PARTITION_VERT: 2641 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, 2642 subsize, &pc_tree->vertical[0], INT64_MAX); 2643 if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 
&& 2644 mi_col + (mi_step >> 1) < cm->mi_cols) { 2645 RD_COST tmp_rdc; 2646 PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0]; 2647 vp9_rd_cost_init(&tmp_rdc); 2648 update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); 2649 encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); 2650 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1), 2651 &tmp_rdc, subsize, 2652 &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX); 2653 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { 2654 vp9_rd_cost_reset(&last_part_rdc); 2655 break; 2656 } 2657 last_part_rdc.rate += tmp_rdc.rate; 2658 last_part_rdc.dist += tmp_rdc.dist; 2659 last_part_rdc.rdcost += tmp_rdc.rdcost; 2660 } 2661 break; 2662 case PARTITION_SPLIT: 2663 if (bsize == BLOCK_8X8) { 2664 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, 2665 subsize, pc_tree->leaf_split[0], INT64_MAX); 2666 break; 2667 } 2668 last_part_rdc.rate = 0; 2669 last_part_rdc.dist = 0; 2670 last_part_rdc.rdcost = 0; 2671 for (i = 0; i < 4; i++) { 2672 int x_idx = (i & 1) * (mi_step >> 1); 2673 int y_idx = (i >> 1) * (mi_step >> 1); 2674 int jj = i >> 1, ii = i & 0x01; 2675 RD_COST tmp_rdc; 2676 if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) 2677 continue; 2678 2679 vp9_rd_cost_init(&tmp_rdc); 2680 rd_use_partition(cpi, td, tile_data, mi_8x8 + jj * bss * mis + ii * bss, 2681 tp, mi_row + y_idx, mi_col + x_idx, subsize, 2682 &tmp_rdc.rate, &tmp_rdc.dist, i != 3, 2683 pc_tree->split[i]); 2684 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { 2685 vp9_rd_cost_reset(&last_part_rdc); 2686 break; 2687 } 2688 last_part_rdc.rate += tmp_rdc.rate; 2689 last_part_rdc.dist += tmp_rdc.dist; 2690 } 2691 break; 2692 default: assert(0); break; 2693 } 2694 2695 pl = partition_plane_context(xd, mi_row, mi_col, bsize); 2696 if (last_part_rdc.rate < INT_MAX) { 2697 last_part_rdc.rate += cpi->partition_cost[pl][partition]; 2698 last_part_rdc.rdcost = 2699 RDCOST(x->rdmult, x->rddiv, 
last_part_rdc.rate, last_part_rdc.dist); 2700 } 2701 2702 if (do_partition_search && cpi->sf.adjust_partitioning_from_last_frame && 2703 cpi->sf.partition_search_type == SEARCH_PARTITION && 2704 partition != PARTITION_SPLIT && bsize > BLOCK_8X8 && 2705 (mi_row + mi_step < cm->mi_rows || 2706 mi_row + (mi_step >> 1) == cm->mi_rows) && 2707 (mi_col + mi_step < cm->mi_cols || 2708 mi_col + (mi_step >> 1) == cm->mi_cols)) { 2709 BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); 2710 chosen_rdc.rate = 0; 2711 chosen_rdc.dist = 0; 2712 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 2713 pc_tree->partitioning = PARTITION_SPLIT; 2714 2715 // Split partition. 2716 for (i = 0; i < 4; i++) { 2717 int x_idx = (i & 1) * (mi_step >> 1); 2718 int y_idx = (i >> 1) * (mi_step >> 1); 2719 RD_COST tmp_rdc; 2720 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; 2721 PARTITION_CONTEXT sl[8], sa[8]; 2722 2723 if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) 2724 continue; 2725 2726 save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 2727 pc_tree->split[i]->partitioning = PARTITION_NONE; 2728 rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, 2729 &tmp_rdc, split_subsize, &pc_tree->split[i]->none, 2730 INT64_MAX); 2731 2732 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 2733 2734 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { 2735 vp9_rd_cost_reset(&chosen_rdc); 2736 break; 2737 } 2738 2739 chosen_rdc.rate += tmp_rdc.rate; 2740 chosen_rdc.dist += tmp_rdc.dist; 2741 2742 if (i != 3) 2743 encode_sb(cpi, td, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0, 2744 split_subsize, pc_tree->split[i]); 2745 2746 pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx, 2747 split_subsize); 2748 chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; 2749 } 2750 pl = partition_plane_context(xd, mi_row, mi_col, bsize); 2751 if (chosen_rdc.rate < INT_MAX) { 2752 chosen_rdc.rate += 
cpi->partition_cost[pl][PARTITION_SPLIT]; 2753 chosen_rdc.rdcost = 2754 RDCOST(x->rdmult, x->rddiv, chosen_rdc.rate, chosen_rdc.dist); 2755 } 2756 } 2757 2758 // If last_part is better set the partitioning to that. 2759 if (last_part_rdc.rdcost < chosen_rdc.rdcost) { 2760 mi_8x8[0]->sb_type = bsize; 2761 if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition; 2762 chosen_rdc = last_part_rdc; 2763 } 2764 // If none was better set the partitioning to that. 2765 if (none_rdc.rdcost < chosen_rdc.rdcost) { 2766 if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; 2767 chosen_rdc = none_rdc; 2768 } 2769 2770 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 2771 2772 // We must have chosen a partitioning and encoding or we'll fail later on. 2773 // No other opportunities for success. 2774 if (bsize == BLOCK_64X64) 2775 assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX); 2776 2777 if (do_recon) { 2778 int output_enabled = (bsize == BLOCK_64X64); 2779 encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, 2780 pc_tree); 2781 } 2782 2783 *rate = chosen_rdc.rate; 2784 *dist = chosen_rdc.dist; 2785} 2786 2787static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = { 2788 BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, 2789 BLOCK_4X4, BLOCK_8X8, BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, 2790 BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 2791}; 2792 2793static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { 2794 BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, 2795 BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, 2796 BLOCK_64X64, BLOCK_64X64, BLOCK_64X64 2797}; 2798 2799// Look at all the mode_info entries for blocks that are part of this 2800// partition and find the min and max values for sb_type. 2801// At the moment this is designed to work on a 64x64 SB but could be 2802// adjusted to use a size parameter. 
2803// 2804// The min and max are assumed to have been initialized prior to calling this 2805// function so repeat calls can accumulate a min and max of more than one sb64. 2806static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8, 2807 BLOCK_SIZE *min_block_size, 2808 BLOCK_SIZE *max_block_size, 2809 int bs_hist[BLOCK_SIZES]) { 2810 int sb_width_in_blocks = MI_BLOCK_SIZE; 2811 int sb_height_in_blocks = MI_BLOCK_SIZE; 2812 int i, j; 2813 int index = 0; 2814 2815 // Check the sb_type for each block that belongs to this region. 2816 for (i = 0; i < sb_height_in_blocks; ++i) { 2817 for (j = 0; j < sb_width_in_blocks; ++j) { 2818 MODE_INFO *mi = mi_8x8[index + j]; 2819 BLOCK_SIZE sb_type = mi ? mi->sb_type : 0; 2820 bs_hist[sb_type]++; 2821 *min_block_size = VPXMIN(*min_block_size, sb_type); 2822 *max_block_size = VPXMAX(*max_block_size, sb_type); 2823 } 2824 index += xd->mi_stride; 2825 } 2826} 2827 2828// Next square block size less or equal than current block size. 2829static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { 2830 BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8, 2831 BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, 2832 BLOCK_32X32, BLOCK_32X32, BLOCK_64X64 2833}; 2834 2835// Look at neighboring blocks and set a min and max partition size based on 2836// what they chose. 
2837static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, 2838 MACROBLOCKD *const xd, int mi_row, 2839 int mi_col, BLOCK_SIZE *min_block_size, 2840 BLOCK_SIZE *max_block_size) { 2841 VP9_COMMON *const cm = &cpi->common; 2842 MODE_INFO **mi = xd->mi; 2843 const int left_in_image = !!xd->left_mi; 2844 const int above_in_image = !!xd->above_mi; 2845 const int row8x8_remaining = tile->mi_row_end - mi_row; 2846 const int col8x8_remaining = tile->mi_col_end - mi_col; 2847 int bh, bw; 2848 BLOCK_SIZE min_size = BLOCK_4X4; 2849 BLOCK_SIZE max_size = BLOCK_64X64; 2850 int bs_hist[BLOCK_SIZES] = { 0 }; 2851 2852 // Trap case where we do not have a prediction. 2853 if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) { 2854 // Default "min to max" and "max to min" 2855 min_size = BLOCK_64X64; 2856 max_size = BLOCK_4X4; 2857 2858 // NOTE: each call to get_sb_partition_size_range() uses the previous 2859 // passed in values for min and max as a starting point. 2860 // Find the min and max partition used in previous frame at this location 2861 if (cm->frame_type != KEY_FRAME) { 2862 MODE_INFO **prev_mi = 2863 &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]; 2864 get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size, bs_hist); 2865 } 2866 // Find the min and max partition sizes used in the left SB64 2867 if (left_in_image) { 2868 MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE]; 2869 get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size, 2870 bs_hist); 2871 } 2872 // Find the min and max partition sizes used in the above SB64. 2873 if (above_in_image) { 2874 MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE]; 2875 get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size, 2876 bs_hist); 2877 } 2878 2879 // Adjust observed min and max for "relaxed" auto partition case. 
2880 if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { 2881 min_size = min_partition_size[min_size]; 2882 max_size = max_partition_size[max_size]; 2883 } 2884 } 2885 2886 // Check border cases where max and min from neighbors may not be legal. 2887 max_size = find_partition_size(max_size, row8x8_remaining, col8x8_remaining, 2888 &bh, &bw); 2889 // Test for blocks at the edge of the active image. 2890 // This may be the actual edge of the image or where there are formatting 2891 // bars. 2892 if (vp9_active_edge_sb(cpi, mi_row, mi_col)) { 2893 min_size = BLOCK_4X4; 2894 } else { 2895 min_size = 2896 VPXMIN(cpi->sf.rd_auto_partition_min_limit, VPXMIN(min_size, max_size)); 2897 } 2898 2899 // When use_square_partition_only is true, make sure at least one square 2900 // partition is allowed by selecting the next smaller square size as 2901 // *min_block_size. 2902 if (cpi->sf.use_square_partition_only && 2903 next_square_size[max_size] < min_size) { 2904 min_size = next_square_size[max_size]; 2905 } 2906 2907 *min_block_size = min_size; 2908 *max_block_size = max_size; 2909} 2910 2911// TODO(jingning) refactor functions setting partition search range 2912static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, int mi_row, 2913 int mi_col, BLOCK_SIZE bsize, 2914 BLOCK_SIZE *min_bs, BLOCK_SIZE *max_bs) { 2915 int mi_width = num_8x8_blocks_wide_lookup[bsize]; 2916 int mi_height = num_8x8_blocks_high_lookup[bsize]; 2917 int idx, idy; 2918 2919 MODE_INFO *mi; 2920 const int idx_str = cm->mi_stride * mi_row + mi_col; 2921 MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str]; 2922 BLOCK_SIZE bs, min_size, max_size; 2923 2924 min_size = BLOCK_64X64; 2925 max_size = BLOCK_4X4; 2926 2927 if (prev_mi) { 2928 for (idy = 0; idy < mi_height; ++idy) { 2929 for (idx = 0; idx < mi_width; ++idx) { 2930 mi = prev_mi[idy * cm->mi_stride + idx]; 2931 bs = mi ? 
mi->sb_type : bsize; 2932 min_size = VPXMIN(min_size, bs); 2933 max_size = VPXMAX(max_size, bs); 2934 } 2935 } 2936 } 2937 2938 if (xd->left_mi) { 2939 for (idy = 0; idy < mi_height; ++idy) { 2940 mi = xd->mi[idy * cm->mi_stride - 1]; 2941 bs = mi ? mi->sb_type : bsize; 2942 min_size = VPXMIN(min_size, bs); 2943 max_size = VPXMAX(max_size, bs); 2944 } 2945 } 2946 2947 if (xd->above_mi) { 2948 for (idx = 0; idx < mi_width; ++idx) { 2949 mi = xd->mi[idx - cm->mi_stride]; 2950 bs = mi ? mi->sb_type : bsize; 2951 min_size = VPXMIN(min_size, bs); 2952 max_size = VPXMAX(max_size, bs); 2953 } 2954 } 2955 2956 if (min_size == max_size) { 2957 min_size = min_partition_size[min_size]; 2958 max_size = max_partition_size[max_size]; 2959 } 2960 2961 *min_bs = min_size; 2962 *max_bs = max_size; 2963} 2964 2965static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { 2966 memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv)); 2967} 2968 2969static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { 2970 memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv)); 2971} 2972 2973#if CONFIG_FP_MB_STATS 2974const int num_16x16_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1, 2975 1, 2, 2, 2, 4, 4 }; 2976const int num_16x16_blocks_high_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1, 2977 2, 1, 2, 4, 2, 4 }; 2978const int qindex_skip_threshold_lookup[BLOCK_SIZES] = { 2979 0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120 2980}; 2981const int qindex_split_threshold_lookup[BLOCK_SIZES] = { 2982 0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120 2983}; 2984const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = { 2985 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6 2986}; 2987 2988typedef enum { 2989 MV_ZERO = 0, 2990 MV_LEFT = 1, 2991 MV_UP = 2, 2992 MV_RIGHT = 3, 2993 MV_DOWN = 4, 2994 MV_INVALID 2995} MOTION_DIRECTION; 2996 2997static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) { 2998 if (fp_byte & FPMB_MOTION_ZERO_MASK) { 2999 return MV_ZERO; 
3000 } else if (fp_byte & FPMB_MOTION_LEFT_MASK) { 3001 return MV_LEFT; 3002 } else if (fp_byte & FPMB_MOTION_RIGHT_MASK) { 3003 return MV_RIGHT; 3004 } else if (fp_byte & FPMB_MOTION_UP_MASK) { 3005 return MV_UP; 3006 } else { 3007 return MV_DOWN; 3008 } 3009} 3010 3011static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv, 3012 MOTION_DIRECTION that_mv) { 3013 if (this_mv == that_mv) { 3014 return 0; 3015 } else { 3016 return abs(this_mv - that_mv) == 2 ? 2 : 1; 3017 } 3018} 3019#endif 3020 3021// Calculate the score used in machine-learning based partition search early 3022// termination. 3023static double compute_score(VP9_COMMON *const cm, MACROBLOCKD *const xd, 3024 PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, 3025 BLOCK_SIZE bsize) { 3026 const double *clf; 3027 const double *mean; 3028 const double *sd; 3029 const int mag_mv = 3030 abs(ctx->mic.mv[0].as_mv.col) + abs(ctx->mic.mv[0].as_mv.row); 3031 const int left_in_image = !!xd->left_mi; 3032 const int above_in_image = !!xd->above_mi; 3033 MODE_INFO **prev_mi = 3034 &cm->prev_mi_grid_visible[mi_col + cm->mi_stride * mi_row]; 3035 int above_par = 0; // above_partitioning 3036 int left_par = 0; // left_partitioning 3037 int last_par = 0; // last_partitioning 3038 BLOCK_SIZE context_size; 3039 double score; 3040 int offset = 0; 3041 3042 assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]); 3043 3044 if (above_in_image) { 3045 context_size = xd->above_mi->sb_type; 3046 if (context_size < bsize) 3047 above_par = 2; 3048 else if (context_size == bsize) 3049 above_par = 1; 3050 } 3051 3052 if (left_in_image) { 3053 context_size = xd->left_mi->sb_type; 3054 if (context_size < bsize) 3055 left_par = 2; 3056 else if (context_size == bsize) 3057 left_par = 1; 3058 } 3059 3060 if (prev_mi) { 3061 context_size = prev_mi[0]->sb_type; 3062 if (context_size < bsize) 3063 last_par = 2; 3064 else if (context_size == bsize) 3065 last_par = 1; 3066 } 3067 3068 if (bsize == BLOCK_64X64) 3069 
offset = 0; 3070 else if (bsize == BLOCK_32X32) 3071 offset = 8; 3072 else if (bsize == BLOCK_16X16) 3073 offset = 16; 3074 3075 // early termination score calculation 3076 clf = &classifiers[offset]; 3077 mean = &train_mean[offset]; 3078 sd = &train_stdm[offset]; 3079 score = clf[0] * (((double)ctx->rate - mean[0]) / sd[0]) + 3080 clf[1] * (((double)ctx->dist - mean[1]) / sd[1]) + 3081 clf[2] * (((double)mag_mv / 2 - mean[2]) * sd[2]) + 3082 clf[3] * (((double)(left_par + above_par) / 2 - mean[3]) * sd[3]) + 3083 clf[4] * (((double)ctx->sum_y_eobs - mean[4]) / sd[4]) + 3084 clf[5] * (((double)cm->base_qindex - mean[5]) * sd[5]) + 3085 clf[6] * (((double)last_par - mean[6]) * sd[6]) + clf[7]; 3086 return score; 3087} 3088 3089// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are 3090// unlikely to be selected depending on previous rate-distortion optimization 3091// results, for encoding speed-up. 3092static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, 3093 TileDataEnc *tile_data, TOKENEXTRA **tp, 3094 int mi_row, int mi_col, BLOCK_SIZE bsize, 3095 RD_COST *rd_cost, int64_t best_rd, 3096 PC_TREE *pc_tree) { 3097 VP9_COMMON *const cm = &cpi->common; 3098 TileInfo *const tile_info = &tile_data->tile_info; 3099 MACROBLOCK *const x = &td->mb; 3100 MACROBLOCKD *const xd = &x->e_mbd; 3101 const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2; 3102 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; 3103 PARTITION_CONTEXT sl[8], sa[8]; 3104 TOKENEXTRA *tp_orig = *tp; 3105 PICK_MODE_CONTEXT *ctx = &pc_tree->none; 3106 int i; 3107 const int pl = partition_plane_context(xd, mi_row, mi_col, bsize); 3108 BLOCK_SIZE subsize; 3109 RD_COST this_rdc, sum_rdc, best_rdc; 3110 int do_split = bsize >= BLOCK_8X8; 3111 int do_rect = 1; 3112 INTERP_FILTER pred_interp_filter; 3113 3114 // Override skipping rectangular partition operations for edge blocks 3115 const int force_horz_split = (mi_row + mi_step >= cm->mi_rows); 3116 const int 
force_vert_split = (mi_col + mi_step >= cm->mi_cols); 3117 const int xss = x->e_mbd.plane[1].subsampling_x; 3118 const int yss = x->e_mbd.plane[1].subsampling_y; 3119 3120 BLOCK_SIZE min_size = x->min_partition_size; 3121 BLOCK_SIZE max_size = x->max_partition_size; 3122 3123#if CONFIG_FP_MB_STATS 3124 unsigned int src_diff_var = UINT_MAX; 3125 int none_complexity = 0; 3126#endif 3127 3128 int partition_none_allowed = !force_horz_split && !force_vert_split; 3129 int partition_horz_allowed = 3130 !force_vert_split && yss <= xss && bsize >= BLOCK_8X8; 3131 int partition_vert_allowed = 3132 !force_horz_split && xss <= yss && bsize >= BLOCK_8X8; 3133 3134 int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_thr.dist; 3135 int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate; 3136 3137 (void)*tp_orig; 3138 3139 assert(num_8x8_blocks_wide_lookup[bsize] == 3140 num_8x8_blocks_high_lookup[bsize]); 3141 3142 // Adjust dist breakout threshold according to the partition size. 3143 dist_breakout_thr >>= 3144 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); 3145 rate_breakout_thr *= num_pels_log2_lookup[bsize]; 3146 3147 vp9_rd_cost_init(&this_rdc); 3148 vp9_rd_cost_init(&sum_rdc); 3149 vp9_rd_cost_reset(&best_rdc); 3150 best_rdc.rdcost = best_rd; 3151 3152 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); 3153 3154 if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ && 3155 cpi->oxcf.aq_mode != LOOKAHEAD_AQ) 3156 x->mb_energy = vp9_block_energy(cpi, x, bsize); 3157 3158 if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) { 3159 int cb_partition_search_ctrl = 3160 ((pc_tree->index == 0 || pc_tree->index == 3) + 3161 get_chessboard_index(cm->current_video_frame)) & 3162 0x1; 3163 3164 if (cb_partition_search_ctrl && bsize > min_size && bsize < max_size) 3165 set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size); 3166 } 3167 3168 // Determine partition types in search according to the speed features. 
3169 // The threshold set here has to be of square block size. 3170 if (cpi->sf.auto_min_max_partition_size) { 3171 partition_none_allowed &= (bsize <= max_size && bsize >= min_size); 3172 partition_horz_allowed &= 3173 ((bsize <= max_size && bsize > min_size) || force_horz_split); 3174 partition_vert_allowed &= 3175 ((bsize <= max_size && bsize > min_size) || force_vert_split); 3176 do_split &= bsize > min_size; 3177 } 3178 3179 if (cpi->sf.use_square_partition_only && 3180 bsize > cpi->sf.use_square_only_threshold) { 3181 if (cpi->use_svc) { 3182 if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless) 3183 partition_horz_allowed &= force_horz_split; 3184 if (!vp9_active_v_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless) 3185 partition_vert_allowed &= force_vert_split; 3186 } else { 3187 partition_horz_allowed &= force_horz_split; 3188 partition_vert_allowed &= force_vert_split; 3189 } 3190 } 3191 3192 save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 3193 3194#if CONFIG_FP_MB_STATS 3195 if (cpi->use_fp_mb_stats) { 3196 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); 3197 src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row, 3198 mi_col, bsize); 3199 } 3200#endif 3201 3202#if CONFIG_FP_MB_STATS 3203 // Decide whether we shall split directly and skip searching NONE by using 3204 // the first pass block statistics 3205 if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_split && 3206 partition_none_allowed && src_diff_var > 4 && 3207 cm->base_qindex < qindex_split_threshold_lookup[bsize]) { 3208 int mb_row = mi_row >> 1; 3209 int mb_col = mi_col >> 1; 3210 int mb_row_end = 3211 VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows); 3212 int mb_col_end = 3213 VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols); 3214 int r, c; 3215 3216 // compute a complexity measure, basically measure inconsistency of motion 3217 // vectors obtained from the first pass in the current block 3218 for (r = 
mb_row; r < mb_row_end; r++) { 3219 for (c = mb_col; c < mb_col_end; c++) { 3220 const int mb_index = r * cm->mb_cols + c; 3221 3222 MOTION_DIRECTION this_mv; 3223 MOTION_DIRECTION right_mv; 3224 MOTION_DIRECTION bottom_mv; 3225 3226 this_mv = 3227 get_motion_direction_fp(cpi->twopass.this_frame_mb_stats[mb_index]); 3228 3229 // to its right 3230 if (c != mb_col_end - 1) { 3231 right_mv = get_motion_direction_fp( 3232 cpi->twopass.this_frame_mb_stats[mb_index + 1]); 3233 none_complexity += get_motion_inconsistency(this_mv, right_mv); 3234 } 3235 3236 // to its bottom 3237 if (r != mb_row_end - 1) { 3238 bottom_mv = get_motion_direction_fp( 3239 cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]); 3240 none_complexity += get_motion_inconsistency(this_mv, bottom_mv); 3241 } 3242 3243 // do not count its left and top neighbors to avoid double counting 3244 } 3245 } 3246 3247 if (none_complexity > complexity_16x16_blocks_threshold[bsize]) { 3248 partition_none_allowed = 0; 3249 } 3250 } 3251#endif 3252 3253 // PARTITION_NONE 3254 if (partition_none_allowed) { 3255 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx, 3256 best_rdc.rdcost); 3257 if (this_rdc.rate != INT_MAX) { 3258 if (bsize >= BLOCK_8X8) { 3259 this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; 3260 this_rdc.rdcost = 3261 RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); 3262 } 3263 3264 if (this_rdc.rdcost < best_rdc.rdcost) { 3265 MODE_INFO *mi = xd->mi[0]; 3266 3267 best_rdc = this_rdc; 3268 if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; 3269 3270 if (!cpi->sf.ml_partition_search_early_termination) { 3271 // If all y, u, v transform blocks in this partition are skippable, 3272 // and the dist & rate are within the thresholds, the partition search 3273 // is terminated for current branch of the partition search tree. 
3274 if (!x->e_mbd.lossless && ctx->skippable && 3275 ((best_rdc.dist < (dist_breakout_thr >> 2)) || 3276 (best_rdc.dist < dist_breakout_thr && 3277 best_rdc.rate < rate_breakout_thr))) { 3278 do_split = 0; 3279 do_rect = 0; 3280 } 3281 } else { 3282 // Currently, the machine-learning based partition search early 3283 // termination is only used while bsize is 16x16, 32x32 or 64x64, 3284 // VPXMIN(cm->width, cm->height) >= 480, and speed = 0. 3285 if (!x->e_mbd.lossless && 3286 !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP) && 3287 ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) { 3288 if (compute_score(cm, xd, ctx, mi_row, mi_col, bsize) < 0.0) { 3289 do_split = 0; 3290 do_rect = 0; 3291 } 3292 } 3293 } 3294 3295#if CONFIG_FP_MB_STATS 3296 // Check if every 16x16 first pass block statistics has zero 3297 // motion and the corresponding first pass residue is small enough. 3298 // If that is the case, check the difference variance between the 3299 // current frame and the last frame. 
If the variance is small enough, 3300 // stop further splitting in RD optimization 3301 if (cpi->use_fp_mb_stats && do_split != 0 && 3302 cm->base_qindex > qindex_skip_threshold_lookup[bsize]) { 3303 int mb_row = mi_row >> 1; 3304 int mb_col = mi_col >> 1; 3305 int mb_row_end = 3306 VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows); 3307 int mb_col_end = 3308 VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols); 3309 int r, c; 3310 3311 int skip = 1; 3312 for (r = mb_row; r < mb_row_end; r++) { 3313 for (c = mb_col; c < mb_col_end; c++) { 3314 const int mb_index = r * cm->mb_cols + c; 3315 if (!(cpi->twopass.this_frame_mb_stats[mb_index] & 3316 FPMB_MOTION_ZERO_MASK) || 3317 !(cpi->twopass.this_frame_mb_stats[mb_index] & 3318 FPMB_ERROR_SMALL_MASK)) { 3319 skip = 0; 3320 break; 3321 } 3322 } 3323 if (skip == 0) { 3324 break; 3325 } 3326 } 3327 3328 if (skip) { 3329 if (src_diff_var == UINT_MAX) { 3330 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); 3331 src_diff_var = get_sby_perpixel_diff_variance( 3332 cpi, &x->plane[0].src, mi_row, mi_col, bsize); 3333 } 3334 if (src_diff_var < 8) { 3335 do_split = 0; 3336 do_rect = 0; 3337 } 3338 } 3339 } 3340#endif 3341 } 3342 } 3343 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 3344 } 3345 3346 // store estimated motion vector 3347 if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx); 3348 3349 // If the interp_filter is marked as SWITCHABLE_FILTERS, it was for an 3350 // intra block and used for context purposes. 3351 if (ctx->mic.interp_filter == SWITCHABLE_FILTERS) { 3352 pred_interp_filter = EIGHTTAP; 3353 } else { 3354 pred_interp_filter = ctx->mic.interp_filter; 3355 } 3356 3357 // PARTITION_SPLIT 3358 // TODO(jingning): use the motion vectors given by the above search as 3359 // the starting point of motion search in the following partition type check. 
3360 if (do_split) { 3361 subsize = get_subsize(bsize, PARTITION_SPLIT); 3362 if (bsize == BLOCK_8X8) { 3363 i = 4; 3364 if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed) 3365 pc_tree->leaf_split[0]->pred_interp_filter = pred_interp_filter; 3366 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, 3367 pc_tree->leaf_split[0], best_rdc.rdcost); 3368 3369 if (sum_rdc.rate == INT_MAX) sum_rdc.rdcost = INT64_MAX; 3370 } else { 3371 for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) { 3372 const int x_idx = (i & 1) * mi_step; 3373 const int y_idx = (i >> 1) * mi_step; 3374 3375 if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) 3376 continue; 3377 3378 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); 3379 3380 pc_tree->split[i]->index = i; 3381 rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, 3382 mi_col + x_idx, subsize, &this_rdc, 3383 best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]); 3384 3385 if (this_rdc.rate == INT_MAX) { 3386 sum_rdc.rdcost = INT64_MAX; 3387 break; 3388 } else { 3389 sum_rdc.rate += this_rdc.rate; 3390 sum_rdc.dist += this_rdc.dist; 3391 sum_rdc.rdcost += this_rdc.rdcost; 3392 } 3393 } 3394 } 3395 3396 if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) { 3397 sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; 3398 sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); 3399 3400 if (sum_rdc.rdcost < best_rdc.rdcost) { 3401 best_rdc = sum_rdc; 3402 pc_tree->partitioning = PARTITION_SPLIT; 3403 3404 // Rate and distortion based partition search termination clause. 
3405 if (!cpi->sf.ml_partition_search_early_termination && 3406 !x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) || 3407 (best_rdc.dist < dist_breakout_thr && 3408 best_rdc.rate < rate_breakout_thr))) { 3409 do_rect = 0; 3410 } 3411 } 3412 } else { 3413 // skip rectangular partition test when larger block size 3414 // gives better rd cost 3415 if ((cpi->sf.less_rectangular_check) && 3416 ((bsize > cpi->sf.use_square_only_threshold) || 3417 (best_rdc.dist < dist_breakout_thr))) 3418 do_rect &= !partition_none_allowed; 3419 } 3420 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 3421 } 3422 3423 // PARTITION_HORZ 3424 if (partition_horz_allowed && 3425 (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) { 3426 subsize = get_subsize(bsize, PARTITION_HORZ); 3427 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); 3428 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && 3429 partition_none_allowed) 3430 pc_tree->horizontal[0].pred_interp_filter = pred_interp_filter; 3431 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, 3432 &pc_tree->horizontal[0], best_rdc.rdcost); 3433 3434 if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows && 3435 bsize > BLOCK_8X8) { 3436 PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; 3437 update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); 3438 encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); 3439 3440 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); 3441 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && 3442 partition_none_allowed) 3443 pc_tree->horizontal[1].pred_interp_filter = pred_interp_filter; 3444 rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc, 3445 subsize, &pc_tree->horizontal[1], 3446 best_rdc.rdcost - sum_rdc.rdcost); 3447 if (this_rdc.rate == INT_MAX) { 3448 sum_rdc.rdcost = INT64_MAX; 3449 } else { 3450 sum_rdc.rate += this_rdc.rate; 3451 sum_rdc.dist += this_rdc.dist; 3452 
sum_rdc.rdcost += this_rdc.rdcost; 3453 } 3454 } 3455 3456 if (sum_rdc.rdcost < best_rdc.rdcost) { 3457 sum_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ]; 3458 sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); 3459 if (sum_rdc.rdcost < best_rdc.rdcost) { 3460 best_rdc = sum_rdc; 3461 pc_tree->partitioning = PARTITION_HORZ; 3462 3463 if ((cpi->sf.less_rectangular_check) && 3464 (bsize > cpi->sf.use_square_only_threshold)) 3465 do_rect = 0; 3466 } 3467 } 3468 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); 3469 } 3470 3471 // PARTITION_VERT 3472 if (partition_vert_allowed && 3473 (do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) { 3474 subsize = get_subsize(bsize, PARTITION_VERT); 3475 3476 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); 3477 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && 3478 partition_none_allowed) 3479 pc_tree->vertical[0].pred_interp_filter = pred_interp_filter; 3480 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, 3481 &pc_tree->vertical[0], best_rdc.rdcost); 3482 if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols && 3483 bsize > BLOCK_8X8) { 3484 update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0); 3485 encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, 3486 &pc_tree->vertical[0]); 3487 3488 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); 3489 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && 3490 partition_none_allowed) 3491 pc_tree->vertical[1].pred_interp_filter = pred_interp_filter; 3492 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc, 3493 subsize, &pc_tree->vertical[1], 3494 best_rdc.rdcost - sum_rdc.rdcost); 3495 if (this_rdc.rate == INT_MAX) { 3496 sum_rdc.rdcost = INT64_MAX; 3497 } else { 3498 sum_rdc.rate += this_rdc.rate; 3499 sum_rdc.dist += this_rdc.dist; 3500 sum_rdc.rdcost += this_rdc.rdcost; 3501 } 3502 } 3503 3504 if (sum_rdc.rdcost < 
best_rdc.rdcost) {
        sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
        sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
        if (sum_rdc.rdcost < best_rdc.rdcost) {
          best_rdc = sum_rdc;
          pc_tree->partitioning = PARTITION_VERT;
        }
      }
      restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
    }

  // TODO(jbb): This code added so that we avoid static analysis
  // warning related to the fact that best_rd isn't used after this
  // point.  This code should be refactored so that the duplicate
  // checks occur in some sub function and thus are used...
  (void)best_rd;
  *rd_cost = best_rdc;

  if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
      pc_tree->index != 3) {
    int output_enabled = (bsize == BLOCK_64X64);
    encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
              pc_tree);
  }

  if (bsize == BLOCK_64X64) {
    assert(tp_orig < *tp);
    assert(best_rdc.rate < INT_MAX);
    assert(best_rdc.dist < INT64_MAX);
  } else {
    assert(tp_orig == *tp);
  }
}

// Encodes one superblock row of a tile using the rd_* partition routines.
//
// For every 64x64 superblock column between the tile's mi_col_start and
// mi_col_end this calls the row-MT sync read hook, resets the per-superblock
// search state, selects a partitioning strategy and finally calls the row-MT
// sync write hook.
//
//   cpi       - encoder instance (speed features, common state, sync hooks).
//   td        - per-thread data; td->mb is the macroblock context used here.
//   tile_data - tile being encoded (tile bounds and row-MT sync state).
//   mi_row    - row of this superblock row, in mode-info units.
//   tp        - token output cursor, forwarded to the partition routines.
static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
                             TileDataEnc *tile_data, int mi_row,
                             TOKENEXTRA **tp) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int mi_col_start = tile_info->mi_col_start;
  const int mi_col_end = tile_info->mi_col_end;
  int mi_col;
  const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
  const int num_sb_cols =
      get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2);
  int sb_col_in_tile;

  // Initialize the left context for the new SB row
  memset(&xd->left_context, 0, sizeof(xd->left_context));
  memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));

  // Code each SB in the row
  for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end;
       mi_col += MI_BLOCK_SIZE, sb_col_in_tile++) {
    const struct segmentation *const seg = &cm->seg;
    int dummy_rate;
    int64_t dummy_dist;
    RD_COST dummy_rdc;
    int i;
    int seg_skip = 0;

    // Entry in the visible mode-info grid for this superblock's top-left.
    const int idx_str = cm->mi_stride * mi_row + mi_col;
    MODE_INFO **mi = cm->mi_grid_visible + idx_str;

    // Row-MT hook invoked before this superblock is processed.
    (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
                                   sb_col_in_tile);

    // Reset the predicted interpolation filter on every leaf and on the
    // rectangular contexts of every partition-tree node.
    if (sf->adaptive_pred_interp_filter) {
      for (i = 0; i < 64; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE;

      for (i = 0; i < 64; ++i) {
        td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
        td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
        td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
        td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
      }
    }

    vp9_zero(x->pred_mv);
    td->pc_root->index = 0;

    // Check whether the segment covering this superblock has the skip feature
    // active; if so a fixed 64x64 partitioning is forced below.
    if (seg->enabled) {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
    }

    x->source_variance = UINT_MAX;
    if (sf->partition_search_type == FIXED_PARTITION || seg_skip) {
      // Fixed block size: 64x64 for skip segments, otherwise the size
      // configured in the speed features.
      const BLOCK_SIZE bsize =
          seg_skip ? BLOCK_64X64 : sf->always_this_block_size;
      set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
      set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
      rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
                       &dummy_rate, &dummy_dist, 1, td->pc_root);
    } else if (cpi->partition_search_skippable_frame) {
      // Fixed block size chosen per superblock by
      // get_rd_var_based_fixed_partition().
      BLOCK_SIZE bsize;
      set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
      bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
      set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
      rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
                       &dummy_rate, &dummy_dist, 1, td->pc_root);
    } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
               cm->frame_type != KEY_FRAME) {
      // Partitioning produced by choose_partitioning() (non-key frames only).
      choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
      rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
                       &dummy_rate, &dummy_dist, 1, td->pc_root);
    } else {
      // If required set upper and lower partition size limits
      if (sf->auto_min_max_partition_size) {
        set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
        rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
                                &x->min_partition_size, &x->max_partition_size);
      }
      // Partition search with no initial cost bound (INT64_MAX).
      rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64,
                        &dummy_rdc, INT64_MAX, td->pc_root);
    }
    // Row-MT hook invoked once this superblock has been processed.
    (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
                                    sb_col_in_tile, num_sb_cols);
  }
}

// Prepares cpi->td.mb for encoding a frame: sets up the source planes from
// cpi->Source, configures the block planes for the frame's chroma
// subsampling, and zeroes the above entropy and above segmentation contexts.
static void init_encode_frame_mb_context(VP9_COMP *cpi) {
  MACROBLOCK *const x = &cpi->td.mb;
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);

  // Copy data over into macro block data structures.
  vp9_setup_src_planes(x, cpi->Source, 0, 0);

  vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);

  // Note: this memset assumes above_context[0], [1] and [2]
  // are allocated as part of the same buffer.
  memset(xd->above_context[0], 0,
         sizeof(*xd->above_context[0]) * 2 * aligned_mi_cols * MAX_MB_PLANE);
  memset(xd->above_seg_context, 0,
         sizeof(*xd->above_seg_context) * aligned_mi_cols);
}

// Returns 1 when at least two of the LAST / GOLDEN / ALTREF bits are set in
// cpi->ref_frame_flags, and 0 otherwise. Always returns 0 when the
// SEG_LVL_REF_FRAME feature is active for segment 1.
static int check_dual_ref_flags(VP9_COMP *cpi) {
  const int ref_flags = cpi->ref_frame_flags;

  if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) {
    return 0;
  } else {
    // Each !!(...) term contributes 1 for an enabled reference.
    return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG) +
            !!(ref_flags & VP9_ALT_FLAG)) >= 2;
  }
}

// Walks the whole visible mode-info grid (cm->mi_rows x cm->mi_cols) and
// clamps each entry's tx_size so that it does not exceed max_tx_size.
static void reset_skip_tx_size(VP9_COMMON *cm, TX_SIZE max_tx_size) {
  int mi_row, mi_col;
  const int mis = cm->mi_stride;
  MODE_INFO **mi_ptr = cm->mi_grid_visible;

  // mi_ptr advances by one grid stride per row.
  for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) {
    for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
      if (mi_ptr[mi_col]->tx_size > max_tx_size)
        mi_ptr[mi_col]->tx_size = max_tx_size;
    }
  }
}

// Classifies the current frame as an MV_REFERENCE_FRAME value:
//   INTRA_FRAME  - frame_is_intra_only() is true;
//   ALTREF_FRAME - source frame is the alt-ref and golden is being refreshed;
//   GOLDEN_FRAME - golden or alt-ref is being refreshed;
//   LAST_FRAME   - everything else.
// The checks are evaluated in that order.
static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) {
  if (frame_is_intra_only(&cpi->common))
    return INTRA_FRAME;
  else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame)
    return ALTREF_FRAME;
  else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
    return GOLDEN_FRAME;
  else
    return LAST_FRAME;
}

// Selects the transform mode for the frame. Checks, in order:
//   lossless                               -> ONLY_4X4
//   key frame with non-RD mode picking     -> ALLOW_16X16
//   tx search method USE_LARGESTALL        -> ALLOW_32X32
//   tx search method USE_FULL_RD/USE_TX_8X8 -> TX_MODE_SELECT
//   otherwise                              -> current cpi->common.tx_mode
static TX_MODE select_tx_mode(const VP9_COMP *cpi, MACROBLOCKD *const xd) {
  if (xd->lossless) return ONLY_4X4;
  if (cpi->common.frame_type == KEY_FRAME && cpi->sf.use_nonrd_pick_mode)
    return ALLOW_16X16;
  if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
    return ALLOW_32X32;
  else if (cpi->sf.tx_size_search_method == USE_FULL_RD ||
           cpi->sf.tx_size_search_method == USE_TX_8X8)
    return TX_MODE_SELECT;
  else
    return cpi->common.tx_mode;
}

// Intra mode selection for one block. Uses vp9_rd_pick_intra_mode_sb() when
// the nonrd_keyframe speed feature is off and the block is smaller than
// 16x16; otherwise uses vp9_pick_intra_mode(). The result is written to
// rd_cost.
static void hybrid_intra_mode_search(VP9_COMP *cpi, MACROBLOCK *const x,
                                     RD_COST *rd_cost, BLOCK_SIZE bsize,
                                     PICK_MODE_CONTEXT *ctx) {
  if (!cpi->sf.nonrd_keyframe && bsize < BLOCK_16X16)
    vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
  else
    vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
}

// Picks the prediction mode for the block of size bsize at (mi_row, mi_col).
//
// The above/left entropy contexts of every plane are saved before the mode
// decision and restored afterwards, so the search leaves them as it found
// them. The chosen mode info is duplicated across the block, and the
// resulting rate/distortion is returned in rd_cost and mirrored into ctx.
static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
                                MACROBLOCK *const x, int mi_row, int mi_col,
                                RD_COST *rd_cost, BLOCK_SIZE bsize,
                                PICK_MODE_CONTEXT *ctx) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi;
  // Scratch copies of the left (l) and above (a) entropy contexts.
  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
  BLOCK_SIZE bs = VPXMAX(bsize, BLOCK_8X8);  // processing unit block size
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
  int plane;

  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
  mi = xd->mi[0];
  mi->sb_type = bsize;

  // Save the contexts; the copy length is reduced by each plane's
  // subsampling shift.
  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    struct macroblockd_plane *pd = &xd->plane[plane];
    memcpy(a + num_4x4_blocks_wide * plane, pd->above_context,
           (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
    memcpy(l + num_4x4_blocks_high * plane, pd->left_context,
           (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
  }

  // With cyclic-refresh AQ, boosted segments use their own rdmult.
  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
    if (cyclic_refresh_segment_id_boosted(mi->segment_id))
      x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);

  // Mode decision: intra search on key frames, forced skip for skip
  // segments, otherwise inter search (separate routine below 8x8).
  if (cm->frame_type == KEY_FRAME)
    hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
  else if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP))
    set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize);
  else if (bsize >= BLOCK_8X8)
    vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx);
  else
    vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx);

  duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);

  // Restore the contexts saved above.
  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    struct macroblockd_plane *pd = &xd->plane[plane];
    memcpy(pd->above_context, a + num_4x4_blocks_wide * plane,
           (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
    memcpy(pd->left_context, l + num_4x4_blocks_high * plane,
           (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
  }

  // A rate of INT_MAX marks "no valid mode"; reset the whole cost in that case.
  if (rd_cost->rate == INT_MAX) vp9_rd_cost_reset(rd_cost);

  ctx->rate = rd_cost->rate;
  ctx->dist = rd_cost->dist;
}

// Recursively writes the mode info recorded in pc_tree into the mode-info
// grid (xd->mi[0]) and x->mbmi_ext, following pc_tree->partitioning.
// Blocks whose origin lies outside the frame are skipped, as is the second
// half of a HORZ/VERT partition that starts outside the frame.
static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
                              int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree) {
  MACROBLOCKD *xd = &x->e_mbd;
  // hbs: mi_row/mi_col offset to the second half of the block.
  int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  PARTITION_TYPE partition = pc_tree->partitioning;
  BLOCK_SIZE subsize = get_subsize(bsize, partition);

  assert(bsize >= BLOCK_8X8);

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  switch (partition) {
    case PARTITION_NONE:
      set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
      *(xd->mi[0]) = pc_tree->none.mic;
      *(x->mbmi_ext) = pc_tree->none.mbmi_ext;
      duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
      break;
    case PARTITION_VERT:
      set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
      *(xd->mi[0]) = pc_tree->vertical[0].mic;
      *(x->mbmi_ext) = pc_tree->vertical[0].mbmi_ext;
      duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize);

      if (mi_col + hbs < cm->mi_cols) {
        set_mode_info_offsets(cm, x, xd, mi_row, mi_col + hbs);
        *(xd->mi[0]) = pc_tree->vertical[1].mic;
        *(x->mbmi_ext) = pc_tree->vertical[1].mbmi_ext;
        duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, subsize);
      }
      break;
    case PARTITION_HORZ:
      set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
      *(xd->mi[0]) = pc_tree->horizontal[0].mic;
      *(x->mbmi_ext) = pc_tree->horizontal[0].mbmi_ext;
      duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize);
      if (mi_row + hbs < cm->mi_rows) {
        set_mode_info_offsets(cm, x, xd, mi_row + hbs, mi_col);
        *(xd->mi[0]) = pc_tree->horizontal[1].mic;
        *(x->mbmi_ext) = pc_tree->horizontal[1].mbmi_ext;
        duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, subsize);
      }
      break;
    case PARTITION_SPLIT: {
      // Recurse into the four quadrants.
      fill_mode_info_sb(cm, x, mi_row, mi_col, subsize, pc_tree->split[0]);
      fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize,
                        pc_tree->split[1]);
      fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize,
                        pc_tree->split[2]);
      fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize,
                        pc_tree->split[3]);
      break;
    }
    default: break;
  }
}

// Reset the prediction pixel ready flag recursively.
// Clears pred_pixel_ready on the none / horizontal / vertical contexts of
// this node and, for blocks larger than 8x8, does the same for all four
// split children.
static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
  pc_tree->none.pred_pixel_ready = 0;
  pc_tree->horizontal[0].pred_pixel_ready = 0;
  pc_tree->horizontal[1].pred_pixel_ready = 0;
  pc_tree->vertical[0].pred_pixel_ready = 0;
  pc_tree->vertical[1].pred_pixel_ready = 0;

  if (bsize > BLOCK_8X8) {
    BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
    int i;
    for (i = 0; i < 4; ++i) pred_pixel_ready_reset(pc_tree->split[i], subsize);
  }
}

// Recursive partition search built on nonrd_pick_sb_modes().
//
// Evaluates PARTITION_NONE, PARTITION_SPLIT, PARTITION_HORZ and
// PARTITION_VERT for the square block bsize at (mi_row, mi_col), subject to
// frame-edge constraints and speed features. The cheapest candidate is
// recorded in pc_tree->partitioning and its cost returned through rd_cost.
//
//   best_rd  - initial upper bound on rdcost; a candidate must be strictly
//              cheaper to be selected.
//   do_recon - when non-zero, the selected partitioning is encoded with
//              encode_sb_rt() before returning.
//
// If no candidate produced a valid cost, rd_cost is reset and the function
// returns without updating the mode-info array.
static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
                                 TileDataEnc *tile_data, TOKENEXTRA **tp,
                                 int mi_row, int mi_col, BLOCK_SIZE bsize,
                                 RD_COST *rd_cost, int do_recon,
                                 int64_t best_rd, PC_TREE *pc_tree) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Offset (mi_row/mi_col units) from the block origin to its second half.
  const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
  TOKENEXTRA *tp_orig = *tp;
  PICK_MODE_CONTEXT *ctx = &pc_tree->none;
  int i;
  BLOCK_SIZE subsize = bsize;
  RD_COST this_rdc, sum_rdc, best_rdc;
  int do_split = bsize >= BLOCK_8X8;
  int do_rect = 1;
  // Override skipping rectangular partition operations for edge blocks
  const int force_horz_split = (mi_row + ms >= cm->mi_rows);
  const int force_vert_split = (mi_col + ms >= cm->mi_cols);
  const int xss = x->e_mbd.plane[1].subsampling_x;
  const int yss = x->e_mbd.plane[1].subsampling_y;

  int partition_none_allowed = !force_horz_split && !force_vert_split;
  int partition_horz_allowed =
      !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
  int partition_vert_allowed =
      !force_horz_split && xss <= yss && bsize >= BLOCK_8X8;
  // tp_orig is otherwise referenced only by the asserts at the end of this
  // function; this statement keeps it "used" when asserts are compiled out.
  // NOTE(review): (void)tp_orig would do the same without the dereference.
  (void)*tp_orig;

  // Avoid checking for rectangular partitions for speed >= 6.
  if (cpi->oxcf.speed >= 6) do_rect = 0;

  assert(num_8x8_blocks_wide_lookup[bsize] ==
         num_8x8_blocks_high_lookup[bsize]);

  vp9_rd_cost_init(&sum_rdc);
  vp9_rd_cost_reset(&best_rdc);
  best_rdc.rdcost = best_rd;

  // Determine partition types in search according to the speed features.
  // The threshold set here has to be of square block size.
  if (sf->auto_min_max_partition_size) {
    partition_none_allowed &=
        (bsize <= x->max_partition_size && bsize >= x->min_partition_size);
    partition_horz_allowed &=
        ((bsize <= x->max_partition_size && bsize > x->min_partition_size) ||
         force_horz_split);
    partition_vert_allowed &=
        ((bsize <= x->max_partition_size && bsize > x->min_partition_size) ||
         force_vert_split);
    do_split &= bsize > x->min_partition_size;
  }
  if (sf->use_square_partition_only) {
    partition_horz_allowed &= force_horz_split;
    partition_vert_allowed &= force_vert_split;
  }

  // Flag the PARTITION_NONE context as pixel-ready only when no other
  // partition type will be tried for this block.
  ctx->pred_pixel_ready =
      !(partition_vert_allowed || partition_horz_allowed || do_split);

  // PARTITION_NONE
  if (partition_none_allowed) {
    nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize,
                        ctx);
    // Snapshot the decision so it can be written back later.
    ctx->mic = *xd->mi[0];
    ctx->mbmi_ext = *x->mbmi_ext;
    ctx->skip_txfm[0] = x->skip_txfm[0];
    ctx->skip = x->skip;

    if (this_rdc.rate != INT_MAX) {
      int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
      this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
      this_rdc.rdcost =
          RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
      if (this_rdc.rdcost < best_rdc.rdcost) {
        int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist;
        int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate;

        // Scale the breakout thresholds with the block size.
        dist_breakout_thr >>=
            8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);

        rate_breakout_thr *= num_pels_log2_lookup[bsize];

        best_rdc = this_rdc;
        if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;

        // Early termination: PARTITION_NONE is already below both
        // thresholds, so skip the split and rectangular searches.
        if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr &&
            this_rdc.dist < dist_breakout_thr) {
          do_split = 0;
          do_rect = 0;
        }
      }
    }
  }

  // store estimated motion vector
  store_pred_mv(x, ctx);

  // PARTITION_SPLIT
  if (do_split) {
    int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
    sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
    sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
    subsize = get_subsize(bsize, PARTITION_SPLIT);
    // Stop visiting quadrants once the running total is no longer below the
    // best cost found so far.
    for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
      const int x_idx = (i & 1) * ms;
      const int y_idx = (i >> 1) * ms;

      // Quadrants that start outside the frame are skipped.
      if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
        continue;
      load_pred_mv(x, ctx);
      // The child's bound is whatever budget remains under the current best.
      nonrd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
                           mi_col + x_idx, subsize, &this_rdc, 0,
                           best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);

      if (this_rdc.rate == INT_MAX) {
        vp9_rd_cost_reset(&sum_rdc);
      } else {
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost += this_rdc.rdcost;
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = sum_rdc;
      pc_tree->partitioning = PARTITION_SPLIT;
    } else {
      // skip rectangular partition test when larger block size
      // gives better rd cost
      if (sf->less_rectangular_check) do_rect &= !partition_none_allowed;
    }
  }

  // PARTITION_HORZ
  if (partition_horz_allowed && do_rect) {
    subsize = get_subsize(bsize, PARTITION_HORZ);
    if (sf->adaptive_motion_search) load_pred_mv(x, ctx);
    pc_tree->horizontal[0].pred_pixel_ready = 1;
    // Top half; its cost seeds sum_rdc.
    nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                        &pc_tree->horizontal[0]);

    pc_tree->horizontal[0].mic = *xd->mi[0];
    pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
    pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
    pc_tree->horizontal[0].skip = x->skip;

    // Bottom half: only if still competitive and inside the frame.
    if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) {
      load_pred_mv(x, ctx);
      pc_tree->horizontal[1].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + ms, mi_col, &this_rdc,
                          subsize, &pc_tree->horizontal[1]);

      pc_tree->horizontal[1].mic = *xd->mi[0];
      pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
      pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->horizontal[1].skip = x->skip;

      if (this_rdc.rate == INT_MAX) {
        vp9_rd_cost_reset(&sum_rdc);
      } else {
        int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
        // The partition signalling cost is folded in via this_rdc.rate.
        this_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost =
            RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = sum_rdc;
      pc_tree->partitioning = PARTITION_HORZ;
    } else {
      pred_pixel_ready_reset(pc_tree, bsize);
    }
  }

  // PARTITION_VERT
  if (partition_vert_allowed && do_rect) {
    subsize = get_subsize(bsize, PARTITION_VERT);
    if (sf->adaptive_motion_search) load_pred_mv(x, ctx);
    pc_tree->vertical[0].pred_pixel_ready = 1;
    // Left half; its cost seeds sum_rdc.
    nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                        &pc_tree->vertical[0]);
    pc_tree->vertical[0].mic = *xd->mi[0];
    pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
    pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
    pc_tree->vertical[0].skip = x->skip;

    // Right half: only if still competitive and inside the frame.
    if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) {
      load_pred_mv(x, ctx);
      pc_tree->vertical[1].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + ms, &this_rdc,
                          subsize, &pc_tree->vertical[1]);
      pc_tree->vertical[1].mic = *xd->mi[0];
      pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
      pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->vertical[1].skip = x->skip;

      if (this_rdc.rate == INT_MAX) {
        vp9_rd_cost_reset(&sum_rdc);
      } else {
        int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
        sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost =
            RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = sum_rdc;
      pc_tree->partitioning = PARTITION_VERT;
    } else {
      pred_pixel_ready_reset(pc_tree, bsize);
    }
  }

  *rd_cost = best_rdc;

  // No candidate yielded a valid rate.
  if (best_rdc.rate == INT_MAX) {
    vp9_rd_cost_reset(rd_cost);
    return;
  }

  // update mode info array
  fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, pc_tree);

  if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && do_recon) {
    int output_enabled = (bsize == BLOCK_64X64);
    encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
                 pc_tree);
  }

  // Tokens are expected to have been emitted only for a reconstructed
  // 64x64 block.
  if (bsize == BLOCK_64X64 && do_recon) {
    assert(tp_orig < *tp);
    assert(best_rdc.rate < INT_MAX);
    assert(best_rdc.dist < INT64_MAX);
  } else {
    assert(tp_orig == *tp);
  }
}

// Refines a partitioning that is already stored in the mode-info grid
// (read from mi[0]->sb_type).
//
// For a 32x32 block kept whole, a 32x32 block partitioned into sub-blocks of
// at least subsize_ref, or a partitioned 16x16 block, the min/max partition
// sizes are set and nonrd_pick_partition() runs a search. In every other
// case the stored partition is followed: modes are picked with
// nonrd_pick_sb_modes() and the rate/dist of the parts are accumulated into
// rd_cost. At the 64x64 level with output_enabled set, the result is encoded
// with encode_sb_rt().
static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td,
                                   TileDataEnc *tile_data, MODE_INFO **mi,
                                   TOKENEXTRA **tp, int mi_row, int mi_col,
                                   BLOCK_SIZE bsize, int output_enabled,
                                   RD_COST *rd_cost, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  // hbs: mi_row/mi_col offset to the second half of the block.
  const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  const int mis = cm->mi_stride;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize;
  RD_COST this_rdc;
  // Smallest sub-block size for which a partitioned 32x32 block is still
  // handed to nonrd_pick_partition().
  BLOCK_SIZE subsize_ref =
      (cpi->sf.adapt_partition_source_sad) ? BLOCK_8X8 : BLOCK_16X16;

  vp9_rd_cost_reset(&this_rdc);
  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4;
  partition = partition_lookup[bsl][subsize];

  if (bsize == BLOCK_32X32 && subsize == BLOCK_32X32) {
    x->max_partition_size = BLOCK_32X32;
    x->min_partition_size = BLOCK_16X16;
    nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
                         0, INT64_MAX, pc_tree);
  } else if (bsize == BLOCK_32X32 && partition != PARTITION_NONE &&
             subsize >= subsize_ref) {
    x->max_partition_size = BLOCK_32X32;
    x->min_partition_size = BLOCK_8X8;
    nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
                         0, INT64_MAX, pc_tree);
  } else if (bsize == BLOCK_16X16 && partition != PARTITION_NONE) {
    x->max_partition_size = BLOCK_16X16;
    x->min_partition_size = BLOCK_8X8;
    nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
                         0, INT64_MAX, pc_tree);
  } else {
    // Follow the stored partition. After each mode decision the result is
    // snapshotted into the matching PICK_MODE_CONTEXT.
    switch (partition) {
      case PARTITION_NONE:
        pc_tree->none.pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
                            &pc_tree->none);
        pc_tree->none.mic = *xd->mi[0];
        pc_tree->none.mbmi_ext = *x->mbmi_ext;
        pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
        pc_tree->none.skip = x->skip;
        break;
      case PARTITION_VERT:
        pc_tree->vertical[0].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
                            &pc_tree->vertical[0]);
        pc_tree->vertical[0].mic = *xd->mi[0];
        pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
        pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->vertical[0].skip = x->skip;
        if (mi_col + hbs < cm->mi_cols) {
          pc_tree->vertical[1].pred_pixel_ready = 1;
          nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs,
                              &this_rdc, subsize, &pc_tree->vertical[1]);
          pc_tree->vertical[1].mic = *xd->mi[0];
          pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
          pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
          pc_tree->vertical[1].skip = x->skip;
          // Accumulate only when both costs are valid.
          if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
              rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
            rd_cost->rate += this_rdc.rate;
            rd_cost->dist += this_rdc.dist;
          }
        }
        break;
      case PARTITION_HORZ:
        pc_tree->horizontal[0].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
                            &pc_tree->horizontal[0]);
        pc_tree->horizontal[0].mic = *xd->mi[0];
        pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
        pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->horizontal[0].skip = x->skip;
        if (mi_row + hbs < cm->mi_rows) {
          pc_tree->horizontal[1].pred_pixel_ready = 1;
          nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col,
                              &this_rdc, subsize, &pc_tree->horizontal[1]);
          pc_tree->horizontal[1].mic = *xd->mi[0];
          pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
          pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
          pc_tree->horizontal[1].skip = x->skip;
          // Accumulate only when both costs are valid.
          if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
              rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
            rd_cost->rate += this_rdc.rate;
            rd_cost->dist += this_rdc.dist;
          }
        }
        break;
      case PARTITION_SPLIT:
        // Recurse into the four quadrants; the first writes rd_cost directly,
        // the remaining three are accumulated through this_rdc.
        subsize = get_subsize(bsize, PARTITION_SPLIT);
        nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                               subsize, output_enabled, rd_cost,
                               pc_tree->split[0]);
        nonrd_select_partition(cpi, td, tile_data, mi + hbs, tp, mi_row,
                               mi_col + hbs, subsize, output_enabled, &this_rdc,
                               pc_tree->split[1]);
        if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
            rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
          rd_cost->rate += this_rdc.rate;
          rd_cost->dist += this_rdc.dist;
        }
        nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis, tp,
                               mi_row + hbs, mi_col, subsize, output_enabled,
                               &this_rdc, pc_tree->split[2]);
        if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
            rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
          rd_cost->rate += this_rdc.rate;
          rd_cost->dist += this_rdc.dist;
        }
        nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp,
                               mi_row + hbs, mi_col + hbs, subsize,
                               output_enabled, &this_rdc, pc_tree->split[3]);
        if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
            rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
          rd_cost->rate += this_rdc.rate;
          rd_cost->dist += this_rdc.dist;
        }
        break;
      default: assert(0 && "Invalid partition type."); break;
    }
  }

  // Encoding happens once, at the superblock root.
  if (bsize == BLOCK_64X64 && output_enabled)
    encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, 1, bsize, pc_tree);
}

// Follows the partitioning already stored in the mode-info grid
// (mi[0]->sb_type) without searching. For each block a mode is picked with
// nonrd_pick_sb_modes(), snapshotted into the matching PICK_MODE_CONTEXT and
// encoded immediately with encode_b_rt(). Partition counts are updated when
// output_enabled is set, and the partition context is updated for every
// non-split node (and for 8x8 splits).
static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td,
                                TileDataEnc *tile_data, MODE_INFO **mi,
                                TOKENEXTRA **tp, int mi_row, int mi_col,
                                BLOCK_SIZE bsize, int output_enabled,
                                RD_COST *dummy_cost, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  // hbs: mi_row/mi_col offset to the second half of the block.
  const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  const int mis = cm->mi_stride;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4;
  partition = partition_lookup[bsl][subsize];

  if (output_enabled && bsize != BLOCK_4X4) {
    int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
    td->counts->partition[ctx][partition]++;
  }

  switch (partition) {
    case PARTITION_NONE:
      pc_tree->none.pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                          subsize, &pc_tree->none);
      pc_tree->none.mic = *xd->mi[0];
      pc_tree->none.mbmi_ext = *x->mbmi_ext;
      pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
      pc_tree->none.skip = x->skip;
      encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                  subsize, &pc_tree->none);
      break;
    case PARTITION_VERT:
      pc_tree->vertical[0].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                          subsize, &pc_tree->vertical[0]);
      pc_tree->vertical[0].mic = *xd->mi[0];
      pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
      pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->vertical[0].skip = x->skip;
      encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                  subsize, &pc_tree->vertical[0]);
      // Second half only for blocks larger than 8x8 that stay in the frame.
      if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
        pc_tree->vertical[1].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, dummy_cost,
                            subsize, &pc_tree->vertical[1]);
        pc_tree->vertical[1].mic = *xd->mi[0];
        pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
        pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->vertical[1].skip = x->skip;
        encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col + hbs,
                    output_enabled, subsize, &pc_tree->vertical[1]);
      }
      break;
    case PARTITION_HORZ:
      pc_tree->horizontal[0].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                          subsize, &pc_tree->horizontal[0]);
      pc_tree->horizontal[0].mic = *xd->mi[0];
      pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
      pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->horizontal[0].skip = x->skip;
      encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                  subsize, &pc_tree->horizontal[0]);

      // Second half only for blocks larger than 8x8 that stay in the frame.
      if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
        pc_tree->horizontal[1].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, dummy_cost,
                            subsize, &pc_tree->horizontal[1]);
        pc_tree->horizontal[1].mic = *xd->mi[0];
        pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
        pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->horizontal[1].skip = x->skip;
        encode_b_rt(cpi, td, tile_info, tp, mi_row + hbs, mi_col,
                    output_enabled, subsize, &pc_tree->horizontal[1]);
      }
      break;
    case PARTITION_SPLIT:
      subsize = get_subsize(bsize, PARTITION_SPLIT);
      if (bsize == BLOCK_8X8) {
        // An 8x8 split is handled as a single leaf using leaf_split[0].
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                            subsize, pc_tree->leaf_split[0]);
        encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                    subsize, pc_tree->leaf_split[0]);
      } else {
        // Recurse into the four quadrants.
        nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, subsize,
                            output_enabled, dummy_cost, pc_tree->split[0]);
        nonrd_use_partition(cpi, td, tile_data, mi + hbs, tp, mi_row,
                            mi_col + hbs, subsize, output_enabled, dummy_cost,
                            pc_tree->split[1]);
        nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis, tp,
                            mi_row + hbs, mi_col, subsize, output_enabled,
                            dummy_cost, pc_tree->split[2]);
        nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp,
                            mi_row + hbs, mi_col + hbs, subsize, output_enabled,
                            dummy_cost, pc_tree->split[3]);
      }
      break;
    default: assert(0 && "Invalid partition type."); break;
  }

  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}

static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
                                TileDataEnc *tile_data, int mi_row,
                                TOKENEXTRA **tp) {
  SPEED_FEATURES *const sf = &cpi->sf;
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mi_col_start = tile_info->mi_col_start;
  const int mi_col_end = tile_info->mi_col_end;
  int mi_col;
  const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
  const int num_sb_cols =
      get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2);
  int sb_col_in_tile;

  // Initialize the left context for the new SB row
  memset(&xd->left_context, 0, sizeof(xd->left_context));
  memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));

  // Code each SB in the row
  for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end;
       mi_col += MI_BLOCK_SIZE, ++sb_col_in_tile) {
    const struct segmentation *const seg = &cm->seg;
    RD_COST dummy_rdc;
    const int idx_str = cm->mi_stride * mi_row + mi_col;
    MODE_INFO **mi = cm->mi_grid_visible + idx_str;
    PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type;
    BLOCK_SIZE bsize = BLOCK_64X64;
    int seg_skip = 0;

    (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
                                   sb_col_in_tile);

    if (cpi->use_skin_detection) {
      vp9_compute_skin_sb(cpi, BLOCK_16X16, mi_row, mi_col);
    }

    x->source_variance = UINT_MAX;
    vp9_zero(x->pred_mv);
    vp9_rd_cost_init(&dummy_rdc);
    x->color_sensitivity[0] = 0;
    x->color_sensitivity[1] = 0;
    x->sb_is_skin = 0;
    x->skip_low_source_sad = 0;
    x->lowvar_highsumdiff = 0;
    x->content_state_sb = 0;
    x->sb_use_mv_part = 0;
    x->sb_mvcol_part = 0;
    x->sb_mvrow_part = 0;
    x->sb_pickmode_part = 0;
    x->arf_frame_usage = 0;
    x->lastgolden_frame_usage = 0;

    if (seg->enabled) {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
      if (seg_skip) {
        partition_search_type = FIXED_PARTITION;
      }
    }

    if (cpi->compute_source_sad_onepass && cpi->sf.use_source_sad) {
      int shift = cpi->Source->y_stride * (mi_row << 3) + (mi_col << 3);
      int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
      int64_t source_sad = avg_source_sad(cpi, x, shift, sb_offset2);
      if (sf->adapt_partition_source_sad &&
          (cpi->oxcf.rc_mode == VPX_VBR && !cpi->rc.is_src_frame_alt_ref &&
           source_sad > sf->adapt_partition_thresh &&
           (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)))
        partition_search_type = REFERENCE_PARTITION;
    }

    // Set the partition type of the 64X64 block
    switch (partition_search_type) {
      case VAR_BASED_PARTITION:
        // TODO(jingning, marpan): The mode decision and encoding process
        // support both intra and inter sub8x8 block coding for RTC mode.
        // Tune the thresholds accordingly to use sub8x8 block coding for
        // coding performance improvement.
        choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
        nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                            BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
        break;
      case SOURCE_VAR_BASED_PARTITION:
        set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col);
        nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                            BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
        break;
      case FIXED_PARTITION:
        if (!seg_skip) bsize = sf->always_this_block_size;
        set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
        nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                            BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
        break;
      case REFERENCE_PARTITION:
        x->sb_pickmode_part = 1;
        set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
        // Use nonrd_pick_partition on scene-cut for VBR mode.
        // nonrd_pick_partition does not support 4x4 partition, so avoid it
        // on key frame for now.
        if ((cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.high_source_sad &&
             cpi->oxcf.speed < 6 && cm->frame_type != KEY_FRAME &&
             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
          // Use lower max_partition_size for low resolutions.
          if (cm->width <= 352 && cm->height <= 288)
            x->max_partition_size = BLOCK_32X32;
          else
            x->max_partition_size = BLOCK_64X64;
          x->min_partition_size = BLOCK_8X8;
          nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
                               BLOCK_64X64, &dummy_rdc, 1, INT64_MAX,
                               td->pc_root);
        } else {
          choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
          // TODO(marpan): Seems like nonrd_select_partition does not support
          // 4x4 partition. Since 4x4 is used on key frame, use this switch
          // for now.
4443 if (cm->frame_type == KEY_FRAME) 4444 nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, 4445 BLOCK_64X64, 1, &dummy_rdc, td->pc_root); 4446 else 4447 nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, 4448 BLOCK_64X64, 1, &dummy_rdc, td->pc_root); 4449 } 4450 4451 break; 4452 default: assert(0); break; 4453 } 4454 4455 // Update ref_frame usage for inter frame if this group is ARF group. 4456 if (!cpi->rc.is_src_frame_alt_ref && !cpi->refresh_golden_frame && 4457 !cpi->refresh_alt_ref_frame && cpi->rc.alt_ref_gf_group && 4458 cpi->sf.use_altref_onepass) { 4459 int sboffset = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3); 4460 if (cpi->count_arf_frame_usage != NULL) 4461 cpi->count_arf_frame_usage[sboffset] = x->arf_frame_usage; 4462 if (cpi->count_lastgolden_frame_usage != NULL) 4463 cpi->count_lastgolden_frame_usage[sboffset] = x->lastgolden_frame_usage; 4464 } 4465 4466 (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row, 4467 sb_col_in_tile, num_sb_cols); 4468 } 4469} 4470// end RTC play code 4471 4472static INLINE uint32_t variance(const diff *const d) { 4473 return d->sse - (uint32_t)(((int64_t)d->sum * d->sum) >> 8); 4474} 4475 4476#if CONFIG_VP9_HIGHBITDEPTH 4477static INLINE uint32_t variance_highbd(diff *const d) { 4478 const int64_t var = (int64_t)d->sse - (((int64_t)d->sum * d->sum) >> 8); 4479 return (var >= 0) ? (uint32_t)var : 0; 4480} 4481#endif // CONFIG_VP9_HIGHBITDEPTH 4482 4483static int set_var_thresh_from_histogram(VP9_COMP *cpi) { 4484 const SPEED_FEATURES *const sf = &cpi->sf; 4485 const VP9_COMMON *const cm = &cpi->common; 4486 4487 const uint8_t *src = cpi->Source->y_buffer; 4488 const uint8_t *last_src = cpi->Last_Source->y_buffer; 4489 const int src_stride = cpi->Source->y_stride; 4490 const int last_stride = cpi->Last_Source->y_stride; 4491 4492 // Pick cutoff threshold 4493 const int cutoff = (VPXMIN(cm->width, cm->height) >= 720) 4494 ? 
(cm->MBs * VAR_HIST_LARGE_CUT_OFF / 100) 4495 : (cm->MBs * VAR_HIST_SMALL_CUT_OFF / 100); 4496 DECLARE_ALIGNED(16, int, hist[VAR_HIST_BINS]); 4497 diff *var16 = cpi->source_diff_var; 4498 4499 int sum = 0; 4500 int i, j; 4501 4502 memset(hist, 0, VAR_HIST_BINS * sizeof(hist[0])); 4503 4504 for (i = 0; i < cm->mb_rows; i++) { 4505 for (j = 0; j < cm->mb_cols; j++) { 4506#if CONFIG_VP9_HIGHBITDEPTH 4507 if (cm->use_highbitdepth) { 4508 switch (cm->bit_depth) { 4509 case VPX_BITS_8: 4510 vpx_highbd_8_get16x16var(src, src_stride, last_src, last_stride, 4511 &var16->sse, &var16->sum); 4512 var16->var = variance(var16); 4513 break; 4514 case VPX_BITS_10: 4515 vpx_highbd_10_get16x16var(src, src_stride, last_src, last_stride, 4516 &var16->sse, &var16->sum); 4517 var16->var = variance_highbd(var16); 4518 break; 4519 case VPX_BITS_12: 4520 vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride, 4521 &var16->sse, &var16->sum); 4522 var16->var = variance_highbd(var16); 4523 break; 4524 default: 4525 assert(0 && 4526 "cm->bit_depth should be VPX_BITS_8, VPX_BITS_10" 4527 " or VPX_BITS_12"); 4528 return -1; 4529 } 4530 } else { 4531 vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, 4532 &var16->sum); 4533 var16->var = variance(var16); 4534 } 4535#else 4536 vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, 4537 &var16->sum); 4538 var16->var = variance(var16); 4539#endif // CONFIG_VP9_HIGHBITDEPTH 4540 4541 if (var16->var >= VAR_HIST_MAX_BG_VAR) 4542 hist[VAR_HIST_BINS - 1]++; 4543 else 4544 hist[var16->var / VAR_HIST_FACTOR]++; 4545 4546 src += 16; 4547 last_src += 16; 4548 var16++; 4549 } 4550 4551 src = src - cm->mb_cols * 16 + 16 * src_stride; 4552 last_src = last_src - cm->mb_cols * 16 + 16 * last_stride; 4553 } 4554 4555 cpi->source_var_thresh = 0; 4556 4557 if (hist[VAR_HIST_BINS - 1] < cutoff) { 4558 for (i = 0; i < VAR_HIST_BINS - 1; i++) { 4559 sum += hist[i]; 4560 4561 if (sum > cutoff) { 4562 cpi->source_var_thresh = (i + 
1) * VAR_HIST_FACTOR; 4563 return 0; 4564 } 4565 } 4566 } 4567 4568 return sf->search_type_check_frequency; 4569} 4570 4571static void source_var_based_partition_search_method(VP9_COMP *cpi) { 4572 VP9_COMMON *const cm = &cpi->common; 4573 SPEED_FEATURES *const sf = &cpi->sf; 4574 4575 if (cm->frame_type == KEY_FRAME) { 4576 // For key frame, use SEARCH_PARTITION. 4577 sf->partition_search_type = SEARCH_PARTITION; 4578 } else if (cm->intra_only) { 4579 sf->partition_search_type = FIXED_PARTITION; 4580 } else { 4581 if (cm->last_width != cm->width || cm->last_height != cm->height) { 4582 if (cpi->source_diff_var) vpx_free(cpi->source_diff_var); 4583 4584 CHECK_MEM_ERROR(cm, cpi->source_diff_var, 4585 vpx_calloc(cm->MBs, sizeof(diff))); 4586 } 4587 4588 if (!cpi->frames_till_next_var_check) 4589 cpi->frames_till_next_var_check = set_var_thresh_from_histogram(cpi); 4590 4591 if (cpi->frames_till_next_var_check > 0) { 4592 sf->partition_search_type = FIXED_PARTITION; 4593 cpi->frames_till_next_var_check--; 4594 } 4595 } 4596} 4597 4598static int get_skip_encode_frame(const VP9_COMMON *cm, ThreadData *const td) { 4599 unsigned int intra_count = 0, inter_count = 0; 4600 int j; 4601 4602 for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) { 4603 intra_count += td->counts->intra_inter[j][0]; 4604 inter_count += td->counts->intra_inter[j][1]; 4605 } 4606 4607 return (intra_count << 2) < inter_count && cm->frame_type != KEY_FRAME && 4608 cm->show_frame; 4609} 4610 4611void vp9_init_tile_data(VP9_COMP *cpi) { 4612 VP9_COMMON *const cm = &cpi->common; 4613 const int tile_cols = 1 << cm->log2_tile_cols; 4614 const int tile_rows = 1 << cm->log2_tile_rows; 4615 int tile_col, tile_row; 4616 TOKENEXTRA *pre_tok = cpi->tile_tok[0][0]; 4617 TOKENLIST *tplist = cpi->tplist[0][0]; 4618 int tile_tok = 0; 4619 int tplist_count = 0; 4620 4621 if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) { 4622 if (cpi->tile_data != NULL) vpx_free(cpi->tile_data); 4623 
CHECK_MEM_ERROR(cm, cpi->tile_data, vpx_malloc(tile_cols * tile_rows * 4624 sizeof(*cpi->tile_data))); 4625 cpi->allocated_tiles = tile_cols * tile_rows; 4626 4627 for (tile_row = 0; tile_row < tile_rows; ++tile_row) 4628 for (tile_col = 0; tile_col < tile_cols; ++tile_col) { 4629 TileDataEnc *tile_data = 4630 &cpi->tile_data[tile_row * tile_cols + tile_col]; 4631 int i, j; 4632 for (i = 0; i < BLOCK_SIZES; ++i) { 4633 for (j = 0; j < MAX_MODES; ++j) { 4634 tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT; 4635 tile_data->mode_map[i][j] = j; 4636 } 4637 } 4638#if CONFIG_MULTITHREAD 4639 tile_data->row_base_thresh_freq_fact = NULL; 4640#endif 4641 } 4642 } 4643 4644 for (tile_row = 0; tile_row < tile_rows; ++tile_row) { 4645 for (tile_col = 0; tile_col < tile_cols; ++tile_col) { 4646 TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; 4647 TileInfo *tile_info = &this_tile->tile_info; 4648 vp9_tile_init(tile_info, cm, tile_row, tile_col); 4649 4650 cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok; 4651 pre_tok = cpi->tile_tok[tile_row][tile_col]; 4652 tile_tok = allocated_tokens(*tile_info); 4653 4654 cpi->tplist[tile_row][tile_col] = tplist + tplist_count; 4655 tplist = cpi->tplist[tile_row][tile_col]; 4656 tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2); 4657 } 4658 } 4659} 4660 4661void vp9_encode_sb_row(VP9_COMP *cpi, ThreadData *td, int tile_row, 4662 int tile_col, int mi_row) { 4663 VP9_COMMON *const cm = &cpi->common; 4664 const int tile_cols = 1 << cm->log2_tile_cols; 4665 TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; 4666 const TileInfo *const tile_info = &this_tile->tile_info; 4667 TOKENEXTRA *tok = NULL; 4668 int tile_sb_row; 4669 int tile_mb_cols = (tile_info->mi_col_end - tile_info->mi_col_start + 1) >> 1; 4670 4671 tile_sb_row = mi_cols_aligned_to_sb(mi_row - tile_info->mi_row_start) >> 4672 MI_BLOCK_SIZE_LOG2; 4673 get_start_tok(cpi, tile_row, tile_col, mi_row, &tok); 
4674 cpi->tplist[tile_row][tile_col][tile_sb_row].start = tok; 4675 4676 if (cpi->sf.use_nonrd_pick_mode) 4677 encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok); 4678 else 4679 encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok); 4680 4681 cpi->tplist[tile_row][tile_col][tile_sb_row].stop = tok; 4682 cpi->tplist[tile_row][tile_col][tile_sb_row].count = 4683 (unsigned int)(cpi->tplist[tile_row][tile_col][tile_sb_row].stop - 4684 cpi->tplist[tile_row][tile_col][tile_sb_row].start); 4685 assert(tok - cpi->tplist[tile_row][tile_col][tile_sb_row].start <= 4686 get_token_alloc(MI_BLOCK_SIZE >> 1, tile_mb_cols)); 4687 4688 (void)tile_mb_cols; 4689} 4690 4691void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row, 4692 int tile_col) { 4693 VP9_COMMON *const cm = &cpi->common; 4694 const int tile_cols = 1 << cm->log2_tile_cols; 4695 TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; 4696 const TileInfo *const tile_info = &this_tile->tile_info; 4697 const int mi_row_start = tile_info->mi_row_start; 4698 const int mi_row_end = tile_info->mi_row_end; 4699 int mi_row; 4700 4701 for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE) 4702 vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row); 4703} 4704 4705static void encode_tiles(VP9_COMP *cpi) { 4706 VP9_COMMON *const cm = &cpi->common; 4707 const int tile_cols = 1 << cm->log2_tile_cols; 4708 const int tile_rows = 1 << cm->log2_tile_rows; 4709 int tile_col, tile_row; 4710 4711 vp9_init_tile_data(cpi); 4712 4713 for (tile_row = 0; tile_row < tile_rows; ++tile_row) 4714 for (tile_col = 0; tile_col < tile_cols; ++tile_col) 4715 vp9_encode_tile(cpi, &cpi->td, tile_row, tile_col); 4716} 4717 4718#if CONFIG_FP_MB_STATS 4719static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats, 4720 VP9_COMMON *cm, uint8_t **this_frame_mb_stats) { 4721 uint8_t *mb_stats_in = firstpass_mb_stats->mb_stats_start + 4722 cm->current_video_frame * cm->MBs * sizeof(uint8_t); 4723 4724 if 
(mb_stats_in > firstpass_mb_stats->mb_stats_end) return EOF; 4725 4726 *this_frame_mb_stats = mb_stats_in; 4727 4728 return 1; 4729} 4730#endif 4731 4732static void encode_frame_internal(VP9_COMP *cpi) { 4733 SPEED_FEATURES *const sf = &cpi->sf; 4734 ThreadData *const td = &cpi->td; 4735 MACROBLOCK *const x = &td->mb; 4736 VP9_COMMON *const cm = &cpi->common; 4737 MACROBLOCKD *const xd = &x->e_mbd; 4738 4739 xd->mi = cm->mi_grid_visible; 4740 xd->mi[0] = cm->mi; 4741 4742 vp9_zero(*td->counts); 4743 vp9_zero(cpi->td.rd_counts); 4744 4745 xd->lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 && 4746 cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; 4747 4748#if CONFIG_VP9_HIGHBITDEPTH 4749 if (cm->use_highbitdepth) 4750 x->fwd_txfm4x4 = xd->lossless ? vp9_highbd_fwht4x4 : vpx_highbd_fdct4x4; 4751 else 4752 x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4; 4753 x->highbd_inv_txfm_add = 4754 xd->lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add; 4755#else 4756 x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4; 4757#endif // CONFIG_VP9_HIGHBITDEPTH 4758 x->inv_txfm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add; 4759 4760 if (xd->lossless) x->optimize = 0; 4761 4762 cm->tx_mode = select_tx_mode(cpi, xd); 4763 4764 vp9_frame_init_quantizer(cpi); 4765 4766 vp9_initialize_rd_consts(cpi); 4767 vp9_initialize_me_consts(cpi, x, cm->base_qindex); 4768 init_encode_frame_mb_context(cpi); 4769 cm->use_prev_frame_mvs = 4770 !cm->error_resilient_mode && cm->width == cm->last_width && 4771 cm->height == cm->last_height && !cm->intra_only && cm->last_show_frame; 4772 // Special case: set prev_mi to NULL when the previous mode info 4773 // context cannot be used. 4774 cm->prev_mi = 4775 cm->use_prev_frame_mvs ? 
cm->prev_mip + cm->mi_stride + 1 : NULL; 4776 4777 x->quant_fp = cpi->sf.use_quant_fp; 4778 vp9_zero(x->skip_txfm); 4779 if (sf->use_nonrd_pick_mode) { 4780 // Initialize internal buffer pointers for rtc coding, where non-RD 4781 // mode decision is used and hence no buffer pointer swap needed. 4782 int i; 4783 struct macroblock_plane *const p = x->plane; 4784 struct macroblockd_plane *const pd = xd->plane; 4785 PICK_MODE_CONTEXT *ctx = &cpi->td.pc_root->none; 4786 4787 for (i = 0; i < MAX_MB_PLANE; ++i) { 4788 p[i].coeff = ctx->coeff_pbuf[i][0]; 4789 p[i].qcoeff = ctx->qcoeff_pbuf[i][0]; 4790 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0]; 4791 p[i].eobs = ctx->eobs_pbuf[i][0]; 4792 } 4793 vp9_zero(x->zcoeff_blk); 4794 4795 if (cm->frame_type != KEY_FRAME && cpi->rc.frames_since_golden == 0 && 4796 !(cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR) && 4797 !cpi->use_svc) 4798 cpi->ref_frame_flags &= (~VP9_GOLD_FLAG); 4799 4800 if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION) 4801 source_var_based_partition_search_method(cpi); 4802 } 4803 4804 { 4805 struct vpx_usec_timer emr_timer; 4806 vpx_usec_timer_start(&emr_timer); 4807 4808#if CONFIG_FP_MB_STATS 4809 if (cpi->use_fp_mb_stats) { 4810 input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm, 4811 &cpi->twopass.this_frame_mb_stats); 4812 } 4813#endif 4814 4815 if (!cpi->row_mt) { 4816 cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy; 4817 cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy; 4818 // If allowed, encoding tiles in parallel with one thread handling one 4819 // tile when row based multi-threading is disabled. 
4820 if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1) 4821 vp9_encode_tiles_mt(cpi); 4822 else 4823 encode_tiles(cpi); 4824 } else { 4825 cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read; 4826 cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write; 4827 vp9_encode_tiles_row_mt(cpi); 4828 } 4829 4830 vpx_usec_timer_mark(&emr_timer); 4831 cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); 4832 } 4833 4834 sf->skip_encode_frame = 4835 sf->skip_encode_sb ? get_skip_encode_frame(cm, td) : 0; 4836 4837#if 0 4838 // Keep record of the total distortion this time around for future use 4839 cpi->last_frame_distortion = cpi->frame_distortion; 4840#endif 4841} 4842 4843static INTERP_FILTER get_interp_filter( 4844 const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS], int is_alt_ref) { 4845 if (!is_alt_ref && threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP] && 4846 threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_SHARP] && 4847 threshes[EIGHTTAP_SMOOTH] > threshes[SWITCHABLE - 1]) { 4848 return EIGHTTAP_SMOOTH; 4849 } else if (threshes[EIGHTTAP_SHARP] > threshes[EIGHTTAP] && 4850 threshes[EIGHTTAP_SHARP] > threshes[SWITCHABLE - 1]) { 4851 return EIGHTTAP_SHARP; 4852 } else if (threshes[EIGHTTAP] > threshes[SWITCHABLE - 1]) { 4853 return EIGHTTAP; 4854 } else { 4855 return SWITCHABLE; 4856 } 4857} 4858 4859static int compute_frame_aq_offset(struct VP9_COMP *cpi) { 4860 VP9_COMMON *const cm = &cpi->common; 4861 MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible; 4862 struct segmentation *const seg = &cm->seg; 4863 4864 int mi_row, mi_col; 4865 int sum_delta = 0; 4866 int map_index = 0; 4867 int qdelta_index; 4868 int segment_id; 4869 4870 for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) { 4871 MODE_INFO **mi_8x8 = mi_8x8_ptr; 4872 for (mi_col = 0; mi_col < cm->mi_cols; mi_col++, mi_8x8++) { 4873 segment_id = mi_8x8[0]->segment_id; 4874 qdelta_index = get_segdata(seg, segment_id, SEG_LVL_ALT_Q); 4875 sum_delta += qdelta_index; 4876 map_index++; 4877 } 4878 mi_8x8_ptr += 
cm->mi_stride; 4879 } 4880 4881 return sum_delta / (cm->mi_rows * cm->mi_cols); 4882} 4883 4884void vp9_encode_frame(VP9_COMP *cpi) { 4885 VP9_COMMON *const cm = &cpi->common; 4886 4887 // In the longer term the encoder should be generalized to match the 4888 // decoder such that we allow compound where one of the 3 buffers has a 4889 // different sign bias and that buffer is then the fixed ref. However, this 4890 // requires further work in the rd loop. For now the only supported encoder 4891 // side behavior is where the ALT ref buffer has opposite sign bias to 4892 // the other two. 4893 if (!frame_is_intra_only(cm)) { 4894 if ((cm->ref_frame_sign_bias[ALTREF_FRAME] == 4895 cm->ref_frame_sign_bias[GOLDEN_FRAME]) || 4896 (cm->ref_frame_sign_bias[ALTREF_FRAME] == 4897 cm->ref_frame_sign_bias[LAST_FRAME])) { 4898 cpi->allow_comp_inter_inter = 0; 4899 } else { 4900 cpi->allow_comp_inter_inter = 1; 4901 cm->comp_fixed_ref = ALTREF_FRAME; 4902 cm->comp_var_ref[0] = LAST_FRAME; 4903 cm->comp_var_ref[1] = GOLDEN_FRAME; 4904 } 4905 } 4906 4907 if (cpi->sf.frame_parameter_update) { 4908 int i; 4909 RD_OPT *const rd_opt = &cpi->rd; 4910 FRAME_COUNTS *counts = cpi->td.counts; 4911 RD_COUNTS *const rdc = &cpi->td.rd_counts; 4912 4913 // This code does a single RD pass over the whole frame assuming 4914 // either compound, single or hybrid prediction as per whatever has 4915 // worked best for that type of frame in the past. 4916 // It also predicts whether another coding mode would have worked 4917 // better than this coding mode. If that is the case, it remembers 4918 // that for subsequent frames. 4919 // It also does the same analysis for transform size selection. 
4920 const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi); 4921 int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type]; 4922 int64_t *const filter_thrs = rd_opt->filter_threshes[frame_type]; 4923 const int is_alt_ref = frame_type == ALTREF_FRAME; 4924 4925 /* prediction (compound, single or hybrid) mode selection */ 4926 if (is_alt_ref || !cpi->allow_comp_inter_inter) 4927 cm->reference_mode = SINGLE_REFERENCE; 4928 else if (mode_thrs[COMPOUND_REFERENCE] > mode_thrs[SINGLE_REFERENCE] && 4929 mode_thrs[COMPOUND_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT] && 4930 check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100) 4931 cm->reference_mode = COMPOUND_REFERENCE; 4932 else if (mode_thrs[SINGLE_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT]) 4933 cm->reference_mode = SINGLE_REFERENCE; 4934 else 4935 cm->reference_mode = REFERENCE_MODE_SELECT; 4936 4937 if (cm->interp_filter == SWITCHABLE) 4938 cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref); 4939 4940 encode_frame_internal(cpi); 4941 4942 for (i = 0; i < REFERENCE_MODES; ++i) 4943 mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2; 4944 4945 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) 4946 filter_thrs[i] = (filter_thrs[i] + rdc->filter_diff[i] / cm->MBs) / 2; 4947 4948 if (cm->reference_mode == REFERENCE_MODE_SELECT) { 4949 int single_count_zero = 0; 4950 int comp_count_zero = 0; 4951 4952 for (i = 0; i < COMP_INTER_CONTEXTS; i++) { 4953 single_count_zero += counts->comp_inter[i][0]; 4954 comp_count_zero += counts->comp_inter[i][1]; 4955 } 4956 4957 if (comp_count_zero == 0) { 4958 cm->reference_mode = SINGLE_REFERENCE; 4959 vp9_zero(counts->comp_inter); 4960 } else if (single_count_zero == 0) { 4961 cm->reference_mode = COMPOUND_REFERENCE; 4962 vp9_zero(counts->comp_inter); 4963 } 4964 } 4965 4966 if (cm->tx_mode == TX_MODE_SELECT) { 4967 int count4x4 = 0; 4968 int count8x8_lp = 0, count8x8_8x8p = 0; 4969 int count16x16_16x16p = 0, count16x16_lp = 0; 4970 
int count32x32 = 0; 4971 4972 for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { 4973 count4x4 += counts->tx.p32x32[i][TX_4X4]; 4974 count4x4 += counts->tx.p16x16[i][TX_4X4]; 4975 count4x4 += counts->tx.p8x8[i][TX_4X4]; 4976 4977 count8x8_lp += counts->tx.p32x32[i][TX_8X8]; 4978 count8x8_lp += counts->tx.p16x16[i][TX_8X8]; 4979 count8x8_8x8p += counts->tx.p8x8[i][TX_8X8]; 4980 4981 count16x16_16x16p += counts->tx.p16x16[i][TX_16X16]; 4982 count16x16_lp += counts->tx.p32x32[i][TX_16X16]; 4983 count32x32 += counts->tx.p32x32[i][TX_32X32]; 4984 } 4985 if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && 4986 count32x32 == 0) { 4987 cm->tx_mode = ALLOW_8X8; 4988 reset_skip_tx_size(cm, TX_8X8); 4989 } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 && 4990 count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) { 4991 cm->tx_mode = ONLY_4X4; 4992 reset_skip_tx_size(cm, TX_4X4); 4993 } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) { 4994 cm->tx_mode = ALLOW_32X32; 4995 } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) { 4996 cm->tx_mode = ALLOW_16X16; 4997 reset_skip_tx_size(cm, TX_16X16); 4998 } 4999 } 5000 } else { 5001 FRAME_COUNTS *counts = cpi->td.counts; 5002 cm->reference_mode = SINGLE_REFERENCE; 5003 if (cpi->allow_comp_inter_inter && cpi->sf.use_compound_nonrd_pickmode && 5004 cpi->rc.alt_ref_gf_group && !cpi->rc.is_src_frame_alt_ref && 5005 cm->frame_type != KEY_FRAME) 5006 cm->reference_mode = REFERENCE_MODE_SELECT; 5007 5008 encode_frame_internal(cpi); 5009 5010 if (cm->reference_mode == REFERENCE_MODE_SELECT) { 5011 int single_count_zero = 0; 5012 int comp_count_zero = 0; 5013 int i; 5014 for (i = 0; i < COMP_INTER_CONTEXTS; i++) { 5015 single_count_zero += counts->comp_inter[i][0]; 5016 comp_count_zero += counts->comp_inter[i][1]; 5017 } 5018 if (comp_count_zero == 0) { 5019 cm->reference_mode = SINGLE_REFERENCE; 5020 vp9_zero(counts->comp_inter); 5021 } else if (single_count_zero == 0) { 5022 
cm->reference_mode = COMPOUND_REFERENCE; 5023 vp9_zero(counts->comp_inter); 5024 } 5025 } 5026 } 5027 5028 // If segmented AQ is enabled compute the average AQ weighting. 5029 if (cm->seg.enabled && (cpi->oxcf.aq_mode != NO_AQ) && 5030 (cm->seg.update_map || cm->seg.update_data)) { 5031 cm->seg.aq_av_offset = compute_frame_aq_offset(cpi); 5032 } 5033} 5034 5035static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) { 5036 const PREDICTION_MODE y_mode = mi->mode; 5037 const PREDICTION_MODE uv_mode = mi->uv_mode; 5038 const BLOCK_SIZE bsize = mi->sb_type; 5039 5040 if (bsize < BLOCK_8X8) { 5041 int idx, idy; 5042 const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; 5043 const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; 5044 for (idy = 0; idy < 2; idy += num_4x4_h) 5045 for (idx = 0; idx < 2; idx += num_4x4_w) 5046 ++counts->y_mode[0][mi->bmi[idy * 2 + idx].as_mode]; 5047 } else { 5048 ++counts->y_mode[size_group_lookup[bsize]][y_mode]; 5049 } 5050 5051 ++counts->uv_mode[y_mode][uv_mode]; 5052} 5053 5054static void update_zeromv_cnt(VP9_COMP *const cpi, const MODE_INFO *const mi, 5055 int mi_row, int mi_col, BLOCK_SIZE bsize) { 5056 const VP9_COMMON *const cm = &cpi->common; 5057 MV mv = mi->mv[0].as_mv; 5058 const int bw = num_8x8_blocks_wide_lookup[bsize]; 5059 const int bh = num_8x8_blocks_high_lookup[bsize]; 5060 const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); 5061 const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); 5062 const int block_index = mi_row * cm->mi_cols + mi_col; 5063 int x, y; 5064 for (y = 0; y < ymis; y++) 5065 for (x = 0; x < xmis; x++) { 5066 int map_offset = block_index + y * cm->mi_cols + x; 5067 if (is_inter_block(mi) && mi->segment_id <= CR_SEGMENT_ID_BOOST2) { 5068 if (abs(mv.row) < 8 && abs(mv.col) < 8) { 5069 if (cpi->consec_zero_mv[map_offset] < 255) 5070 cpi->consec_zero_mv[map_offset]++; 5071 } else { 5072 cpi->consec_zero_mv[map_offset] = 0; 5073 } 5074 } 5075 } 5076} 5077 5078static void 
encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, 5079 int output_enabled, int mi_row, int mi_col, 5080 BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { 5081 VP9_COMMON *const cm = &cpi->common; 5082 MACROBLOCK *const x = &td->mb; 5083 MACROBLOCKD *const xd = &x->e_mbd; 5084 MODE_INFO *mi = xd->mi[0]; 5085 const int seg_skip = 5086 segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP); 5087 x->skip_recode = !x->select_tx_size && mi->sb_type >= BLOCK_8X8 && 5088 cpi->oxcf.aq_mode != COMPLEXITY_AQ && 5089 cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ && 5090 cpi->sf.allow_skip_recode; 5091 5092 if (!x->skip_recode && !cpi->sf.use_nonrd_pick_mode) 5093 memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); 5094 5095 x->skip_optimize = ctx->is_coded; 5096 ctx->is_coded = 1; 5097 x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; 5098 x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame && 5099 x->q_index < QIDX_SKIP_THRESH); 5100 5101 if (x->skip_encode) return; 5102 5103 if (!is_inter_block(mi)) { 5104 int plane; 5105#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH 5106 if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && 5107 (xd->above_mi == NULL || xd->left_mi == NULL) && 5108 need_top_left[mi->uv_mode]) 5109 assert(0); 5110#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH 5111 mi->skip = 1; 5112 for (plane = 0; plane < MAX_MB_PLANE; ++plane) 5113 vp9_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane, 1); 5114 if (output_enabled) sum_intra_stats(td->counts, mi); 5115 vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip, 5116 VPXMAX(bsize, BLOCK_8X8)); 5117 } else { 5118 int ref; 5119 const int is_compound = has_second_ref(mi); 5120 set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); 5121 for (ref = 0; ref < 1 + is_compound; ++ref) { 5122 YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mi->ref_frame[ref]); 5123 assert(cfg != NULL); 5124 vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col, 5125 
&xd->block_refs[ref]->sf); 5126 } 5127 if (!(cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready) || seg_skip) 5128 vp9_build_inter_predictors_sby(xd, mi_row, mi_col, 5129 VPXMAX(bsize, BLOCK_8X8)); 5130 5131 vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, 5132 VPXMAX(bsize, BLOCK_8X8)); 5133 5134 vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8)); 5135 vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip, 5136 VPXMAX(bsize, BLOCK_8X8)); 5137 } 5138 5139 if (seg_skip) { 5140 assert(mi->skip); 5141 } 5142 5143 if (output_enabled) { 5144 if (cm->tx_mode == TX_MODE_SELECT && mi->sb_type >= BLOCK_8X8 && 5145 !(is_inter_block(mi) && mi->skip)) { 5146 ++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd), 5147 &td->counts->tx)[mi->tx_size]; 5148 } else { 5149 // The new intra coding scheme requires no change of transform size 5150 if (is_inter_block(mi)) { 5151 mi->tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode], 5152 max_txsize_lookup[bsize]); 5153 } else { 5154 mi->tx_size = (bsize >= BLOCK_8X8) ? mi->tx_size : TX_4X4; 5155 } 5156 } 5157 5158 ++td->counts->tx.tx_totals[mi->tx_size]; 5159 ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])]; 5160 if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) 5161 vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize); 5162 if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0) 5163 update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize); 5164 } 5165} 5166