1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#ifndef VP9_ENCODER_VP9_ENCODER_H_ 12#define VP9_ENCODER_VP9_ENCODER_H_ 13 14#include <stdio.h> 15 16#include "./vpx_config.h" 17#include "vpx_ports/mem.h" 18#include "vpx/internal/vpx_codec_internal.h" 19#include "vpx/vp8cx.h" 20 21#include "vp9/common/vp9_ppflags.h" 22#include "vp9/common/vp9_entropy.h" 23#include "vp9/common/vp9_entropymode.h" 24#include "vp9/common/vp9_onyxc_int.h" 25 26#include "vp9/encoder/vp9_aq_cyclicrefresh.h" 27#include "vp9/encoder/vp9_context_tree.h" 28#include "vp9/encoder/vp9_encodemb.h" 29#include "vp9/encoder/vp9_firstpass.h" 30#include "vp9/encoder/vp9_lookahead.h" 31#include "vp9/encoder/vp9_mbgraph.h" 32#include "vp9/encoder/vp9_mcomp.h" 33#include "vp9/encoder/vp9_quantize.h" 34#include "vp9/encoder/vp9_ratectrl.h" 35#include "vp9/encoder/vp9_rd.h" 36#include "vp9/encoder/vp9_speed_features.h" 37#include "vp9/encoder/vp9_svc_layercontext.h" 38#include "vp9/encoder/vp9_tokenize.h" 39#include "vp9/encoder/vp9_variance.h" 40#if CONFIG_VP9_TEMPORAL_DENOISING 41#include "vp9/encoder/vp9_denoiser.h" 42#endif 43 44#ifdef __cplusplus 45extern "C" { 46#endif 47 48#define DEFAULT_GF_INTERVAL 10 49 50typedef struct { 51 int nmvjointcost[MV_JOINTS]; 52 int nmvcosts[2][MV_VALS]; 53 int nmvcosts_hp[2][MV_VALS]; 54 55 vp9_prob segment_pred_probs[PREDICTION_PROBS]; 56 57 unsigned char *last_frame_seg_map_copy; 58 59 // 0 = Intra, Last, GF, ARF 60 signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; 61 // 0 = ZERO_MV, MV 62 signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; 63 64 FRAME_CONTEXT fc; 65} CODING_CONTEXT; 66 67 68typedef enum { 69 // encode_breakout is disabled. 70 ENCODE_BREAKOUT_DISABLED = 0, 71 // encode_breakout is enabled. 72 ENCODE_BREAKOUT_ENABLED = 1, 73 // encode_breakout is enabled with small max_thresh limit. 74 ENCODE_BREAKOUT_LIMITED = 2 75} ENCODE_BREAKOUT_TYPE; 76 77typedef enum { 78 NORMAL = 0, 79 FOURFIVE = 1, 80 THREEFIVE = 2, 81 ONETWO = 3 82} VPX_SCALING; 83 84typedef enum { 85 // Good Quality Fast Encoding. The encoder balances quality with the 86 // amount of time it takes to encode the output. (speed setting 87 // controls how fast) 88 ONE_PASS_GOOD = 1, 89 90 // One Pass - Best Quality. The encoder places priority on the 91 // quality of the output over encoding speed. The output is compressed 92 // at the highest possible quality. This option takes the longest 93 // amount of time to encode. (speed setting ignored) 94 ONE_PASS_BEST = 2, 95 96 // Two Pass - First Pass. The encoder generates a file of statistics 97 // for use in the second encoding pass. (speed setting controls how fast) 98 TWO_PASS_FIRST = 3, 99 100 // Two Pass - Second Pass. The encoder uses the statistics that were 101 // generated in the first encoding pass to create the compressed 102 // output. (speed setting controls how fast) 103 TWO_PASS_SECOND_GOOD = 4, 104 105 // Two Pass - Second Pass Best. The encoder uses the statistics that 106 // were generated in the first encoding pass to create the compressed 107 // output using the highest possible quality, and taking a 108 // longer amount of time to encode. (speed setting ignored) 109 TWO_PASS_SECOND_BEST = 5, 110 111 // Realtime/Live Encoding. This mode is optimized for realtime 112 // encoding (for example, capturing a television signal or feed from 113 // a live camera). (speed setting controls how fast) 114 REALTIME = 6, 115} MODE; 116 117typedef enum { 118 FRAMEFLAGS_KEY = 1 << 0, 119 FRAMEFLAGS_GOLDEN = 1 << 1, 120 FRAMEFLAGS_ALTREF = 1 << 2, 121} FRAMETYPE_FLAGS; 122 123typedef enum { 124 NO_AQ = 0, 125 VARIANCE_AQ = 1, 126 COMPLEXITY_AQ = 2, 127 CYCLIC_REFRESH_AQ = 3, 128 AQ_MODE_COUNT // This should always be the last member of the enum 129} AQ_MODE; 130 131 132typedef struct VP9EncoderConfig { 133 BITSTREAM_PROFILE profile; 134 BIT_DEPTH bit_depth; 135 int width; // width of data passed to the compressor 136 int height; // height of data passed to the compressor 137 double framerate; // set to passed in framerate 138 int64_t target_bandwidth; // bandwidth to be used in kilobits per second 139 140 int noise_sensitivity; // pre processing blur: recommendation 0 141 int sharpness; // sharpening output: recommendation 0: 142 int speed; 143 unsigned int rc_max_intra_bitrate_pct; 144 145 MODE mode; 146 int pass; 147 148 // Key Framing Operations 149 int auto_key; // autodetect cut scenes and set the keyframes 150 int key_freq; // maximum distance to key frame. 151 152 int lag_in_frames; // how many frames lag before we start encoding 153 154 // ---------------------------------------------------------------- 155 // DATARATE CONTROL OPTIONS 156 157 // vbr, cbr, constrained quality or constant quality 158 enum vpx_rc_mode rc_mode; 159 160 // buffer targeting aggressiveness 161 int under_shoot_pct; 162 int over_shoot_pct; 163 164 // buffering parameters 165 int64_t starting_buffer_level_ms; 166 int64_t optimal_buffer_level_ms; 167 int64_t maximum_buffer_size_ms; 168 169 // Frame drop threshold. 170 int drop_frames_water_mark; 171 172 // controlling quality 173 int fixed_q; 174 int worst_allowed_q; 175 int best_allowed_q; 176 int cq_level; 177 AQ_MODE aq_mode; // Adaptive Quantization mode 178 179 // Internal frame size scaling. 180 int allow_spatial_resampling; 181 int scaled_frame_width; 182 int scaled_frame_height; 183 184 // Enable feature to reduce the frame quantization every x frames. 185 int frame_periodic_boost; 186 187 // two pass datarate control 188 int two_pass_vbrbias; // two pass datarate control tweaks 189 int two_pass_vbrmin_section; 190 int two_pass_vbrmax_section; 191 // END DATARATE CONTROL OPTIONS 192 // ---------------------------------------------------------------- 193 194 // Spatial and temporal scalability. 195 int ss_number_layers; // Number of spatial layers. 196 int ts_number_layers; // Number of temporal layers. 197 // Bitrate allocation for spatial layers. 198 int ss_target_bitrate[VPX_SS_MAX_LAYERS]; 199 int ss_play_alternate[VPX_SS_MAX_LAYERS]; 200 // Bitrate allocation (CBR mode) and framerate factor, for temporal layers. 201 int ts_target_bitrate[VPX_TS_MAX_LAYERS]; 202 int ts_rate_decimator[VPX_TS_MAX_LAYERS]; 203 204 // these parameters aren't to be used in final build don't use!!! 205 int play_alternate; 206 207 int encode_breakout; // early breakout : for video conf recommend 800 208 209 /* Bitfield defining the error resiliency features to enable. 210 * Can provide decodable frames after losses in previous 211 * frames and decodable partitions after losses in the same frame. 212 */ 213 unsigned int error_resilient_mode; 214 215 /* Bitfield defining the parallel decoding mode where the 216 * decoding in successive frames may be conducted in parallel 217 * just by decoding the frame headers. 218 */ 219 unsigned int frame_parallel_decoding_mode; 220 221 int arnr_max_frames; 222 int arnr_strength; 223 int arnr_type; 224 225 int tile_columns; 226 int tile_rows; 227 228 struct vpx_fixed_buf two_pass_stats_in; 229 struct vpx_codec_pkt_list *output_pkt_list; 230 231#if CONFIG_FP_MB_STATS 232 struct vpx_fixed_buf firstpass_mb_stats_in; 233#endif 234 235 vp8e_tuning tuning; 236 vp9e_tune_content content; 237} VP9EncoderConfig; 238 239static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) { 240 return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0; 241} 242 243static INLINE int is_best_mode(MODE mode) { 244 return mode == ONE_PASS_BEST || mode == TWO_PASS_SECOND_BEST; 245} 246 247typedef struct VP9_COMP { 248 QUANTS quants; 249 MACROBLOCK mb; 250 VP9_COMMON common; 251 VP9EncoderConfig oxcf; 252 struct lookahead_ctx *lookahead; 253 struct lookahead_entry *source; 254 struct lookahead_entry *alt_ref_source; 255 struct lookahead_entry *last_source; 256 257 YV12_BUFFER_CONFIG *Source; 258 YV12_BUFFER_CONFIG *Last_Source; // NULL for first frame and alt_ref frames 259 YV12_BUFFER_CONFIG *un_scaled_source; 260 YV12_BUFFER_CONFIG scaled_source; 261 YV12_BUFFER_CONFIG *unscaled_last_source; 262 YV12_BUFFER_CONFIG scaled_last_source; 263 264 int gold_is_last; // gold same as last frame ( short circuit gold searches) 265 int alt_is_last; // Alt same as last ( short circuit altref search) 266 int gold_is_alt; // don't do both alt and gold search ( just do gold). 267 268 int skippable_frame; 269 270 int scaled_ref_idx[3]; 271 int lst_fb_idx; 272 int gld_fb_idx; 273 int alt_fb_idx; 274 275 int refresh_last_frame; 276 int refresh_golden_frame; 277 int refresh_alt_ref_frame; 278 279 int ext_refresh_frame_flags_pending; 280 int ext_refresh_last_frame; 281 int ext_refresh_golden_frame; 282 int ext_refresh_alt_ref_frame; 283 284 int ext_refresh_frame_context_pending; 285 int ext_refresh_frame_context; 286 287 YV12_BUFFER_CONFIG last_frame_uf; 288 289 TOKENEXTRA *tok; 290 unsigned int tok_count[4][1 << 6]; 291 292 // Ambient reconstruction err target for force key frames 293 int ambient_err; 294 295 RD_OPT rd; 296 297 CODING_CONTEXT coding_context; 298 299 int zbin_mode_boost; 300 int zbin_mode_boost_enabled; 301 int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames 302 int active_arnr_strength; // <= cpi->oxcf.arnr_max_strength 303 304 int64_t last_time_stamp_seen; 305 int64_t last_end_time_stamp_seen; 306 int64_t first_time_stamp_ever; 307 308 RATE_CONTROL rc; 309 310 vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES]; 311 312 struct vpx_codec_pkt_list *output_pkt_list; 313 314 MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS]; 315 int mbgraph_n_frames; // number of frames filled in the above 316 int static_mb_pct; // % forced skip mbs by segmentation 317 int ref_frame_flags; 318 319 SPEED_FEATURES sf; 320 321 unsigned int max_mv_magnitude; 322 int mv_step_param; 323 324 // Default value is 1. From first pass stats, encode_breakout may be disabled. 325 ENCODE_BREAKOUT_TYPE allow_encode_breakout; 326 327 // Get threshold from external input. A suggested threshold is 800 for HD 328 // clips, and 300 for < HD clips. 329 int encode_breakout; 330 331 unsigned char *segmentation_map; 332 333 // segment threashold for encode breakout 334 int segment_encode_breakout[MAX_SEGMENTS]; 335 336 unsigned char *complexity_map; 337 338 CYCLIC_REFRESH *cyclic_refresh; 339 340 fractional_mv_step_fp *find_fractional_mv_step; 341 vp9_full_search_fn_t full_search_sad; 342 vp9_refining_search_fn_t refining_search_sad; 343 vp9_diamond_search_fn_t diamond_search_sad; 344 vp9_variance_fn_ptr_t fn_ptr[BLOCK_SIZES]; 345 uint64_t time_receive_data; 346 uint64_t time_compress_data; 347 uint64_t time_pick_lpf; 348 uint64_t time_encode_sb_row; 349 350#if CONFIG_FP_MB_STATS 351 int use_fp_mb_stats; 352#endif 353 354 TWO_PASS twopass; 355 356 YV12_BUFFER_CONFIG alt_ref_buffer; 357 YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS]; 358 359#if CONFIG_INTERNAL_STATS 360 unsigned int mode_chosen_counts[MAX_MODES]; 361 362 int count; 363 double total_y; 364 double total_u; 365 double total_v; 366 double total; 367 uint64_t total_sq_error; 368 uint64_t total_samples; 369 370 double totalp_y; 371 double totalp_u; 372 double totalp_v; 373 double totalp; 374 uint64_t totalp_sq_error; 375 uint64_t totalp_samples; 376 377 int bytes; 378 double summed_quality; 379 double summed_weights; 380 double summedp_quality; 381 double summedp_weights; 382 unsigned int tot_recode_hits; 383 384 385 double total_ssimg_y; 386 double total_ssimg_u; 387 double total_ssimg_v; 388 double total_ssimg_all; 389 390 int b_calculate_ssimg; 391#endif 392 int b_calculate_psnr; 393 394 int droppable; 395 396 int dummy_packing; /* flag to indicate if packing is dummy */ 397 398 unsigned int tx_stepdown_count[TX_SIZES]; 399 400 int initial_width; 401 int initial_height; 402 403 int use_svc; 404 405 SVC svc; 406 407 // Store frame variance info in SOURCE_VAR_BASED_PARTITION search type. 408 diff *source_diff_var; 409 // The threshold used in SOURCE_VAR_BASED_PARTITION search type. 410 unsigned int source_var_thresh; 411 int frames_till_next_var_check; 412 413 int frame_flags; 414 415 search_site_config ss_cfg; 416 417 int mbmode_cost[INTRA_MODES]; 418 unsigned inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES]; 419 int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES]; 420 int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; 421 int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; 422 423 PICK_MODE_CONTEXT *leaf_tree; 424 PC_TREE *pc_tree; 425 PC_TREE *pc_root; 426 int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES]; 427 428 int multi_arf_allowed; 429 int multi_arf_enabled; 430 int multi_arf_last_grp_enabled; 431 432#if CONFIG_VP9_TEMPORAL_DENOISING 433 VP9_DENOISER denoiser; 434#endif 435} VP9_COMP; 436 437void vp9_initialize_enc(); 438 439struct VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf); 440void vp9_remove_compressor(VP9_COMP *cpi); 441 442void vp9_change_config(VP9_COMP *cpi, const VP9EncoderConfig *oxcf); 443 444 // receive a frames worth of data. caller can assume that a copy of this 445 // frame is made and not just a copy of the pointer.. 446int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags, 447 YV12_BUFFER_CONFIG *sd, int64_t time_stamp, 448 int64_t end_time_stamp); 449 450int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, 451 size_t *size, uint8_t *dest, 452 int64_t *time_stamp, int64_t *time_end, int flush); 453 454int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest, 455 vp9_ppflags_t *flags); 456 457int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags); 458 459void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags); 460 461int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, 462 YV12_BUFFER_CONFIG *sd); 463 464int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, 465 YV12_BUFFER_CONFIG *sd); 466 467int vp9_update_entropy(VP9_COMP *cpi, int update); 468 469int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols); 470 471int vp9_set_internal_size(VP9_COMP *cpi, 472 VPX_SCALING horiz_mode, VPX_SCALING vert_mode); 473 474int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width, 475 unsigned int height); 476 477void vp9_set_svc(VP9_COMP *cpi, int use_svc); 478 479int vp9_get_quantizer(struct VP9_COMP *cpi); 480 481static INLINE int get_ref_frame_idx(const VP9_COMP *cpi, 482 MV_REFERENCE_FRAME ref_frame) { 483 if (ref_frame == LAST_FRAME) { 484 return cpi->lst_fb_idx; 485 } else if (ref_frame == GOLDEN_FRAME) { 486 return cpi->gld_fb_idx; 487 } else { 488 return cpi->alt_fb_idx; 489 } 490} 491 492static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( 493 VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { 494 VP9_COMMON * const cm = &cpi->common; 495 return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]] 496 .buf; 497} 498 499// Intra only frames, golden frames (except alt ref overlays) and 500// alt ref frames tend to be coded at a higher than ambient quality 501static INLINE int frame_is_boosted(const VP9_COMP *cpi) { 502 return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame || 503 (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref) || 504 vp9_is_upper_layer_key_frame(cpi); 505} 506 507static INLINE int get_token_alloc(int mb_rows, int mb_cols) { 508 // TODO(JBB): double check we can't exceed this token count if we have a 509 // 32x32 transform crossing a boundary at a multiple of 16. 510 // mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full 511 // resolution. We assume up to 1 token per pixel, and then allow 512 // a head room of 4. 513 return mb_rows * mb_cols * (16 * 16 * 3 + 4); 514} 515 516int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b); 517 518void vp9_alloc_compressor_data(VP9_COMP *cpi); 519 520void vp9_scale_references(VP9_COMP *cpi); 521 522void vp9_update_reference_frames(VP9_COMP *cpi); 523 524int64_t vp9_rescale(int64_t val, int64_t num, int denom); 525 526void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv); 527 528YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm, 529 YV12_BUFFER_CONFIG *unscaled, 530 YV12_BUFFER_CONFIG *scaled); 531 532void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags); 533 534static INLINE int is_spatial_svc(const struct VP9_COMP *const cpi) { 535 return cpi->use_svc && 536 cpi->svc.number_temporal_layers == 1 && 537 cpi->svc.number_spatial_layers > 1; 538} 539 540static INLINE int is_altref_enabled(const VP9_COMP *const cpi) { 541 return cpi->oxcf.mode != REALTIME && cpi->oxcf.lag_in_frames > 0 && 542 (cpi->oxcf.play_alternate && 543 (!is_spatial_svc(cpi) || 544 cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id])); 545} 546 547static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, 548 MV_REFERENCE_FRAME ref0, 549 MV_REFERENCE_FRAME ref1) { 550 xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME 551 : 0]; 552 xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME 553 : 0]; 554} 555 556static INLINE int get_chessboard_index(const int frame_index) { 557 return frame_index & 0x1; 558} 559 560#ifdef __cplusplus 561} // extern "C" 562#endif 563 564#endif // VP9_ENCODER_VP9_ENCODER_H_ 565