1/* 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11// This is an example demonstrating how to implement a multi-layer VPx 12// encoding scheme based on temporal scalability for video applications 13// that benefit from a scalable bitstream. 14 15#include <assert.h> 16#include <math.h> 17#include <stdio.h> 18#include <stdlib.h> 19#include <string.h> 20 21#include "./vpx_config.h" 22#include "vpx_ports/vpx_timer.h" 23#include "vpx/vp8cx.h" 24#include "vpx/vpx_encoder.h" 25 26#include "./tools_common.h" 27#include "./video_writer.h" 28 29static const char *exec_name; 30 31void usage_exit() { 32 exit(EXIT_FAILURE); 33} 34 35// Denoiser states, for temporal denoising. 36enum denoiserState { 37 kDenoiserOff, 38 kDenoiserOnYOnly, 39 kDenoiserOnYUV, 40 kDenoiserOnYUVAggressive, 41 kDenoiserOnAdaptive 42}; 43 44static int mode_to_num_layers[12] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3}; 45 46// For rate control encoding stats. 47struct RateControlMetrics { 48 // Number of input frames per layer. 49 int layer_input_frames[VPX_TS_MAX_LAYERS]; 50 // Total (cumulative) number of encoded frames per layer. 51 int layer_tot_enc_frames[VPX_TS_MAX_LAYERS]; 52 // Number of encoded non-key frames per layer. 53 int layer_enc_frames[VPX_TS_MAX_LAYERS]; 54 // Framerate per layer layer (cumulative). 55 double layer_framerate[VPX_TS_MAX_LAYERS]; 56 // Target average frame size per layer (per-frame-bandwidth per layer). 57 double layer_pfb[VPX_TS_MAX_LAYERS]; 58 // Actual average frame size per layer. 59 double layer_avg_frame_size[VPX_TS_MAX_LAYERS]; 60 // Average rate mismatch per layer (|target - actual| / target). 61 double layer_avg_rate_mismatch[VPX_TS_MAX_LAYERS]; 62 // Actual encoding bitrate per layer (cumulative). 63 double layer_encoding_bitrate[VPX_TS_MAX_LAYERS]; 64}; 65 66// Note: these rate control metrics assume only 1 key frame in the 67// sequence (i.e., first frame only). So for temporal pattern# 7 68// (which has key frame for every frame on base layer), the metrics 69// computation will be off/wrong. 70// TODO(marpan): Update these metrics to account for multiple key frames 71// in the stream. 72static void set_rate_control_metrics(struct RateControlMetrics *rc, 73 vpx_codec_enc_cfg_t *cfg) { 74 unsigned int i = 0; 75 // Set the layer (cumulative) framerate and the target layer (non-cumulative) 76 // per-frame-bandwidth, for the rate control encoding stats below. 77 const double framerate = cfg->g_timebase.den / cfg->g_timebase.num; 78 rc->layer_framerate[0] = framerate / cfg->ts_rate_decimator[0]; 79 rc->layer_pfb[0] = 1000.0 * cfg->ts_target_bitrate[0] / 80 rc->layer_framerate[0]; 81 for (i = 0; i < cfg->ts_number_layers; ++i) { 82 if (i > 0) { 83 rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i]; 84 rc->layer_pfb[i] = 1000.0 * 85 (cfg->ts_target_bitrate[i] - cfg->ts_target_bitrate[i - 1]) / 86 (rc->layer_framerate[i] - rc->layer_framerate[i - 1]); 87 } 88 rc->layer_input_frames[i] = 0; 89 rc->layer_enc_frames[i] = 0; 90 rc->layer_tot_enc_frames[i] = 0; 91 rc->layer_encoding_bitrate[i] = 0.0; 92 rc->layer_avg_frame_size[i] = 0.0; 93 rc->layer_avg_rate_mismatch[i] = 0.0; 94 } 95} 96 97static void printout_rate_control_summary(struct RateControlMetrics *rc, 98 vpx_codec_enc_cfg_t *cfg, 99 int frame_cnt) { 100 unsigned int i = 0; 101 int tot_num_frames = 0; 102 printf("Total number of processed frames: %d\n\n", frame_cnt -1); 103 printf("Rate control layer stats for %d layer(s):\n\n", 104 cfg->ts_number_layers); 105 for (i = 0; i < cfg->ts_number_layers; ++i) { 106 const int num_dropped = (i > 0) ? 107 (rc->layer_input_frames[i] - rc->layer_enc_frames[i]) : 108 (rc->layer_input_frames[i] - rc->layer_enc_frames[i] - 1); 109 tot_num_frames += rc->layer_input_frames[i]; 110 rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[i] * 111 rc->layer_encoding_bitrate[i] / tot_num_frames; 112 rc->layer_avg_frame_size[i] = rc->layer_avg_frame_size[i] / 113 rc->layer_enc_frames[i]; 114 rc->layer_avg_rate_mismatch[i] = 100.0 * rc->layer_avg_rate_mismatch[i] / 115 rc->layer_enc_frames[i]; 116 printf("For layer#: %d \n", i); 117 printf("Bitrate (target vs actual): %d %f \n", cfg->ts_target_bitrate[i], 118 rc->layer_encoding_bitrate[i]); 119 printf("Average frame size (target vs actual): %f %f \n", rc->layer_pfb[i], 120 rc->layer_avg_frame_size[i]); 121 printf("Average rate_mismatch: %f \n", rc->layer_avg_rate_mismatch[i]); 122 printf("Number of input frames, encoded (non-key) frames, " 123 "and perc dropped frames: %d %d %f \n", rc->layer_input_frames[i], 124 rc->layer_enc_frames[i], 125 100.0 * num_dropped / rc->layer_input_frames[i]); 126 printf("\n"); 127 } 128 if ((frame_cnt - 1) != tot_num_frames) 129 die("Error: Number of input frames not equal to output! \n"); 130} 131 132// Temporal scaling parameters: 133// NOTE: The 3 prediction frames cannot be used interchangeably due to 134// differences in the way they are handled throughout the code. The 135// frames should be allocated to layers in the order LAST, GF, ARF. 136// Other combinations work, but may produce slightly inferior results. 137static void set_temporal_layer_pattern(int layering_mode, 138 vpx_codec_enc_cfg_t *cfg, 139 int *layer_flags, 140 int *flag_periodicity) { 141 switch (layering_mode) { 142 case 0: { 143 // 1-layer. 144 int ids[1] = {0}; 145 cfg->ts_periodicity = 1; 146 *flag_periodicity = 1; 147 cfg->ts_number_layers = 1; 148 cfg->ts_rate_decimator[0] = 1; 149 memcpy(cfg->ts_layer_id, ids, sizeof(ids)); 150 // Update L only. 151 layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | 152 VP8_EFLAG_NO_UPD_ARF; 153 break; 154 } 155 case 1: { 156 // 2-layers, 2-frame period. 157 int ids[2] = {0, 1}; 158 cfg->ts_periodicity = 2; 159 *flag_periodicity = 2; 160 cfg->ts_number_layers = 2; 161 cfg->ts_rate_decimator[0] = 2; 162 cfg->ts_rate_decimator[1] = 1; 163 memcpy(cfg->ts_layer_id, ids, sizeof(ids)); 164#if 1 165 // 0=L, 1=GF, Intra-layer prediction enabled. 166 layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | 167 VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; 168 layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | 169 VP8_EFLAG_NO_REF_ARF; 170#else 171 // 0=L, 1=GF, Intra-layer prediction disabled. 172 layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | 173 VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; 174 layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | 175 VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_LAST; 176#endif 177 break; 178 } 179 case 2: { 180 // 2-layers, 3-frame period. 181 int ids[3] = {0, 1, 1}; 182 cfg->ts_periodicity = 3; 183 *flag_periodicity = 3; 184 cfg->ts_number_layers = 2; 185 cfg->ts_rate_decimator[0] = 3; 186 cfg->ts_rate_decimator[1] = 1; 187 memcpy(cfg->ts_layer_id, ids, sizeof(ids)); 188 // 0=L, 1=GF, Intra-layer prediction enabled. 189 layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | 190 VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; 191 layer_flags[1] = 192 layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | 193 VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; 194 break; 195 } 196 case 3: { 197 // 3-layers, 6-frame period. 198 int ids[6] = {0, 2, 2, 1, 2, 2}; 199 cfg->ts_periodicity = 6; 200 *flag_periodicity = 6; 201 cfg->ts_number_layers = 3; 202 cfg->ts_rate_decimator[0] = 6; 203 cfg->ts_rate_decimator[1] = 3; 204 cfg->ts_rate_decimator[2] = 1; 205 memcpy(cfg->ts_layer_id, ids, sizeof(ids)); 206 // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled. 207 layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | 208 VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; 209 layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF | 210 VP8_EFLAG_NO_UPD_LAST; 211 layer_flags[1] = 212 layer_flags[2] = 213 layer_flags[4] = 214 layer_flags[5] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST; 215 break; 216 } 217 case 4: { 218 // 3-layers, 4-frame period. 219 int ids[4] = {0, 2, 1, 2}; 220 cfg->ts_periodicity = 4; 221 *flag_periodicity = 4; 222 cfg->ts_number_layers = 3; 223 cfg->ts_rate_decimator[0] = 4; 224 cfg->ts_rate_decimator[1] = 2; 225 cfg->ts_rate_decimator[2] = 1; 226 memcpy(cfg->ts_layer_id, ids, sizeof(ids)); 227 // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled. 228 layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | 229 VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; 230 layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | 231 VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; 232 layer_flags[1] = 233 layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | 234 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; 235 break; 236 } 237 case 5: { 238 // 3-layers, 4-frame period. 239 int ids[4] = {0, 2, 1, 2}; 240 cfg->ts_periodicity = 4; 241 *flag_periodicity = 4; 242 cfg->ts_number_layers = 3; 243 cfg->ts_rate_decimator[0] = 4; 244 cfg->ts_rate_decimator[1] = 2; 245 cfg->ts_rate_decimator[2] = 1; 246 memcpy(cfg->ts_layer_id, ids, sizeof(ids)); 247 // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, disabled 248 // in layer 2. 249 layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | 250 VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; 251 layer_flags[2] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | 252 VP8_EFLAG_NO_UPD_ARF; 253 layer_flags[1] = 254 layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | 255 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; 256 break; 257 } 258 case 6: { 259 // 3-layers, 4-frame period. 260 int ids[4] = {0, 2, 1, 2}; 261 cfg->ts_periodicity = 4; 262 *flag_periodicity = 4; 263 cfg->ts_number_layers = 3; 264 cfg->ts_rate_decimator[0] = 4; 265 cfg->ts_rate_decimator[1] = 2; 266 cfg->ts_rate_decimator[2] = 1; 267 memcpy(cfg->ts_layer_id, ids, sizeof(ids)); 268 // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled. 269 layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | 270 VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; 271 layer_flags[2] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | 272 VP8_EFLAG_NO_UPD_ARF; 273 layer_flags[1] = 274 layer_flags[3] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; 275 break; 276 } 277 case 7: { 278 // NOTE: Probably of academic interest only. 279 // 5-layers, 16-frame period. 280 int ids[16] = {0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4}; 281 cfg->ts_periodicity = 16; 282 *flag_periodicity = 16; 283 cfg->ts_number_layers = 5; 284 cfg->ts_rate_decimator[0] = 16; 285 cfg->ts_rate_decimator[1] = 8; 286 cfg->ts_rate_decimator[2] = 4; 287 cfg->ts_rate_decimator[3] = 2; 288 cfg->ts_rate_decimator[4] = 1; 289 memcpy(cfg->ts_layer_id, ids, sizeof(ids)); 290 layer_flags[0] = VPX_EFLAG_FORCE_KF; 291 layer_flags[1] = 292 layer_flags[3] = 293 layer_flags[5] = 294 layer_flags[7] = 295 layer_flags[9] = 296 layer_flags[11] = 297 layer_flags[13] = 298 layer_flags[15] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | 299 VP8_EFLAG_NO_UPD_ARF; 300 layer_flags[2] = 301 layer_flags[6] = 302 layer_flags[10] = 303 layer_flags[14] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF; 304 layer_flags[4] = 305 layer_flags[12] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_UPD_ARF; 306 layer_flags[8] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF; 307 break; 308 } 309 case 8: { 310 // 2-layers, with sync point at first frame of layer 1. 311 int ids[2] = {0, 1}; 312 cfg->ts_periodicity = 2; 313 *flag_periodicity = 8; 314 cfg->ts_number_layers = 2; 315 cfg->ts_rate_decimator[0] = 2; 316 cfg->ts_rate_decimator[1] = 1; 317 memcpy(cfg->ts_layer_id, ids, sizeof(ids)); 318 // 0=L, 1=GF. 319 // ARF is used as predictor for all frames, and is only updated on 320 // key frame. Sync point every 8 frames. 321 322 // Layer 0: predict from L and ARF, update L and G. 323 layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | 324 VP8_EFLAG_NO_UPD_ARF; 325 // Layer 1: sync point: predict from L and ARF, and update G. 326 layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_LAST | 327 VP8_EFLAG_NO_UPD_ARF; 328 // Layer 0, predict from L and ARF, update L. 329 layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | 330 VP8_EFLAG_NO_UPD_ARF; 331 // Layer 1: predict from L, G and ARF, and update G. 332 layer_flags[3] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | 333 VP8_EFLAG_NO_UPD_ENTROPY; 334 // Layer 0. 335 layer_flags[4] = layer_flags[2]; 336 // Layer 1. 337 layer_flags[5] = layer_flags[3]; 338 // Layer 0. 339 layer_flags[6] = layer_flags[4]; 340 // Layer 1. 341 layer_flags[7] = layer_flags[5]; 342 break; 343 } 344 case 9: { 345 // 3-layers: Sync points for layer 1 and 2 every 8 frames. 346 int ids[4] = {0, 2, 1, 2}; 347 cfg->ts_periodicity = 4; 348 *flag_periodicity = 8; 349 cfg->ts_number_layers = 3; 350 cfg->ts_rate_decimator[0] = 4; 351 cfg->ts_rate_decimator[1] = 2; 352 cfg->ts_rate_decimator[2] = 1; 353 memcpy(cfg->ts_layer_id, ids, sizeof(ids)); 354 // 0=L, 1=GF, 2=ARF. 355 layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | 356 VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; 357 layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | 358 VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; 359 layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | 360 VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; 361 layer_flags[3] = 362 layer_flags[5] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; 363 layer_flags[4] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | 364 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; 365 layer_flags[6] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | 366 VP8_EFLAG_NO_UPD_ARF; 367 layer_flags[7] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | 368 VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_ENTROPY; 369 break; 370 } 371 case 10: { 372 // 3-layers structure where ARF is used as predictor for all frames, 373 // and is only updated on key frame. 374 // Sync points for layer 1 and 2 every 8 frames. 375 376 int ids[4] = {0, 2, 1, 2}; 377 cfg->ts_periodicity = 4; 378 *flag_periodicity = 8; 379 cfg->ts_number_layers = 3; 380 cfg->ts_rate_decimator[0] = 4; 381 cfg->ts_rate_decimator[1] = 2; 382 cfg->ts_rate_decimator[2] = 1; 383 memcpy(cfg->ts_layer_id, ids, sizeof(ids)); 384 // 0=L, 1=GF, 2=ARF. 385 // Layer 0: predict from L and ARF; update L and G. 386 layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_ARF | 387 VP8_EFLAG_NO_REF_GF; 388 // Layer 2: sync point: predict from L and ARF; update none. 389 layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | 390 VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | 391 VP8_EFLAG_NO_UPD_ENTROPY; 392 // Layer 1: sync point: predict from L and ARF; update G. 393 layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_ARF | 394 VP8_EFLAG_NO_UPD_LAST; 395 // Layer 2: predict from L, G, ARF; update none. 396 layer_flags[3] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | 397 VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY; 398 // Layer 0: predict from L and ARF; update L. 399 layer_flags[4] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | 400 VP8_EFLAG_NO_REF_GF; 401 // Layer 2: predict from L, G, ARF; update none. 402 layer_flags[5] = layer_flags[3]; 403 // Layer 1: predict from L, G, ARF; update G. 404 layer_flags[6] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; 405 // Layer 2: predict from L, G, ARF; update none. 406 layer_flags[7] = layer_flags[3]; 407 break; 408 } 409 case 11: 410 default: { 411 // 3-layers structure as in case 10, but no sync/refresh points for 412 // layer 1 and 2. 413 int ids[4] = {0, 2, 1, 2}; 414 cfg->ts_periodicity = 4; 415 *flag_periodicity = 8; 416 cfg->ts_number_layers = 3; 417 cfg->ts_rate_decimator[0] = 4; 418 cfg->ts_rate_decimator[1] = 2; 419 cfg->ts_rate_decimator[2] = 1; 420 memcpy(cfg->ts_layer_id, ids, sizeof(ids)); 421 // 0=L, 1=GF, 2=ARF. 422 // Layer 0: predict from L and ARF; update L. 423 layer_flags[0] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | 424 VP8_EFLAG_NO_REF_GF; 425 layer_flags[4] = layer_flags[0]; 426 // Layer 1: predict from L, G, ARF; update G. 427 layer_flags[2] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; 428 layer_flags[6] = layer_flags[2]; 429 // Layer 2: predict from L, G, ARF; update none. 430 layer_flags[1] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | 431 VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY; 432 layer_flags[3] = layer_flags[1]; 433 layer_flags[5] = layer_flags[1]; 434 layer_flags[7] = layer_flags[1]; 435 break; 436 } 437 } 438} 439 440int main(int argc, char **argv) { 441 VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL}; 442 vpx_codec_ctx_t codec; 443 vpx_codec_enc_cfg_t cfg; 444 int frame_cnt = 0; 445 vpx_image_t raw; 446 vpx_codec_err_t res; 447 unsigned int width; 448 unsigned int height; 449 int speed; 450 int frame_avail; 451 int got_data; 452 int flags = 0; 453 unsigned int i; 454 int pts = 0; // PTS starts at 0. 455 int frame_duration = 1; // 1 timebase tick per frame. 456 int layering_mode = 0; 457 int layer_flags[VPX_TS_MAX_PERIODICITY] = {0}; 458 int flag_periodicity = 1; 459 vpx_svc_layer_id_t layer_id = {0, 0}; 460 const VpxInterface *encoder = NULL; 461 FILE *infile = NULL; 462 struct RateControlMetrics rc; 463 int64_t cx_time = 0; 464 465 exec_name = argv[0]; 466 // Check usage and arguments. 467 if (argc < 11) { 468 die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> " 469 "<rate_num> <rate_den> <speed> <frame_drop_threshold> <mode> " 470 "<Rate_0> ... <Rate_nlayers-1> \n", argv[0]); 471 } 472 473 encoder = get_vpx_encoder_by_name(argv[3]); 474 if (!encoder) 475 die("Unsupported codec."); 476 477 printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface())); 478 479 width = strtol(argv[4], NULL, 0); 480 height = strtol(argv[5], NULL, 0); 481 if (width < 16 || width % 2 || height < 16 || height % 2) { 482 die("Invalid resolution: %d x %d", width, height); 483 } 484 485 layering_mode = strtol(argv[10], NULL, 0); 486 if (layering_mode < 0 || layering_mode > 12) { 487 die("Invalid layering mode (0..12) %s", argv[10]); 488 } 489 490 if (argc != 11 + mode_to_num_layers[layering_mode]) { 491 die("Invalid number of arguments"); 492 } 493 494 if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 32)) { 495 die("Failed to allocate image", width, height); 496 } 497 498 // Populate encoder configuration. 499 res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); 500 if (res) { 501 printf("Failed to get config: %s\n", vpx_codec_err_to_string(res)); 502 return EXIT_FAILURE; 503 } 504 505 // Update the default configuration with our settings. 506 cfg.g_w = width; 507 cfg.g_h = height; 508 509 // Timebase format e.g. 30fps: numerator=1, demoninator = 30. 510 cfg.g_timebase.num = strtol(argv[6], NULL, 0); 511 cfg.g_timebase.den = strtol(argv[7], NULL, 0); 512 513 speed = strtol(argv[8], NULL, 0); 514 if (speed < 0) { 515 die("Invalid speed setting: must be positive"); 516 } 517 518 for (i = 11; (int)i < 11 + mode_to_num_layers[layering_mode]; ++i) { 519 cfg.ts_target_bitrate[i - 11] = strtol(argv[i], NULL, 0); 520 } 521 522 // Real time parameters. 523 cfg.rc_dropframe_thresh = strtol(argv[9], NULL, 0); 524 cfg.rc_end_usage = VPX_CBR; 525 cfg.rc_resize_allowed = 0; 526 cfg.rc_min_quantizer = 2; 527 cfg.rc_max_quantizer = 56; 528 cfg.rc_undershoot_pct = 50; 529 cfg.rc_overshoot_pct = 50; 530 cfg.rc_buf_initial_sz = 500; 531 cfg.rc_buf_optimal_sz = 600; 532 cfg.rc_buf_sz = 1000; 533 534 // Enable error resilient mode. 535 cfg.g_error_resilient = 1; 536 cfg.g_lag_in_frames = 0; 537 cfg.kf_mode = VPX_KF_AUTO; 538 539 // Disable automatic keyframe placement. 540 cfg.kf_min_dist = cfg.kf_max_dist = 3000; 541 542 set_temporal_layer_pattern(layering_mode, 543 &cfg, 544 layer_flags, 545 &flag_periodicity); 546 547 set_rate_control_metrics(&rc, &cfg); 548 549 // Target bandwidth for the whole stream. 550 // Set to ts_target_bitrate for highest layer (total bitrate). 551 cfg.rc_target_bitrate = cfg.ts_target_bitrate[cfg.ts_number_layers - 1]; 552 553 // Open input file. 554 if (!(infile = fopen(argv[1], "rb"))) { 555 die("Failed to open %s for reading", argv[1]); 556 } 557 558 // Open an output file for each stream. 559 for (i = 0; i < cfg.ts_number_layers; ++i) { 560 char file_name[PATH_MAX]; 561 VpxVideoInfo info; 562 info.codec_fourcc = encoder->fourcc; 563 info.frame_width = cfg.g_w; 564 info.frame_height = cfg.g_h; 565 info.time_base.numerator = cfg.g_timebase.num; 566 info.time_base.denominator = cfg.g_timebase.den; 567 568 snprintf(file_name, sizeof(file_name), "%s_%d.ivf", argv[2], i); 569 outfile[i] = vpx_video_writer_open(file_name, kContainerIVF, &info); 570 if (!outfile[i]) 571 die("Failed to open %s for writing", file_name); 572 573 assert(outfile[i] != NULL); 574 } 575 // No spatial layers in this encoder. 576 cfg.ss_number_layers = 1; 577 578 // Initialize codec. 579 if (vpx_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0)) 580 die_codec(&codec, "Failed to initialize encoder"); 581 582 if (strncmp(encoder->name, "vp8", 3) == 0) { 583 vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed); 584 vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOnYOnly); 585 } else if (strncmp(encoder->name, "vp9", 3) == 0) { 586 vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed); 587 vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3); 588 vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0); 589 vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0); 590 if (vpx_codec_control(&codec, VP9E_SET_SVC, 1)) { 591 die_codec(&codec, "Failed to set SVC"); 592 } 593 } 594 vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); 595 vpx_codec_control(&codec, VP8E_SET_TOKEN_PARTITIONS, 1); 596 // This controls the maximum target size of the key frame. 597 // For generating smaller key frames, use a smaller max_intra_size_pct 598 // value, like 100 or 200. 599 { 600 const int max_intra_size_pct = 200; 601 vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, 602 max_intra_size_pct); 603 } 604 605 frame_avail = 1; 606 while (frame_avail || got_data) { 607 struct vpx_usec_timer timer; 608 vpx_codec_iter_t iter = NULL; 609 const vpx_codec_cx_pkt_t *pkt; 610 // Update the temporal layer_id. No spatial layers in this test. 611 layer_id.spatial_layer_id = 0; 612 layer_id.temporal_layer_id = 613 cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity]; 614 if (strncmp(encoder->name, "vp9", 3) == 0) { 615 vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id); 616 } 617 flags = layer_flags[frame_cnt % flag_periodicity]; 618 frame_avail = vpx_img_read(&raw, infile); 619 if (frame_avail) 620 ++rc.layer_input_frames[layer_id.temporal_layer_id]; 621 vpx_usec_timer_start(&timer); 622 if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, pts, 1, flags, 623 VPX_DL_REALTIME)) { 624 die_codec(&codec, "Failed to encode frame"); 625 } 626 vpx_usec_timer_mark(&timer); 627 cx_time += vpx_usec_timer_elapsed(&timer); 628 // Reset KF flag. 629 if (layering_mode != 7) { 630 layer_flags[0] &= ~VPX_EFLAG_FORCE_KF; 631 } 632 got_data = 0; 633 while ( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) { 634 got_data = 1; 635 switch (pkt->kind) { 636 case VPX_CODEC_CX_FRAME_PKT: 637 for (i = cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity]; 638 i < cfg.ts_number_layers; ++i) { 639 vpx_video_writer_write_frame(outfile[i], pkt->data.frame.buf, 640 pkt->data.frame.sz, pts); 641 ++rc.layer_tot_enc_frames[i]; 642 rc.layer_encoding_bitrate[i] += 8.0 * pkt->data.frame.sz; 643 // Keep count of rate control stats per layer (for non-key frames). 644 if (i == cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity] && 645 !(pkt->data.frame.flags & VPX_FRAME_IS_KEY)) { 646 rc.layer_avg_frame_size[i] += 8.0 * pkt->data.frame.sz; 647 rc.layer_avg_rate_mismatch[i] += 648 fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[i]) / 649 rc.layer_pfb[i]; 650 ++rc.layer_enc_frames[i]; 651 } 652 } 653 break; 654 default: 655 break; 656 } 657 } 658 ++frame_cnt; 659 pts += frame_duration; 660 } 661 fclose(infile); 662 printout_rate_control_summary(&rc, &cfg, frame_cnt); 663 printf("\n"); 664 printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n", 665 frame_cnt, 666 1000 * (float)cx_time / (double)(frame_cnt * 1000000), 667 1000000 * (double)frame_cnt / (double)cx_time); 668 669 if (vpx_codec_destroy(&codec)) 670 die_codec(&codec, "Failed to destroy codec"); 671 672 // Try to rewrite the output file headers with the actual frame count. 673 for (i = 0; i < cfg.ts_number_layers; ++i) 674 vpx_video_writer_close(outfile[i]); 675 676 vpx_img_free(&raw); 677 return EXIT_SUCCESS; 678} 679