1/* 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 * 10 */ 11 12#include "webrtc/modules/video_coding/codecs/vp9/vp9_impl.h" 13 14#include <stdlib.h> 15#include <string.h> 16#include <time.h> 17#include <vector> 18 19#include "vpx/vpx_encoder.h" 20#include "vpx/vpx_decoder.h" 21#include "vpx/vp8cx.h" 22#include "vpx/vp8dx.h" 23 24#include "webrtc/base/checks.h" 25#include "webrtc/base/keep_ref_until_done.h" 26#include "webrtc/base/logging.h" 27#include "webrtc/base/trace_event.h" 28#include "webrtc/common.h" 29#include "webrtc/common_video/libyuv/include/webrtc_libyuv.h" 30#include "webrtc/modules/include/module_common_types.h" 31#include "webrtc/modules/video_coding/codecs/vp9/screenshare_layers.h" 32#include "webrtc/system_wrappers/include/tick_util.h" 33 34namespace webrtc { 35 36// Only positive speeds, range for real-time coding currently is: 5 - 8. 37// Lower means slower/better quality, higher means fastest/lower quality. 38int GetCpuSpeed(int width, int height) { 39#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) 40 return 8; 41#else 42 // For smaller resolutions, use lower speed setting (get some coding gain at 43 // the cost of increased encoding complexity). 44 if (width * height <= 352 * 288) 45 return 5; 46 else 47 return 7; 48#endif 49} 50 51VP9Encoder* VP9Encoder::Create() { 52 return new VP9EncoderImpl(); 53} 54 55void VP9EncoderImpl::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt, 56 void* user_data) { 57 VP9EncoderImpl* enc = static_cast<VP9EncoderImpl*>(user_data); 58 enc->GetEncodedLayerFrame(pkt); 59} 60 61VP9EncoderImpl::VP9EncoderImpl() 62 : encoded_image_(), 63 encoded_complete_callback_(NULL), 64 inited_(false), 65 timestamp_(0), 66 picture_id_(0), 67 cpu_speed_(3), 68 rc_max_intra_target_(0), 69 encoder_(NULL), 70 config_(NULL), 71 raw_(NULL), 72 input_image_(NULL), 73 tl0_pic_idx_(0), 74 frames_since_kf_(0), 75 num_temporal_layers_(0), 76 num_spatial_layers_(0), 77 frames_encoded_(0), 78 // Use two spatial when screensharing with flexible mode. 79 spatial_layer_(new ScreenshareLayersVP9(2)) { 80 memset(&codec_, 0, sizeof(codec_)); 81 uint32_t seed = static_cast<uint32_t>(TickTime::MillisecondTimestamp()); 82 srand(seed); 83} 84 85VP9EncoderImpl::~VP9EncoderImpl() { 86 Release(); 87} 88 89int VP9EncoderImpl::Release() { 90 if (encoded_image_._buffer != NULL) { 91 delete[] encoded_image_._buffer; 92 encoded_image_._buffer = NULL; 93 } 94 if (encoder_ != NULL) { 95 if (vpx_codec_destroy(encoder_)) { 96 return WEBRTC_VIDEO_CODEC_MEMORY; 97 } 98 delete encoder_; 99 encoder_ = NULL; 100 } 101 if (config_ != NULL) { 102 delete config_; 103 config_ = NULL; 104 } 105 if (raw_ != NULL) { 106 vpx_img_free(raw_); 107 raw_ = NULL; 108 } 109 inited_ = false; 110 return WEBRTC_VIDEO_CODEC_OK; 111} 112 113bool VP9EncoderImpl::ExplicitlyConfiguredSpatialLayers() const { 114 // We check target_bitrate_bps of the 0th layer to see if the spatial layers 115 // (i.e. bitrates) were explicitly configured. 116 return num_spatial_layers_ > 1 && 117 codec_.spatialLayers[0].target_bitrate_bps > 0; 118} 119 120bool VP9EncoderImpl::SetSvcRates() { 121 uint8_t i = 0; 122 123 if (ExplicitlyConfiguredSpatialLayers()) { 124 if (num_temporal_layers_ > 1) { 125 LOG(LS_ERROR) << "Multiple temporal layers when manually specifying " 126 "spatial layers not implemented yet!"; 127 return false; 128 } 129 int total_bitrate_bps = 0; 130 for (i = 0; i < num_spatial_layers_; ++i) 131 total_bitrate_bps += codec_.spatialLayers[i].target_bitrate_bps; 132 // If total bitrate differs now from what has been specified at the 133 // beginning, update the bitrates in the same ratio as before. 134 for (i = 0; i < num_spatial_layers_; ++i) { 135 config_->ss_target_bitrate[i] = config_->layer_target_bitrate[i] = 136 static_cast<int>(static_cast<int64_t>(config_->rc_target_bitrate) * 137 codec_.spatialLayers[i].target_bitrate_bps / 138 total_bitrate_bps); 139 } 140 } else { 141 float rate_ratio[VPX_MAX_LAYERS] = {0}; 142 float total = 0; 143 144 for (i = 0; i < num_spatial_layers_; ++i) { 145 if (svc_internal_.svc_params.scaling_factor_num[i] <= 0 || 146 svc_internal_.svc_params.scaling_factor_den[i] <= 0) { 147 LOG(LS_ERROR) << "Scaling factors not specified!"; 148 return false; 149 } 150 rate_ratio[i] = 151 static_cast<float>(svc_internal_.svc_params.scaling_factor_num[i]) / 152 svc_internal_.svc_params.scaling_factor_den[i]; 153 total += rate_ratio[i]; 154 } 155 156 for (i = 0; i < num_spatial_layers_; ++i) { 157 config_->ss_target_bitrate[i] = static_cast<unsigned int>( 158 config_->rc_target_bitrate * rate_ratio[i] / total); 159 if (num_temporal_layers_ == 1) { 160 config_->layer_target_bitrate[i] = config_->ss_target_bitrate[i]; 161 } else if (num_temporal_layers_ == 2) { 162 config_->layer_target_bitrate[i * num_temporal_layers_] = 163 config_->ss_target_bitrate[i] * 2 / 3; 164 config_->layer_target_bitrate[i * num_temporal_layers_ + 1] = 165 config_->ss_target_bitrate[i]; 166 } else if (num_temporal_layers_ == 3) { 167 config_->layer_target_bitrate[i * num_temporal_layers_] = 168 config_->ss_target_bitrate[i] / 2; 169 config_->layer_target_bitrate[i * num_temporal_layers_ + 1] = 170 config_->layer_target_bitrate[i * num_temporal_layers_] + 171 (config_->ss_target_bitrate[i] / 4); 172 config_->layer_target_bitrate[i * num_temporal_layers_ + 2] = 173 config_->ss_target_bitrate[i]; 174 } else { 175 LOG(LS_ERROR) << "Unsupported number of temporal layers: " 176 << num_temporal_layers_; 177 return false; 178 } 179 } 180 } 181 182 // For now, temporal layers only supported when having one spatial layer. 183 if (num_spatial_layers_ == 1) { 184 for (i = 0; i < num_temporal_layers_; ++i) { 185 config_->ts_target_bitrate[i] = config_->layer_target_bitrate[i]; 186 } 187 } 188 189 return true; 190} 191 192int VP9EncoderImpl::SetRates(uint32_t new_bitrate_kbit, 193 uint32_t new_framerate) { 194 if (!inited_) { 195 return WEBRTC_VIDEO_CODEC_UNINITIALIZED; 196 } 197 if (encoder_->err) { 198 return WEBRTC_VIDEO_CODEC_ERROR; 199 } 200 if (new_framerate < 1) { 201 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; 202 } 203 // Update bit rate 204 if (codec_.maxBitrate > 0 && new_bitrate_kbit > codec_.maxBitrate) { 205 new_bitrate_kbit = codec_.maxBitrate; 206 } 207 config_->rc_target_bitrate = new_bitrate_kbit; 208 codec_.maxFramerate = new_framerate; 209 spatial_layer_->ConfigureBitrate(new_bitrate_kbit, 0); 210 211 if (!SetSvcRates()) { 212 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; 213 } 214 215 // Update encoder context 216 if (vpx_codec_enc_config_set(encoder_, config_)) { 217 return WEBRTC_VIDEO_CODEC_ERROR; 218 } 219 return WEBRTC_VIDEO_CODEC_OK; 220} 221 222int VP9EncoderImpl::InitEncode(const VideoCodec* inst, 223 int number_of_cores, 224 size_t /*max_payload_size*/) { 225 if (inst == NULL) { 226 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; 227 } 228 if (inst->maxFramerate < 1) { 229 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; 230 } 231 // Allow zero to represent an unspecified maxBitRate 232 if (inst->maxBitrate > 0 && inst->startBitrate > inst->maxBitrate) { 233 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; 234 } 235 if (inst->width < 1 || inst->height < 1) { 236 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; 237 } 238 if (number_of_cores < 1) { 239 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; 240 } 241 if (inst->codecSpecific.VP9.numberOfTemporalLayers > 3) { 242 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; 243 } 244 // libvpx currently supports only one or two spatial layers. 245 if (inst->codecSpecific.VP9.numberOfSpatialLayers > 2) { 246 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; 247 } 248 249 int retVal = Release(); 250 if (retVal < 0) { 251 return retVal; 252 } 253 if (encoder_ == NULL) { 254 encoder_ = new vpx_codec_ctx_t; 255 } 256 if (config_ == NULL) { 257 config_ = new vpx_codec_enc_cfg_t; 258 } 259 timestamp_ = 0; 260 if (&codec_ != inst) { 261 codec_ = *inst; 262 } 263 264 num_spatial_layers_ = inst->codecSpecific.VP9.numberOfSpatialLayers; 265 num_temporal_layers_ = inst->codecSpecific.VP9.numberOfTemporalLayers; 266 if (num_temporal_layers_ == 0) 267 num_temporal_layers_ = 1; 268 269 // Random start 16 bits is enough. 270 picture_id_ = static_cast<uint16_t>(rand()) & 0x7FFF; // NOLINT 271 // Allocate memory for encoded image 272 if (encoded_image_._buffer != NULL) { 273 delete[] encoded_image_._buffer; 274 } 275 encoded_image_._size = CalcBufferSize(kI420, codec_.width, codec_.height); 276 encoded_image_._buffer = new uint8_t[encoded_image_._size]; 277 encoded_image_._completeFrame = true; 278 // Creating a wrapper to the image - setting image data to NULL. Actual 279 // pointer will be set in encode. Setting align to 1, as it is meaningless 280 // (actual memory is not allocated). 281 raw_ = vpx_img_wrap(NULL, VPX_IMG_FMT_I420, codec_.width, codec_.height, 1, 282 NULL); 283 // Populate encoder configuration with default values. 284 if (vpx_codec_enc_config_default(vpx_codec_vp9_cx(), config_, 0)) { 285 return WEBRTC_VIDEO_CODEC_ERROR; 286 } 287 config_->g_w = codec_.width; 288 config_->g_h = codec_.height; 289 config_->rc_target_bitrate = inst->startBitrate; // in kbit/s 290 config_->g_error_resilient = 1; 291 // Setting the time base of the codec. 292 config_->g_timebase.num = 1; 293 config_->g_timebase.den = 90000; 294 config_->g_lag_in_frames = 0; // 0- no frame lagging 295 config_->g_threads = 1; 296 // Rate control settings. 297 config_->rc_dropframe_thresh = 298 inst->codecSpecific.VP9.frameDroppingOn ? 30 : 0; 299 config_->rc_end_usage = VPX_CBR; 300 config_->g_pass = VPX_RC_ONE_PASS; 301 config_->rc_min_quantizer = 2; 302 config_->rc_max_quantizer = 52; 303 config_->rc_undershoot_pct = 50; 304 config_->rc_overshoot_pct = 50; 305 config_->rc_buf_initial_sz = 500; 306 config_->rc_buf_optimal_sz = 600; 307 config_->rc_buf_sz = 1000; 308 // Set the maximum target size of any key-frame. 309 rc_max_intra_target_ = MaxIntraTarget(config_->rc_buf_optimal_sz); 310 if (inst->codecSpecific.VP9.keyFrameInterval > 0) { 311 config_->kf_mode = VPX_KF_AUTO; 312 config_->kf_max_dist = inst->codecSpecific.VP9.keyFrameInterval; 313 // Needs to be set (in svc mode) to get correct periodic key frame interval 314 // (will have no effect in non-svc). 315 config_->kf_min_dist = config_->kf_max_dist; 316 } else { 317 config_->kf_mode = VPX_KF_DISABLED; 318 } 319 config_->rc_resize_allowed = 320 inst->codecSpecific.VP9.automaticResizeOn ? 1 : 0; 321 // Determine number of threads based on the image size and #cores. 322 config_->g_threads = 323 NumberOfThreads(config_->g_w, config_->g_h, number_of_cores); 324 325 cpu_speed_ = GetCpuSpeed(config_->g_w, config_->g_h); 326 327 // TODO(asapersson): Check configuration of temporal switch up and increase 328 // pattern length. 329 is_flexible_mode_ = inst->codecSpecific.VP9.flexibleMode; 330 if (is_flexible_mode_) { 331 config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; 332 config_->ts_number_layers = num_temporal_layers_; 333 if (codec_.mode == kScreensharing) 334 spatial_layer_->ConfigureBitrate(inst->startBitrate, 0); 335 } else if (num_temporal_layers_ == 1) { 336 gof_.SetGofInfoVP9(kTemporalStructureMode1); 337 config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING; 338 config_->ts_number_layers = 1; 339 config_->ts_rate_decimator[0] = 1; 340 config_->ts_periodicity = 1; 341 config_->ts_layer_id[0] = 0; 342 } else if (num_temporal_layers_ == 2) { 343 gof_.SetGofInfoVP9(kTemporalStructureMode2); 344 config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0101; 345 config_->ts_number_layers = 2; 346 config_->ts_rate_decimator[0] = 2; 347 config_->ts_rate_decimator[1] = 1; 348 config_->ts_periodicity = 2; 349 config_->ts_layer_id[0] = 0; 350 config_->ts_layer_id[1] = 1; 351 } else if (num_temporal_layers_ == 3) { 352 gof_.SetGofInfoVP9(kTemporalStructureMode3); 353 config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0212; 354 config_->ts_number_layers = 3; 355 config_->ts_rate_decimator[0] = 4; 356 config_->ts_rate_decimator[1] = 2; 357 config_->ts_rate_decimator[2] = 1; 358 config_->ts_periodicity = 4; 359 config_->ts_layer_id[0] = 0; 360 config_->ts_layer_id[1] = 2; 361 config_->ts_layer_id[2] = 1; 362 config_->ts_layer_id[3] = 2; 363 } else { 364 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; 365 } 366 367 tl0_pic_idx_ = static_cast<uint8_t>(rand()); // NOLINT 368 369 return InitAndSetControlSettings(inst); 370} 371 372int VP9EncoderImpl::NumberOfThreads(int width, 373 int height, 374 int number_of_cores) { 375 // Keep the number of encoder threads equal to the possible number of column 376 // tiles, which is (1, 2, 4, 8). See comments below for VP9E_SET_TILE_COLUMNS. 377 if (width * height >= 1280 * 720 && number_of_cores > 4) { 378 return 4; 379 } else if (width * height >= 640 * 480 && number_of_cores > 2) { 380 return 2; 381 } else { 382 // 1 thread less than VGA. 383 return 1; 384 } 385} 386 387int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) { 388 // Set QP-min/max per spatial and temporal layer. 389 int tot_num_layers = num_spatial_layers_ * num_temporal_layers_; 390 for (int i = 0; i < tot_num_layers; ++i) { 391 svc_internal_.svc_params.max_quantizers[i] = config_->rc_max_quantizer; 392 svc_internal_.svc_params.min_quantizers[i] = config_->rc_min_quantizer; 393 } 394 config_->ss_number_layers = num_spatial_layers_; 395 if (ExplicitlyConfiguredSpatialLayers()) { 396 for (int i = 0; i < num_spatial_layers_; ++i) { 397 const auto& layer = codec_.spatialLayers[i]; 398 svc_internal_.svc_params.scaling_factor_num[i] = layer.scaling_factor_num; 399 svc_internal_.svc_params.scaling_factor_den[i] = layer.scaling_factor_den; 400 } 401 } else { 402 int scaling_factor_num = 256; 403 for (int i = num_spatial_layers_ - 1; i >= 0; --i) { 404 // 1:2 scaling in each dimension. 405 svc_internal_.svc_params.scaling_factor_num[i] = scaling_factor_num; 406 svc_internal_.svc_params.scaling_factor_den[i] = 256; 407 if (codec_.mode != kScreensharing) 408 scaling_factor_num /= 2; 409 } 410 } 411 412 if (!SetSvcRates()) { 413 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; 414 } 415 416 if (vpx_codec_enc_init(encoder_, vpx_codec_vp9_cx(), config_, 0)) { 417 return WEBRTC_VIDEO_CODEC_UNINITIALIZED; 418 } 419 vpx_codec_control(encoder_, VP8E_SET_CPUUSED, cpu_speed_); 420 vpx_codec_control(encoder_, VP8E_SET_MAX_INTRA_BITRATE_PCT, 421 rc_max_intra_target_); 422 vpx_codec_control(encoder_, VP9E_SET_AQ_MODE, 423 inst->codecSpecific.VP9.adaptiveQpMode ? 3 : 0); 424 425 vpx_codec_control( 426 encoder_, VP9E_SET_SVC, 427 (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) ? 1 : 0); 428 if (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) { 429 vpx_codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, 430 &svc_internal_.svc_params); 431 } 432 // Register callback for getting each spatial layer. 433 vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = { 434 VP9EncoderImpl::EncoderOutputCodedPacketCallback, 435 reinterpret_cast<void*>(this)}; 436 vpx_codec_control(encoder_, VP9E_REGISTER_CX_CALLBACK, 437 reinterpret_cast<void*>(&cbp)); 438 439 // Control function to set the number of column tiles in encoding a frame, in 440 // log2 unit: e.g., 0 = 1 tile column, 1 = 2 tile columns, 2 = 4 tile columns. 441 // The number tile columns will be capped by the encoder based on image size 442 // (minimum width of tile column is 256 pixels, maximum is 4096). 443 vpx_codec_control(encoder_, VP9E_SET_TILE_COLUMNS, (config_->g_threads >> 1)); 444#if !defined(WEBRTC_ARCH_ARM) && !defined(WEBRTC_ARCH_ARM64) 445 // Note denoiser is still off by default until further testing/optimization, 446 // i.e., codecSpecific.VP9.denoisingOn == 0. 447 vpx_codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY, 448 inst->codecSpecific.VP9.denoisingOn ? 1 : 0); 449#endif 450 if (codec_.mode == kScreensharing) { 451 // Adjust internal parameters to screen content. 452 vpx_codec_control(encoder_, VP9E_SET_TUNE_CONTENT, 1); 453 } 454 // Enable encoder skip of static/low content blocks. 455 vpx_codec_control(encoder_, VP8E_SET_STATIC_THRESHOLD, 1); 456 inited_ = true; 457 return WEBRTC_VIDEO_CODEC_OK; 458} 459 460uint32_t VP9EncoderImpl::MaxIntraTarget(uint32_t optimal_buffer_size) { 461 // Set max to the optimal buffer level (normalized by target BR), 462 // and scaled by a scale_par. 463 // Max target size = scale_par * optimal_buffer_size * targetBR[Kbps]. 464 // This value is presented in percentage of perFrameBw: 465 // perFrameBw = targetBR[Kbps] * 1000 / framerate. 466 // The target in % is as follows: 467 float scale_par = 0.5; 468 uint32_t target_pct = 469 optimal_buffer_size * scale_par * codec_.maxFramerate / 10; 470 // Don't go below 3 times the per frame bandwidth. 471 const uint32_t min_intra_size = 300; 472 return (target_pct < min_intra_size) ? min_intra_size : target_pct; 473} 474 475int VP9EncoderImpl::Encode(const VideoFrame& input_image, 476 const CodecSpecificInfo* codec_specific_info, 477 const std::vector<FrameType>* frame_types) { 478 if (!inited_) { 479 return WEBRTC_VIDEO_CODEC_UNINITIALIZED; 480 } 481 if (input_image.IsZeroSize()) { 482 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; 483 } 484 if (encoded_complete_callback_ == NULL) { 485 return WEBRTC_VIDEO_CODEC_UNINITIALIZED; 486 } 487 FrameType frame_type = kVideoFrameDelta; 488 // We only support one stream at the moment. 489 if (frame_types && frame_types->size() > 0) { 490 frame_type = (*frame_types)[0]; 491 } 492 RTC_DCHECK_EQ(input_image.width(), static_cast<int>(raw_->d_w)); 493 RTC_DCHECK_EQ(input_image.height(), static_cast<int>(raw_->d_h)); 494 495 // Set input image for use in the callback. 496 // This was necessary since you need some information from input_image. 497 // You can save only the necessary information (such as timestamp) instead of 498 // doing this. 499 input_image_ = &input_image; 500 501 // Image in vpx_image_t format. 502 // Input image is const. VPX's raw image is not defined as const. 503 raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(input_image.buffer(kYPlane)); 504 raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(input_image.buffer(kUPlane)); 505 raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>(input_image.buffer(kVPlane)); 506 raw_->stride[VPX_PLANE_Y] = input_image.stride(kYPlane); 507 raw_->stride[VPX_PLANE_U] = input_image.stride(kUPlane); 508 raw_->stride[VPX_PLANE_V] = input_image.stride(kVPlane); 509 510 vpx_enc_frame_flags_t flags = 0; 511 bool send_keyframe = (frame_type == kVideoFrameKey); 512 if (send_keyframe) { 513 // Key frame request from caller. 514 flags = VPX_EFLAG_FORCE_KF; 515 } 516 517 if (is_flexible_mode_) { 518 SuperFrameRefSettings settings; 519 520 // These structs are copied when calling vpx_codec_control, 521 // therefore it is ok for them to go out of scope. 522 vpx_svc_ref_frame_config enc_layer_conf; 523 vpx_svc_layer_id layer_id; 524 525 if (codec_.mode == kRealtimeVideo) { 526 // Real time video not yet implemented in flexible mode. 527 RTC_NOTREACHED(); 528 } else { 529 settings = spatial_layer_->GetSuperFrameSettings(input_image.timestamp(), 530 send_keyframe); 531 } 532 enc_layer_conf = GenerateRefsAndFlags(settings); 533 layer_id.temporal_layer_id = 0; 534 layer_id.spatial_layer_id = settings.start_layer; 535 vpx_codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id); 536 vpx_codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG, &enc_layer_conf); 537 } 538 539 assert(codec_.maxFramerate > 0); 540 uint32_t duration = 90000 / codec_.maxFramerate; 541 if (vpx_codec_encode(encoder_, raw_, timestamp_, duration, flags, 542 VPX_DL_REALTIME)) { 543 return WEBRTC_VIDEO_CODEC_ERROR; 544 } 545 timestamp_ += duration; 546 547 return WEBRTC_VIDEO_CODEC_OK; 548} 549 550void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, 551 const vpx_codec_cx_pkt& pkt, 552 uint32_t timestamp) { 553 assert(codec_specific != NULL); 554 codec_specific->codecType = kVideoCodecVP9; 555 CodecSpecificInfoVP9* vp9_info = &(codec_specific->codecSpecific.VP9); 556 // TODO(asapersson): Set correct value. 557 vp9_info->inter_pic_predicted = 558 (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? false : true; 559 vp9_info->flexible_mode = codec_.codecSpecific.VP9.flexibleMode; 560 vp9_info->ss_data_available = ((pkt.data.frame.flags & VPX_FRAME_IS_KEY) && 561 !codec_.codecSpecific.VP9.flexibleMode) 562 ? true 563 : false; 564 565 vpx_svc_layer_id_t layer_id = {0}; 566 vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); 567 568 assert(num_temporal_layers_ > 0); 569 assert(num_spatial_layers_ > 0); 570 if (num_temporal_layers_ == 1) { 571 assert(layer_id.temporal_layer_id == 0); 572 vp9_info->temporal_idx = kNoTemporalIdx; 573 } else { 574 vp9_info->temporal_idx = layer_id.temporal_layer_id; 575 } 576 if (num_spatial_layers_ == 1) { 577 assert(layer_id.spatial_layer_id == 0); 578 vp9_info->spatial_idx = kNoSpatialIdx; 579 } else { 580 vp9_info->spatial_idx = layer_id.spatial_layer_id; 581 } 582 if (layer_id.spatial_layer_id != 0) { 583 vp9_info->ss_data_available = false; 584 } 585 586 // TODO(asapersson): this info has to be obtained from the encoder. 587 vp9_info->temporal_up_switch = false; 588 589 bool is_first_frame = false; 590 if (is_flexible_mode_) { 591 is_first_frame = 592 layer_id.spatial_layer_id == spatial_layer_->GetStartLayer(); 593 } else { 594 is_first_frame = layer_id.spatial_layer_id == 0; 595 } 596 597 if (is_first_frame) { 598 picture_id_ = (picture_id_ + 1) & 0x7FFF; 599 // TODO(asapersson): this info has to be obtained from the encoder. 600 vp9_info->inter_layer_predicted = false; 601 ++frames_since_kf_; 602 } else { 603 // TODO(asapersson): this info has to be obtained from the encoder. 604 vp9_info->inter_layer_predicted = true; 605 } 606 607 if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) { 608 frames_since_kf_ = 0; 609 } 610 611 vp9_info->picture_id = picture_id_; 612 613 if (!vp9_info->flexible_mode) { 614 if (layer_id.temporal_layer_id == 0 && layer_id.spatial_layer_id == 0) { 615 tl0_pic_idx_++; 616 } 617 vp9_info->tl0_pic_idx = tl0_pic_idx_; 618 } 619 620 // Always populate this, so that the packetizer can properly set the marker 621 // bit. 622 vp9_info->num_spatial_layers = num_spatial_layers_; 623 624 vp9_info->num_ref_pics = 0; 625 if (vp9_info->flexible_mode) { 626 vp9_info->gof_idx = kNoGofIdx; 627 vp9_info->num_ref_pics = num_ref_pics_[layer_id.spatial_layer_id]; 628 for (int i = 0; i < num_ref_pics_[layer_id.spatial_layer_id]; ++i) { 629 vp9_info->p_diff[i] = p_diff_[layer_id.spatial_layer_id][i]; 630 } 631 } else { 632 vp9_info->gof_idx = 633 static_cast<uint8_t>(frames_since_kf_ % gof_.num_frames_in_gof); 634 vp9_info->temporal_up_switch = gof_.temporal_up_switch[vp9_info->gof_idx]; 635 } 636 637 if (vp9_info->ss_data_available) { 638 vp9_info->spatial_layer_resolution_present = true; 639 for (size_t i = 0; i < vp9_info->num_spatial_layers; ++i) { 640 vp9_info->width[i] = codec_.width * 641 svc_internal_.svc_params.scaling_factor_num[i] / 642 svc_internal_.svc_params.scaling_factor_den[i]; 643 vp9_info->height[i] = codec_.height * 644 svc_internal_.svc_params.scaling_factor_num[i] / 645 svc_internal_.svc_params.scaling_factor_den[i]; 646 } 647 if (!vp9_info->flexible_mode) { 648 vp9_info->gof.CopyGofInfoVP9(gof_); 649 } 650 } 651} 652 653int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) { 654 encoded_image_._length = 0; 655 encoded_image_._frameType = kVideoFrameDelta; 656 RTPFragmentationHeader frag_info; 657 // Note: no data partitioning in VP9, so 1 partition only. We keep this 658 // fragmentation data for now, until VP9 packetizer is implemented. 659 frag_info.VerifyAndAllocateFragmentationHeader(1); 660 int part_idx = 0; 661 CodecSpecificInfo codec_specific; 662 663 assert(pkt->kind == VPX_CODEC_CX_FRAME_PKT); 664 memcpy(&encoded_image_._buffer[encoded_image_._length], pkt->data.frame.buf, 665 pkt->data.frame.sz); 666 frag_info.fragmentationOffset[part_idx] = encoded_image_._length; 667 frag_info.fragmentationLength[part_idx] = 668 static_cast<uint32_t>(pkt->data.frame.sz); 669 frag_info.fragmentationPlType[part_idx] = 0; 670 frag_info.fragmentationTimeDiff[part_idx] = 0; 671 encoded_image_._length += static_cast<uint32_t>(pkt->data.frame.sz); 672 673 vpx_svc_layer_id_t layer_id = {0}; 674 vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); 675 if (is_flexible_mode_ && codec_.mode == kScreensharing) 676 spatial_layer_->LayerFrameEncoded( 677 static_cast<unsigned int>(encoded_image_._length), 678 layer_id.spatial_layer_id); 679 680 assert(encoded_image_._length <= encoded_image_._size); 681 682 // End of frame. 683 // Check if encoded frame is a key frame. 684 if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { 685 encoded_image_._frameType = kVideoFrameKey; 686 } 687 PopulateCodecSpecific(&codec_specific, *pkt, input_image_->timestamp()); 688 689 if (encoded_image_._length > 0) { 690 TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_._length); 691 encoded_image_._timeStamp = input_image_->timestamp(); 692 encoded_image_.capture_time_ms_ = input_image_->render_time_ms(); 693 encoded_image_._encodedHeight = raw_->d_h; 694 encoded_image_._encodedWidth = raw_->d_w; 695 encoded_complete_callback_->Encoded(encoded_image_, &codec_specific, 696 &frag_info); 697 } 698 return WEBRTC_VIDEO_CODEC_OK; 699} 700 701vpx_svc_ref_frame_config VP9EncoderImpl::GenerateRefsAndFlags( 702 const SuperFrameRefSettings& settings) { 703 static const vpx_enc_frame_flags_t kAllFlags = 704 VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_LAST | 705 VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF; 706 vpx_svc_ref_frame_config sf_conf = {}; 707 if (settings.is_keyframe) { 708 // Used later on to make sure we don't make any invalid references. 709 memset(buffer_updated_at_frame_, -1, sizeof(buffer_updated_at_frame_)); 710 for (int layer = settings.start_layer; layer <= settings.stop_layer; 711 ++layer) { 712 num_ref_pics_[layer] = 0; 713 buffer_updated_at_frame_[settings.layer[layer].upd_buf] = frames_encoded_; 714 // When encoding a keyframe only the alt_fb_idx is used 715 // to specify which layer ends up in which buffer. 716 sf_conf.alt_fb_idx[layer] = settings.layer[layer].upd_buf; 717 } 718 } else { 719 for (int layer_idx = settings.start_layer; layer_idx <= settings.stop_layer; 720 ++layer_idx) { 721 vpx_enc_frame_flags_t layer_flags = kAllFlags; 722 num_ref_pics_[layer_idx] = 0; 723 int8_t refs[3] = {settings.layer[layer_idx].ref_buf1, 724 settings.layer[layer_idx].ref_buf2, 725 settings.layer[layer_idx].ref_buf3}; 726 727 for (unsigned int ref_idx = 0; ref_idx < kMaxVp9RefPics; ++ref_idx) { 728 if (refs[ref_idx] == -1) 729 continue; 730 731 RTC_DCHECK_GE(refs[ref_idx], 0); 732 RTC_DCHECK_LE(refs[ref_idx], 7); 733 // Easier to remove flags from all flags rather than having to 734 // build the flags from 0. 735 switch (num_ref_pics_[layer_idx]) { 736 case 0: { 737 sf_conf.lst_fb_idx[layer_idx] = refs[ref_idx]; 738 layer_flags &= ~VP8_EFLAG_NO_REF_LAST; 739 break; 740 } 741 case 1: { 742 sf_conf.gld_fb_idx[layer_idx] = refs[ref_idx]; 743 layer_flags &= ~VP8_EFLAG_NO_REF_GF; 744 break; 745 } 746 case 2: { 747 sf_conf.alt_fb_idx[layer_idx] = refs[ref_idx]; 748 layer_flags &= ~VP8_EFLAG_NO_REF_ARF; 749 break; 750 } 751 } 752 // Make sure we don't reference a buffer that hasn't been 753 // used at all or hasn't been used since a keyframe. 754 RTC_DCHECK_NE(buffer_updated_at_frame_[refs[ref_idx]], -1); 755 756 p_diff_[layer_idx][num_ref_pics_[layer_idx]] = 757 frames_encoded_ - buffer_updated_at_frame_[refs[ref_idx]]; 758 num_ref_pics_[layer_idx]++; 759 } 760 761 bool upd_buf_same_as_a_ref = false; 762 if (settings.layer[layer_idx].upd_buf != -1) { 763 for (unsigned int ref_idx = 0; ref_idx < kMaxVp9RefPics; ++ref_idx) { 764 if (settings.layer[layer_idx].upd_buf == refs[ref_idx]) { 765 switch (ref_idx) { 766 case 0: { 767 layer_flags &= ~VP8_EFLAG_NO_UPD_LAST; 768 break; 769 } 770 case 1: { 771 layer_flags &= ~VP8_EFLAG_NO_UPD_GF; 772 break; 773 } 774 case 2: { 775 layer_flags &= ~VP8_EFLAG_NO_UPD_ARF; 776 break; 777 } 778 } 779 upd_buf_same_as_a_ref = true; 780 break; 781 } 782 } 783 if (!upd_buf_same_as_a_ref) { 784 // If we have three references and a buffer is specified to be 785 // updated, then that buffer must be the same as one of the 786 // three references. 787 RTC_CHECK_LT(num_ref_pics_[layer_idx], kMaxVp9RefPics); 788 789 sf_conf.alt_fb_idx[layer_idx] = settings.layer[layer_idx].upd_buf; 790 layer_flags ^= VP8_EFLAG_NO_UPD_ARF; 791 } 792 793 int updated_buffer = settings.layer[layer_idx].upd_buf; 794 buffer_updated_at_frame_[updated_buffer] = frames_encoded_; 795 sf_conf.frame_flags[layer_idx] = layer_flags; 796 } 797 } 798 } 799 ++frames_encoded_; 800 return sf_conf; 801} 802 803int VP9EncoderImpl::SetChannelParameters(uint32_t packet_loss, int64_t rtt) { 804 return WEBRTC_VIDEO_CODEC_OK; 805} 806 807int VP9EncoderImpl::RegisterEncodeCompleteCallback( 808 EncodedImageCallback* callback) { 809 encoded_complete_callback_ = callback; 810 return WEBRTC_VIDEO_CODEC_OK; 811} 812 813const char* VP9EncoderImpl::ImplementationName() const { 814 return "libvpx"; 815} 816 817VP9Decoder* VP9Decoder::Create() { 818 return new VP9DecoderImpl(); 819} 820 821VP9DecoderImpl::VP9DecoderImpl() 822 : decode_complete_callback_(NULL), 823 inited_(false), 824 decoder_(NULL), 825 key_frame_required_(true) { 826 memset(&codec_, 0, sizeof(codec_)); 827} 828 829VP9DecoderImpl::~VP9DecoderImpl() { 830 inited_ = true; // in order to do the actual release 831 Release(); 832 int num_buffers_in_use = frame_buffer_pool_.GetNumBuffersInUse(); 833 if (num_buffers_in_use > 0) { 834 // The frame buffers are reference counted and frames are exposed after 835 // decoding. There may be valid usage cases where previous frames are still 836 // referenced after ~VP9DecoderImpl that is not a leak. 837 LOG(LS_INFO) << num_buffers_in_use << " Vp9FrameBuffers are still " 838 << "referenced during ~VP9DecoderImpl."; 839 } 840} 841 842int VP9DecoderImpl::Reset() { 843 if (!inited_) { 844 return WEBRTC_VIDEO_CODEC_UNINITIALIZED; 845 } 846 InitDecode(&codec_, 1); 847 return WEBRTC_VIDEO_CODEC_OK; 848} 849 850int VP9DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) { 851 if (inst == NULL) { 852 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; 853 } 854 int ret_val = Release(); 855 if (ret_val < 0) { 856 return ret_val; 857 } 858 if (decoder_ == NULL) { 859 decoder_ = new vpx_codec_ctx_t; 860 } 861 vpx_codec_dec_cfg_t cfg; 862 // Setting number of threads to a constant value (1) 863 cfg.threads = 1; 864 cfg.h = cfg.w = 0; // set after decode 865 vpx_codec_flags_t flags = 0; 866 if (vpx_codec_dec_init(decoder_, vpx_codec_vp9_dx(), &cfg, flags)) { 867 return WEBRTC_VIDEO_CODEC_MEMORY; 868 } 869 if (&codec_ != inst) { 870 // Save VideoCodec instance for later; mainly for duplicating the decoder. 871 codec_ = *inst; 872 } 873 874 if (!frame_buffer_pool_.InitializeVpxUsePool(decoder_)) { 875 return WEBRTC_VIDEO_CODEC_MEMORY; 876 } 877 878 inited_ = true; 879 // Always start with a complete key frame. 880 key_frame_required_ = true; 881 return WEBRTC_VIDEO_CODEC_OK; 882} 883 884int VP9DecoderImpl::Decode(const EncodedImage& input_image, 885 bool missing_frames, 886 const RTPFragmentationHeader* fragmentation, 887 const CodecSpecificInfo* codec_specific_info, 888 int64_t /*render_time_ms*/) { 889 if (!inited_) { 890 return WEBRTC_VIDEO_CODEC_UNINITIALIZED; 891 } 892 if (decode_complete_callback_ == NULL) { 893 return WEBRTC_VIDEO_CODEC_UNINITIALIZED; 894 } 895 // Always start with a complete key frame. 896 if (key_frame_required_) { 897 if (input_image._frameType != kVideoFrameKey) 898 return WEBRTC_VIDEO_CODEC_ERROR; 899 // We have a key frame - is it complete? 900 if (input_image._completeFrame) { 901 key_frame_required_ = false; 902 } else { 903 return WEBRTC_VIDEO_CODEC_ERROR; 904 } 905 } 906 vpx_codec_iter_t iter = NULL; 907 vpx_image_t* img; 908 uint8_t* buffer = input_image._buffer; 909 if (input_image._length == 0) { 910 buffer = NULL; // Triggers full frame concealment. 911 } 912 // During decode libvpx may get and release buffers from |frame_buffer_pool_|. 913 // In practice libvpx keeps a few (~3-4) buffers alive at a time. 914 if (vpx_codec_decode(decoder_, buffer, 915 static_cast<unsigned int>(input_image._length), 0, 916 VPX_DL_REALTIME)) { 917 return WEBRTC_VIDEO_CODEC_ERROR; 918 } 919 // |img->fb_priv| contains the image data, a reference counted Vp9FrameBuffer. 920 // It may be released by libvpx during future vpx_codec_decode or 921 // vpx_codec_destroy calls. 922 img = vpx_codec_get_frame(decoder_, &iter); 923 int ret = ReturnFrame(img, input_image._timeStamp); 924 if (ret != 0) { 925 return ret; 926 } 927 return WEBRTC_VIDEO_CODEC_OK; 928} 929 930int VP9DecoderImpl::ReturnFrame(const vpx_image_t* img, uint32_t timestamp) { 931 if (img == NULL) { 932 // Decoder OK and NULL image => No show frame. 933 return WEBRTC_VIDEO_CODEC_NO_OUTPUT; 934 } 935 936 // This buffer contains all of |img|'s image data, a reference counted 937 // Vp9FrameBuffer. (libvpx is done with the buffers after a few 938 // vpx_codec_decode calls or vpx_codec_destroy). 939 Vp9FrameBufferPool::Vp9FrameBuffer* img_buffer = 940 static_cast<Vp9FrameBufferPool::Vp9FrameBuffer*>(img->fb_priv); 941 // The buffer can be used directly by the VideoFrame (without copy) by 942 // using a WrappedI420Buffer. 943 rtc::scoped_refptr<WrappedI420Buffer> img_wrapped_buffer( 944 new rtc::RefCountedObject<webrtc::WrappedI420Buffer>( 945 img->d_w, img->d_h, img->planes[VPX_PLANE_Y], 946 img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U], 947 img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V], 948 img->stride[VPX_PLANE_V], 949 // WrappedI420Buffer's mechanism for allowing the release of its frame 950 // buffer is through a callback function. This is where we should 951 // release |img_buffer|. 952 rtc::KeepRefUntilDone(img_buffer))); 953 954 VideoFrame decoded_image; 955 decoded_image.set_video_frame_buffer(img_wrapped_buffer); 956 decoded_image.set_timestamp(timestamp); 957 int ret = decode_complete_callback_->Decoded(decoded_image); 958 if (ret != 0) 959 return ret; 960 return WEBRTC_VIDEO_CODEC_OK; 961} 962 963int VP9DecoderImpl::RegisterDecodeCompleteCallback( 964 DecodedImageCallback* callback) { 965 decode_complete_callback_ = callback; 966 return WEBRTC_VIDEO_CODEC_OK; 967} 968 969int VP9DecoderImpl::Release() { 970 if (decoder_ != NULL) { 971 // When a codec is destroyed libvpx will release any buffers of 972 // |frame_buffer_pool_| it is currently using. 973 if (vpx_codec_destroy(decoder_)) { 974 return WEBRTC_VIDEO_CODEC_MEMORY; 975 } 976 delete decoder_; 977 decoder_ = NULL; 978 } 979 // Releases buffers from the pool. Any buffers not in use are deleted. Buffers 980 // still referenced externally are deleted once fully released, not returning 981 // to the pool. 982 frame_buffer_pool_.ClearPool(); 983 inited_ = false; 984 return WEBRTC_VIDEO_CODEC_OK; 985} 986 987const char* VP9DecoderImpl::ImplementationName() const { 988 return "libvpx"; 989} 990 991} // namespace webrtc 992