/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 *
 */

#include "webrtc/modules/video_coding/codecs/vp9/vp9_impl.h"

#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <vector>

#include "vpx/vpx_encoder.h"
#include "vpx/vpx_decoder.h"
#include "vpx/vp8cx.h"
#include "vpx/vp8dx.h"

#include "webrtc/base/checks.h"
#include "webrtc/base/keep_ref_until_done.h"
#include "webrtc/base/logging.h"
#include "webrtc/base/trace_event.h"
#include "webrtc/common.h"
#include "webrtc/common_video/libyuv/include/webrtc_libyuv.h"
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/modules/video_coding/codecs/vp9/screenshare_layers.h"
#include "webrtc/system_wrappers/include/tick_util.h"

namespace webrtc {

// Only positive speeds; the range for real-time coding is currently 5 - 8.
// Lower means slower/better quality, higher means faster/lower quality.
int GetCpuSpeed(int width, int height) {
#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64)
  return 8;
#else
  // For smaller resolutions, use a lower speed setting (get some coding gain
  // at the cost of increased encoding complexity).
  if (width * height <= 352 * 288)
    return 5;
  else
    return 7;
#endif
}

VP9Encoder* VP9Encoder::Create() {
  return new VP9EncoderImpl();
}

void VP9EncoderImpl::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
                                                      void* user_data) {
  VP9EncoderImpl* enc = static_cast<VP9EncoderImpl*>(user_data);
  enc->GetEncodedLayerFrame(pkt);
}

VP9EncoderImpl::VP9EncoderImpl()
    : encoded_image_(),
      encoded_complete_callback_(NULL),
      inited_(false),
      timestamp_(0),
      picture_id_(0),
      cpu_speed_(3),
      rc_max_intra_target_(0),
      encoder_(NULL),
      config_(NULL),
      raw_(NULL),
      input_image_(NULL),
      tl0_pic_idx_(0),
      frames_since_kf_(0),
      num_temporal_layers_(0),
      num_spatial_layers_(0),
      frames_encoded_(0),
      // Use two spatial layers when screensharing with flexible mode.
      spatial_layer_(new ScreenshareLayersVP9(2)) {
  memset(&codec_, 0, sizeof(codec_));
  uint32_t seed = static_cast<uint32_t>(TickTime::MillisecondTimestamp());
  srand(seed);
}

VP9EncoderImpl::~VP9EncoderImpl() {
  Release();
}

int VP9EncoderImpl::Release() {
  if (encoded_image_._buffer != NULL) {
    delete[] encoded_image_._buffer;
    encoded_image_._buffer = NULL;
  }
  if (encoder_ != NULL) {
    if (vpx_codec_destroy(encoder_)) {
      return WEBRTC_VIDEO_CODEC_MEMORY;
    }
    delete encoder_;
    encoder_ = NULL;
  }
  if (config_ != NULL) {
    delete config_;
    config_ = NULL;
  }
  if (raw_ != NULL) {
    vpx_img_free(raw_);
    raw_ = NULL;
  }
  inited_ = false;
  return WEBRTC_VIDEO_CODEC_OK;
}

bool VP9EncoderImpl::ExplicitlyConfiguredSpatialLayers() const {
  // We check target_bitrate_bps of the 0th layer to see if the spatial layers
  // (i.e. their bitrates) were explicitly configured.
  return num_spatial_layers_ > 1 &&
         codec_.spatialLayers[0].target_bitrate_bps > 0;
}

bool VP9EncoderImpl::SetSvcRates() {
  uint8_t i = 0;

  if (ExplicitlyConfiguredSpatialLayers()) {
    if (num_temporal_layers_ > 1) {
      LOG(LS_ERROR) << "Multiple temporal layers when manually specifying "
                       "spatial layers not implemented yet!";
      return false;
    }
    int total_bitrate_bps = 0;
    for (i = 0; i < num_spatial_layers_; ++i)
      total_bitrate_bps += codec_.spatialLayers[i].target_bitrate_bps;
    // If the total bitrate now differs from what was specified at the
    // beginning, update the per-layer bitrates in the same ratio as before.
    for (i = 0; i < num_spatial_layers_; ++i) {
      config_->ss_target_bitrate[i] = config_->layer_target_bitrate[i] =
          static_cast<int>(static_cast<int64_t>(config_->rc_target_bitrate) *
                           codec_.spatialLayers[i].target_bitrate_bps /
                           total_bitrate_bps);
    }
  } else {
    float rate_ratio[VPX_MAX_LAYERS] = {0};
    float total = 0;

    for (i = 0; i < num_spatial_layers_; ++i) {
      if (svc_internal_.svc_params.scaling_factor_num[i] <= 0 ||
          svc_internal_.svc_params.scaling_factor_den[i] <= 0) {
        LOG(LS_ERROR) << "Scaling factors not specified!";
        return false;
      }
      rate_ratio[i] =
          static_cast<float>(svc_internal_.svc_params.scaling_factor_num[i]) /
          svc_internal_.svc_params.scaling_factor_den[i];
      total += rate_ratio[i];
    }

    for (i = 0; i < num_spatial_layers_; ++i) {
      config_->ss_target_bitrate[i] = static_cast<unsigned int>(
          config_->rc_target_bitrate * rate_ratio[i] / total);
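      // Note: layer_target_bitrate entries are cumulative; each temporal
      // layer's target includes the bitrate of all lower temporal layers
      // (two layers: 2/3 and full rate; three layers: 1/2, 3/4, full rate).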
      if (num_temporal_layers_ == 1) {
        config_->layer_target_bitrate[i] = config_->ss_target_bitrate[i];
      } else if (num_temporal_layers_ == 2) {
        config_->layer_target_bitrate[i * num_temporal_layers_] =
            config_->ss_target_bitrate[i] * 2 / 3;
        config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
            config_->ss_target_bitrate[i];
      } else if (num_temporal_layers_ == 3) {
        config_->layer_target_bitrate[i * num_temporal_layers_] =
            config_->ss_target_bitrate[i] / 2;
        config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
            config_->layer_target_bitrate[i * num_temporal_layers_] +
            (config_->ss_target_bitrate[i] / 4);
        config_->layer_target_bitrate[i * num_temporal_layers_ + 2] =
            config_->ss_target_bitrate[i];
      } else {
        LOG(LS_ERROR) << "Unsupported number of temporal layers: "
                      << num_temporal_layers_;
        return false;
      }
    }
  }

  // For now, temporal layers are only supported when there is one spatial
  // layer.
  if (num_spatial_layers_ == 1) {
    for (i = 0; i < num_temporal_layers_; ++i) {
      config_->ts_target_bitrate[i] = config_->layer_target_bitrate[i];
    }
  }

  return true;
}

int VP9EncoderImpl::SetRates(uint32_t new_bitrate_kbit,
                             uint32_t new_framerate) {
  if (!inited_) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  if (encoder_->err) {
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  if (new_framerate < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  // Update bit rate.
  if (codec_.maxBitrate > 0 && new_bitrate_kbit > codec_.maxBitrate) {
    new_bitrate_kbit = codec_.maxBitrate;
  }
  config_->rc_target_bitrate = new_bitrate_kbit;
  codec_.maxFramerate = new_framerate;
  spatial_layer_->ConfigureBitrate(new_bitrate_kbit, 0);

  if (!SetSvcRates()) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  // Update encoder context.
  if (vpx_codec_enc_config_set(encoder_, config_)) {
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  return WEBRTC_VIDEO_CODEC_OK;
}

int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
                               int number_of_cores,
                               size_t /*max_payload_size*/) {
  if (inst == NULL) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->maxFramerate < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  // Allow zero to represent an unspecified maxBitRate.
  if (inst->maxBitrate > 0 && inst->startBitrate > inst->maxBitrate) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->width < 1 || inst->height < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (number_of_cores < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->codecSpecific.VP9.numberOfTemporalLayers > 3) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  // libvpx currently supports only one or two spatial layers.
  if (inst->codecSpecific.VP9.numberOfSpatialLayers > 2) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  int ret_val = Release();
  if (ret_val < 0) {
    return ret_val;
  }
  if (encoder_ == NULL) {
    encoder_ = new vpx_codec_ctx_t;
  }
  if (config_ == NULL) {
    config_ = new vpx_codec_enc_cfg_t;
  }
  timestamp_ = 0;
  if (&codec_ != inst) {
    codec_ = *inst;
  }

  num_spatial_layers_ = inst->codecSpecific.VP9.numberOfSpatialLayers;
  num_temporal_layers_ = inst->codecSpecific.VP9.numberOfTemporalLayers;
  if (num_temporal_layers_ == 0)
    num_temporal_layers_ = 1;

  // Random start. The picture ID is kept within 15 bits (masked with 0x7FFF).
  picture_id_ = static_cast<uint16_t>(rand()) & 0x7FFF;  // NOLINT
  // Allocate memory for the encoded image.
  if (encoded_image_._buffer != NULL) {
    delete[] encoded_image_._buffer;
  }
  encoded_image_._size = CalcBufferSize(kI420, codec_.width, codec_.height);
  encoded_image_._buffer = new uint8_t[encoded_image_._size];
  encoded_image_._completeFrame = true;
  // Create a wrapper for the image, setting the image data to NULL. The
  // actual pointer will be set in Encode(). Align is set to 1, as it is
  // meaningless (no actual memory is allocated).
  raw_ = vpx_img_wrap(NULL, VPX_IMG_FMT_I420, codec_.width, codec_.height, 1,
                      NULL);
  // Populate encoder configuration with default values.
  if (vpx_codec_enc_config_default(vpx_codec_vp9_cx(), config_, 0)) {
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  config_->g_w = codec_.width;
  config_->g_h = codec_.height;
  config_->rc_target_bitrate = inst->startBitrate;  // In kbit/s.
  config_->g_error_resilient = 1;
  // Set the time base of the codec.
  config_->g_timebase.num = 1;
  config_->g_timebase.den = 90000;
  config_->g_lag_in_frames = 0;  // 0 - no frame lagging.
  config_->g_threads = 1;
  // Rate control settings.
  config_->rc_dropframe_thresh =
      inst->codecSpecific.VP9.frameDroppingOn ? 30 : 0;
  config_->rc_end_usage = VPX_CBR;
  config_->g_pass = VPX_RC_ONE_PASS;
  config_->rc_min_quantizer = 2;
  config_->rc_max_quantizer = 52;
  config_->rc_undershoot_pct = 50;
  config_->rc_overshoot_pct = 50;
  config_->rc_buf_initial_sz = 500;
  config_->rc_buf_optimal_sz = 600;
  config_->rc_buf_sz = 1000;
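  // Note: the rc_buf_* sizes above are expressed in milliseconds of buffered
  // data at the target bitrate, per the libvpx encoder configuration docs.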
  // Set the maximum target size of any key frame.
  rc_max_intra_target_ = MaxIntraTarget(config_->rc_buf_optimal_sz);
  if (inst->codecSpecific.VP9.keyFrameInterval > 0) {
    config_->kf_mode = VPX_KF_AUTO;
    config_->kf_max_dist = inst->codecSpecific.VP9.keyFrameInterval;
    // Needs to be set (in SVC mode) to get the correct periodic key frame
    // interval (has no effect in non-SVC mode).
    config_->kf_min_dist = config_->kf_max_dist;
  } else {
    config_->kf_mode = VPX_KF_DISABLED;
  }
  config_->rc_resize_allowed =
      inst->codecSpecific.VP9.automaticResizeOn ? 1 : 0;
  // Determine the number of threads based on the image size and #cores.
  config_->g_threads =
      NumberOfThreads(config_->g_w, config_->g_h, number_of_cores);

  cpu_speed_ = GetCpuSpeed(config_->g_w, config_->g_h);

  // TODO(asapersson): Check configuration of temporal switch up and increase
  // pattern length.
  is_flexible_mode_ = inst->codecSpecific.VP9.flexibleMode;
  if (is_flexible_mode_) {
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
    config_->ts_number_layers = num_temporal_layers_;
    if (codec_.mode == kScreensharing)
      spatial_layer_->ConfigureBitrate(inst->startBitrate, 0);
  } else if (num_temporal_layers_ == 1) {
    gof_.SetGofInfoVP9(kTemporalStructureMode1);
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING;
    config_->ts_number_layers = 1;
    config_->ts_rate_decimator[0] = 1;
    config_->ts_periodicity = 1;
    config_->ts_layer_id[0] = 0;
  } else if (num_temporal_layers_ == 2) {
    gof_.SetGofInfoVP9(kTemporalStructureMode2);
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0101;
    config_->ts_number_layers = 2;
    config_->ts_rate_decimator[0] = 2;
    config_->ts_rate_decimator[1] = 1;
    config_->ts_periodicity = 2;
    config_->ts_layer_id[0] = 0;
    config_->ts_layer_id[1] = 1;
  } else if (num_temporal_layers_ == 3) {
    gof_.SetGofInfoVP9(kTemporalStructureMode3);
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0212;
    config_->ts_number_layers = 3;
    config_->ts_rate_decimator[0] = 4;
    config_->ts_rate_decimator[1] = 2;
    config_->ts_rate_decimator[2] = 1;
    config_->ts_periodicity = 4;
    config_->ts_layer_id[0] = 0;
    config_->ts_layer_id[1] = 2;
    config_->ts_layer_id[2] = 1;
    config_->ts_layer_id[3] = 2;
  } else {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  tl0_pic_idx_ = static_cast<uint8_t>(rand());  // NOLINT

  return InitAndSetControlSettings(inst);
}

int VP9EncoderImpl::NumberOfThreads(int width,
                                    int height,
                                    int number_of_cores) {
  // Keep the number of encoder threads equal to the possible number of column
  // tiles, which is (1, 2, 4, 8). See comments below for VP9E_SET_TILE_COLUMNS.
  if (width * height >= 1280 * 720 && number_of_cores > 4) {
    return 4;
  } else if (width * height >= 640 * 480 && number_of_cores > 2) {
    return 2;
  } else {
    // Use 1 thread for resolutions smaller than VGA.
    return 1;
  }
}

int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
  // Set QP-min/max per spatial and temporal layer.
  int tot_num_layers = num_spatial_layers_ * num_temporal_layers_;
  for (int i = 0; i < tot_num_layers; ++i) {
    svc_internal_.svc_params.max_quantizers[i] = config_->rc_max_quantizer;
    svc_internal_.svc_params.min_quantizers[i] = config_->rc_min_quantizer;
  }
  config_->ss_number_layers = num_spatial_layers_;
  if (ExplicitlyConfiguredSpatialLayers()) {
    for (int i = 0; i < num_spatial_layers_; ++i) {
      const auto& layer = codec_.spatialLayers[i];
      svc_internal_.svc_params.scaling_factor_num[i] = layer.scaling_factor_num;
      svc_internal_.svc_params.scaling_factor_den[i] = layer.scaling_factor_den;
    }
  } else {
    int scaling_factor_num = 256;
    for (int i = num_spatial_layers_ - 1; i >= 0; --i) {
      // 1:2 scaling in each dimension.
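      // The top spatial layer is full resolution (256/256); each lower layer
      // halves both dimensions, except in screensharing mode where all layers
      // keep full resolution.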
      svc_internal_.svc_params.scaling_factor_num[i] = scaling_factor_num;
      svc_internal_.svc_params.scaling_factor_den[i] = 256;
      if (codec_.mode != kScreensharing)
        scaling_factor_num /= 2;
    }
  }

  if (!SetSvcRates()) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  if (vpx_codec_enc_init(encoder_, vpx_codec_vp9_cx(), config_, 0)) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  vpx_codec_control(encoder_, VP8E_SET_CPUUSED, cpu_speed_);
  vpx_codec_control(encoder_, VP8E_SET_MAX_INTRA_BITRATE_PCT,
                    rc_max_intra_target_);
  vpx_codec_control(encoder_, VP9E_SET_AQ_MODE,
                    inst->codecSpecific.VP9.adaptiveQpMode ? 3 : 0);

  vpx_codec_control(
      encoder_, VP9E_SET_SVC,
      (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) ? 1 : 0);
  if (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) {
    vpx_codec_control(encoder_, VP9E_SET_SVC_PARAMETERS,
                      &svc_internal_.svc_params);
  }
  // Register a callback for getting each spatial layer.
  vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = {
      VP9EncoderImpl::EncoderOutputCodedPacketCallback,
      reinterpret_cast<void*>(this)};
  vpx_codec_control(encoder_, VP9E_REGISTER_CX_CALLBACK,
                    reinterpret_cast<void*>(&cbp));

  // Control function to set the number of column tiles when encoding a frame,
  // in log2 units: e.g., 0 = 1 tile column, 1 = 2 tile columns, 2 = 4 tile
  // columns. The number of tile columns will be capped by the encoder based on
  // the image size (minimum width of a tile column is 256 pixels, maximum is
  // 4096).
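  // E.g., with g_threads == 4 below, (4 >> 1) == 2 requests four tile
  // columns, matching the thread count chosen in NumberOfThreads().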
  vpx_codec_control(encoder_, VP9E_SET_TILE_COLUMNS, (config_->g_threads >> 1));
#if !defined(WEBRTC_ARCH_ARM) && !defined(WEBRTC_ARCH_ARM64)
  // Note: the denoiser is still off by default until further testing and
  // optimization, i.e., codecSpecific.VP9.denoisingOn == 0.
  vpx_codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY,
                    inst->codecSpecific.VP9.denoisingOn ? 1 : 0);
#endif
  if (codec_.mode == kScreensharing) {
    // Adjust internal parameters to screen content.
    vpx_codec_control(encoder_, VP9E_SET_TUNE_CONTENT, 1);
  }
  // Enable encoder skip of static/low content blocks.
  vpx_codec_control(encoder_, VP8E_SET_STATIC_THRESHOLD, 1);
  inited_ = true;
  return WEBRTC_VIDEO_CODEC_OK;
}

uint32_t VP9EncoderImpl::MaxIntraTarget(uint32_t optimal_buffer_size) {
  // Set max to the optimal buffer level (normalized by target BR),
  // and scaled by a scale_par.
  // Max target size = scale_par * optimal_buffer_size * targetBR[Kbps].
  // This value is presented in percentage of perFrameBw:
  // perFrameBw = targetBR[Kbps] * 1000 / framerate.
  // The target in % is as follows:
  float scale_par = 0.5;
  uint32_t target_pct =
      optimal_buffer_size * scale_par * codec_.maxFramerate / 10;
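  // Worked example (assumed values): rc_buf_optimal_sz = 600 and
  // maxFramerate = 30 give target_pct = 600 * 0.5 * 30 / 10 = 900, i.e. a
  // key frame may be up to 9x the average per-frame size (floored below).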
  // Don't go below 3 times the per frame bandwidth.
  const uint32_t min_intra_size = 300;
  return (target_pct < min_intra_size) ? min_intra_size : target_pct;
}

int VP9EncoderImpl::Encode(const VideoFrame& input_image,
                           const CodecSpecificInfo* codec_specific_info,
                           const std::vector<FrameType>* frame_types) {
  if (!inited_) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  if (input_image.IsZeroSize()) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (encoded_complete_callback_ == NULL) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  FrameType frame_type = kVideoFrameDelta;
  // We only support one stream at the moment.
  if (frame_types && frame_types->size() > 0) {
    frame_type = (*frame_types)[0];
  }
  RTC_DCHECK_EQ(input_image.width(), static_cast<int>(raw_->d_w));
  RTC_DCHECK_EQ(input_image.height(), static_cast<int>(raw_->d_h));

  // Keep a pointer to the input image for use in the encoded-frame callback,
  // which needs some of its information. Alternatively, only the required
  // fields (such as the timestamp) could be stored.
  input_image_ = &input_image;

  // Image in vpx_image_t format.
  // The input image is const; VPX's raw image is not defined as const.
  raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(input_image.buffer(kYPlane));
  raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(input_image.buffer(kUPlane));
  raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>(input_image.buffer(kVPlane));
  raw_->stride[VPX_PLANE_Y] = input_image.stride(kYPlane);
  raw_->stride[VPX_PLANE_U] = input_image.stride(kUPlane);
  raw_->stride[VPX_PLANE_V] = input_image.stride(kVPlane);

  vpx_enc_frame_flags_t flags = 0;
  bool send_keyframe = (frame_type == kVideoFrameKey);
  if (send_keyframe) {
    // Key frame requested by the caller.
    flags = VPX_EFLAG_FORCE_KF;
  }

  if (is_flexible_mode_) {
    SuperFrameRefSettings settings;

    // These structs are copied when calling vpx_codec_control,
    // so it is OK for them to go out of scope.
    vpx_svc_ref_frame_config enc_layer_conf;
    vpx_svc_layer_id layer_id;

    if (codec_.mode == kRealtimeVideo) {
      // Real-time video is not yet implemented in flexible mode.
      RTC_NOTREACHED();
    } else {
      settings = spatial_layer_->GetSuperFrameSettings(input_image.timestamp(),
                                                       send_keyframe);
    }
    enc_layer_conf = GenerateRefsAndFlags(settings);
    layer_id.temporal_layer_id = 0;
    layer_id.spatial_layer_id = settings.start_layer;
    vpx_codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id);
    vpx_codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG, &enc_layer_conf);
  }

  assert(codec_.maxFramerate > 0);
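  // Frame duration in units of the 90 kHz timebase configured in
  // InitEncode(); each encoded frame advances timestamp_ by one frame
  // interval at the maximum framerate.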
  uint32_t duration = 90000 / codec_.maxFramerate;
  if (vpx_codec_encode(encoder_, raw_, timestamp_, duration, flags,
                       VPX_DL_REALTIME)) {
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  timestamp_ += duration;

  return WEBRTC_VIDEO_CODEC_OK;
}

void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
                                           const vpx_codec_cx_pkt& pkt,
                                           uint32_t timestamp) {
  assert(codec_specific != NULL);
  codec_specific->codecType = kVideoCodecVP9;
  CodecSpecificInfoVP9* vp9_info = &(codec_specific->codecSpecific.VP9);
  // TODO(asapersson): Set correct value.
  vp9_info->inter_pic_predicted = !(pkt.data.frame.flags & VPX_FRAME_IS_KEY);
  vp9_info->flexible_mode = codec_.codecSpecific.VP9.flexibleMode;
  vp9_info->ss_data_available = (pkt.data.frame.flags & VPX_FRAME_IS_KEY) &&
                                !codec_.codecSpecific.VP9.flexibleMode;

  vpx_svc_layer_id_t layer_id = {0};
  vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);

  assert(num_temporal_layers_ > 0);
  assert(num_spatial_layers_ > 0);
  if (num_temporal_layers_ == 1) {
    assert(layer_id.temporal_layer_id == 0);
    vp9_info->temporal_idx = kNoTemporalIdx;
  } else {
    vp9_info->temporal_idx = layer_id.temporal_layer_id;
  }
  if (num_spatial_layers_ == 1) {
    assert(layer_id.spatial_layer_id == 0);
    vp9_info->spatial_idx = kNoSpatialIdx;
  } else {
    vp9_info->spatial_idx = layer_id.spatial_layer_id;
  }
  if (layer_id.spatial_layer_id != 0) {
    vp9_info->ss_data_available = false;
  }

  // TODO(asapersson): this info has to be obtained from the encoder.
  vp9_info->temporal_up_switch = false;

  bool is_first_frame = false;
  if (is_flexible_mode_) {
    is_first_frame =
        layer_id.spatial_layer_id == spatial_layer_->GetStartLayer();
  } else {
    is_first_frame = layer_id.spatial_layer_id == 0;
  }

  if (is_first_frame) {
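    // The picture ID wraps at 15 bits, matching the extended (15-bit)
    // picture ID field of the VP9 RTP payload descriptor.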
    picture_id_ = (picture_id_ + 1) & 0x7FFF;
    // TODO(asapersson): this info has to be obtained from the encoder.
    vp9_info->inter_layer_predicted = false;
    ++frames_since_kf_;
  } else {
    // TODO(asapersson): this info has to be obtained from the encoder.
    vp9_info->inter_layer_predicted = true;
  }

  if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
    frames_since_kf_ = 0;
  }

  vp9_info->picture_id = picture_id_;

  if (!vp9_info->flexible_mode) {
    if (layer_id.temporal_layer_id == 0 && layer_id.spatial_layer_id == 0) {
      tl0_pic_idx_++;
    }
    vp9_info->tl0_pic_idx = tl0_pic_idx_;
  }

  // Always populate this, so that the packetizer can properly set the marker
  // bit.
  vp9_info->num_spatial_layers = num_spatial_layers_;

  vp9_info->num_ref_pics = 0;
  if (vp9_info->flexible_mode) {
    vp9_info->gof_idx = kNoGofIdx;
    vp9_info->num_ref_pics = num_ref_pics_[layer_id.spatial_layer_id];
    for (int i = 0; i < num_ref_pics_[layer_id.spatial_layer_id]; ++i) {
      vp9_info->p_diff[i] = p_diff_[layer_id.spatial_layer_id][i];
    }
  } else {
    vp9_info->gof_idx =
        static_cast<uint8_t>(frames_since_kf_ % gof_.num_frames_in_gof);
    vp9_info->temporal_up_switch = gof_.temporal_up_switch[vp9_info->gof_idx];
  }

  if (vp9_info->ss_data_available) {
    vp9_info->spatial_layer_resolution_present = true;
    for (size_t i = 0; i < vp9_info->num_spatial_layers; ++i) {
      vp9_info->width[i] = codec_.width *
                           svc_internal_.svc_params.scaling_factor_num[i] /
                           svc_internal_.svc_params.scaling_factor_den[i];
      vp9_info->height[i] = codec_.height *
                            svc_internal_.svc_params.scaling_factor_num[i] /
                            svc_internal_.svc_params.scaling_factor_den[i];
    }
    if (!vp9_info->flexible_mode) {
      vp9_info->gof.CopyGofInfoVP9(gof_);
    }
  }
}

int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
  encoded_image_._length = 0;
  encoded_image_._frameType = kVideoFrameDelta;
  RTPFragmentationHeader frag_info;
  // Note: there is no data partitioning in VP9, so only one partition. We
  // keep this fragmentation data for now, until the VP9 packetizer is
  // implemented.
  frag_info.VerifyAndAllocateFragmentationHeader(1);
  int part_idx = 0;
  CodecSpecificInfo codec_specific;

  assert(pkt->kind == VPX_CODEC_CX_FRAME_PKT);
  memcpy(&encoded_image_._buffer[encoded_image_._length], pkt->data.frame.buf,
         pkt->data.frame.sz);
  frag_info.fragmentationOffset[part_idx] = encoded_image_._length;
  frag_info.fragmentationLength[part_idx] =
      static_cast<uint32_t>(pkt->data.frame.sz);
  frag_info.fragmentationPlType[part_idx] = 0;
  frag_info.fragmentationTimeDiff[part_idx] = 0;
  encoded_image_._length += static_cast<uint32_t>(pkt->data.frame.sz);

  vpx_svc_layer_id_t layer_id = {0};
  vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
  if (is_flexible_mode_ && codec_.mode == kScreensharing)
    spatial_layer_->LayerFrameEncoded(
        static_cast<unsigned int>(encoded_image_._length),
        layer_id.spatial_layer_id);

  assert(encoded_image_._length <= encoded_image_._size);

  // End of frame.
  // Check if the encoded frame is a key frame.
  if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
    encoded_image_._frameType = kVideoFrameKey;
  }
  PopulateCodecSpecific(&codec_specific, *pkt, input_image_->timestamp());

  if (encoded_image_._length > 0) {
    TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_._length);
    encoded_image_._timeStamp = input_image_->timestamp();
    encoded_image_.capture_time_ms_ = input_image_->render_time_ms();
    encoded_image_._encodedHeight = raw_->d_h;
    encoded_image_._encodedWidth = raw_->d_w;
    encoded_complete_callback_->Encoded(encoded_image_, &codec_specific,
                                        &frag_info);
  }
  return WEBRTC_VIDEO_CODEC_OK;
}

vpx_svc_ref_frame_config VP9EncoderImpl::GenerateRefsAndFlags(
    const SuperFrameRefSettings& settings) {
  static const vpx_enc_frame_flags_t kAllFlags =
      VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_LAST |
      VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF;
  vpx_svc_ref_frame_config sf_conf = {};
  if (settings.is_keyframe) {
    // Used later on to make sure we don't make any invalid references.
    memset(buffer_updated_at_frame_, -1, sizeof(buffer_updated_at_frame_));
    for (int layer = settings.start_layer; layer <= settings.stop_layer;
         ++layer) {
      num_ref_pics_[layer] = 0;
      buffer_updated_at_frame_[settings.layer[layer].upd_buf] = frames_encoded_;
      // When encoding a keyframe only the alt_fb_idx is used
      // to specify which layer ends up in which buffer.
      sf_conf.alt_fb_idx[layer] = settings.layer[layer].upd_buf;
    }
  } else {
    for (int layer_idx = settings.start_layer; layer_idx <= settings.stop_layer;
         ++layer_idx) {
      vpx_enc_frame_flags_t layer_flags = kAllFlags;
      num_ref_pics_[layer_idx] = 0;
      int8_t refs[3] = {settings.layer[layer_idx].ref_buf1,
                        settings.layer[layer_idx].ref_buf2,
                        settings.layer[layer_idx].ref_buf3};

      for (unsigned int ref_idx = 0; ref_idx < kMaxVp9RefPics; ++ref_idx) {
        if (refs[ref_idx] == -1)
          continue;

        RTC_DCHECK_GE(refs[ref_idx], 0);
        RTC_DCHECK_LE(refs[ref_idx], 7);
        // It is easier to remove flags from an all-flags-set value than to
        // build the flags up from 0.
        switch (num_ref_pics_[layer_idx]) {
          case 0: {
            sf_conf.lst_fb_idx[layer_idx] = refs[ref_idx];
            layer_flags &= ~VP8_EFLAG_NO_REF_LAST;
            break;
          }
          case 1: {
            sf_conf.gld_fb_idx[layer_idx] = refs[ref_idx];
            layer_flags &= ~VP8_EFLAG_NO_REF_GF;
            break;
          }
          case 2: {
            sf_conf.alt_fb_idx[layer_idx] = refs[ref_idx];
            layer_flags &= ~VP8_EFLAG_NO_REF_ARF;
            break;
          }
        }
        // Make sure we don't reference a buffer that hasn't been
        // used at all or hasn't been used since a keyframe.
        RTC_DCHECK_NE(buffer_updated_at_frame_[refs[ref_idx]], -1);

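        // Record how many frames back the referenced buffer was last
        // updated; this is reported as p_diff in PopulateCodecSpecific().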
        p_diff_[layer_idx][num_ref_pics_[layer_idx]] =
            frames_encoded_ - buffer_updated_at_frame_[refs[ref_idx]];
        num_ref_pics_[layer_idx]++;
      }

      bool upd_buf_same_as_a_ref = false;
      if (settings.layer[layer_idx].upd_buf != -1) {
        for (unsigned int ref_idx = 0; ref_idx < kMaxVp9RefPics; ++ref_idx) {
          if (settings.layer[layer_idx].upd_buf == refs[ref_idx]) {
            switch (ref_idx) {
              case 0: {
                layer_flags &= ~VP8_EFLAG_NO_UPD_LAST;
                break;
              }
              case 1: {
                layer_flags &= ~VP8_EFLAG_NO_UPD_GF;
                break;
              }
              case 2: {
                layer_flags &= ~VP8_EFLAG_NO_UPD_ARF;
                break;
              }
            }
            upd_buf_same_as_a_ref = true;
            break;
          }
        }
        if (!upd_buf_same_as_a_ref) {
          // If we have three references and a buffer is specified to be
          // updated, then that buffer must be the same as one of the
          // three references.
          RTC_CHECK_LT(num_ref_pics_[layer_idx], kMaxVp9RefPics);

          sf_conf.alt_fb_idx[layer_idx] = settings.layer[layer_idx].upd_buf;
          layer_flags ^= VP8_EFLAG_NO_UPD_ARF;
        }

        int updated_buffer = settings.layer[layer_idx].upd_buf;
        buffer_updated_at_frame_[updated_buffer] = frames_encoded_;
        sf_conf.frame_flags[layer_idx] = layer_flags;
      }
    }
  }
  ++frames_encoded_;
  return sf_conf;
}

int VP9EncoderImpl::SetChannelParameters(uint32_t packet_loss, int64_t rtt) {
  return WEBRTC_VIDEO_CODEC_OK;
}

int VP9EncoderImpl::RegisterEncodeCompleteCallback(
    EncodedImageCallback* callback) {
  encoded_complete_callback_ = callback;
  return WEBRTC_VIDEO_CODEC_OK;
}

const char* VP9EncoderImpl::ImplementationName() const {
  return "libvpx";
}

VP9Decoder* VP9Decoder::Create() {
  return new VP9DecoderImpl();
}

VP9DecoderImpl::VP9DecoderImpl()
    : decode_complete_callback_(NULL),
      inited_(false),
      decoder_(NULL),
      key_frame_required_(true) {
  memset(&codec_, 0, sizeof(codec_));
}

VP9DecoderImpl::~VP9DecoderImpl() {
  inited_ = true;  // In order to do the actual release.
  Release();
  int num_buffers_in_use = frame_buffer_pool_.GetNumBuffersInUse();
  if (num_buffers_in_use > 0) {
    // The frame buffers are reference counted and frames are exposed after
    // decoding. There may be valid usage cases where previous frames are
    // still referenced after ~VP9DecoderImpl; that is not a leak.
    LOG(LS_INFO) << num_buffers_in_use << " Vp9FrameBuffers are still "
                 << "referenced during ~VP9DecoderImpl.";
  }
}

int VP9DecoderImpl::Reset() {
  if (!inited_) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  InitDecode(&codec_, 1);
  return WEBRTC_VIDEO_CODEC_OK;
}

int VP9DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) {
  if (inst == NULL) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  int ret_val = Release();
  if (ret_val < 0) {
    return ret_val;
  }
  if (decoder_ == NULL) {
    decoder_ = new vpx_codec_ctx_t;
  }
  vpx_codec_dec_cfg_t cfg;
  // Set the number of threads to a constant value (1).
  cfg.threads = 1;
  cfg.h = cfg.w = 0;  // Set after decode.
  vpx_codec_flags_t flags = 0;
  if (vpx_codec_dec_init(decoder_, vpx_codec_vp9_dx(), &cfg, flags)) {
    return WEBRTC_VIDEO_CODEC_MEMORY;
  }
  if (&codec_ != inst) {
    // Save the VideoCodec instance for later; mainly for duplicating the
    // decoder.
    codec_ = *inst;
  }

  if (!frame_buffer_pool_.InitializeVpxUsePool(decoder_)) {
    return WEBRTC_VIDEO_CODEC_MEMORY;
  }

  inited_ = true;
  // Always start with a complete key frame.
  key_frame_required_ = true;
  return WEBRTC_VIDEO_CODEC_OK;
}

int VP9DecoderImpl::Decode(const EncodedImage& input_image,
                           bool missing_frames,
                           const RTPFragmentationHeader* fragmentation,
                           const CodecSpecificInfo* codec_specific_info,
                           int64_t /*render_time_ms*/) {
  if (!inited_) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  if (decode_complete_callback_ == NULL) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  // Always start with a complete key frame.
  if (key_frame_required_) {
    if (input_image._frameType != kVideoFrameKey)
      return WEBRTC_VIDEO_CODEC_ERROR;
    // We have a key frame - is it complete?
    if (input_image._completeFrame) {
      key_frame_required_ = false;
    } else {
      return WEBRTC_VIDEO_CODEC_ERROR;
    }
  }
  vpx_codec_iter_t iter = NULL;
  vpx_image_t* img;
  uint8_t* buffer = input_image._buffer;
  if (input_image._length == 0) {
    buffer = NULL;  // Triggers full frame concealment.
  }
  // During decode libvpx may get and release buffers from
  // |frame_buffer_pool_|. In practice libvpx keeps a few (~3-4) buffers alive
  // at a time.
  if (vpx_codec_decode(decoder_, buffer,
                       static_cast<unsigned int>(input_image._length), 0,
                       VPX_DL_REALTIME)) {
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  // |img->fb_priv| contains the image data, a reference counted Vp9FrameBuffer.
  // It may be released by libvpx during future vpx_codec_decode or
  // vpx_codec_destroy calls.
  img = vpx_codec_get_frame(decoder_, &iter);
  int ret = ReturnFrame(img, input_image._timeStamp);
  if (ret != 0) {
    return ret;
  }
  return WEBRTC_VIDEO_CODEC_OK;
}

int VP9DecoderImpl::ReturnFrame(const vpx_image_t* img, uint32_t timestamp) {
  if (img == NULL) {
    // Decoder OK and NULL image => No show frame.
    return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
  }

  // This buffer contains all of |img|'s image data, a reference counted
  // Vp9FrameBuffer. (libvpx is done with the buffers after a few
  // vpx_codec_decode calls or vpx_codec_destroy).
  Vp9FrameBufferPool::Vp9FrameBuffer* img_buffer =
      static_cast<Vp9FrameBufferPool::Vp9FrameBuffer*>(img->fb_priv);
  // The buffer can be used directly by the VideoFrame (without copy) by
  // using a WrappedI420Buffer.
  rtc::scoped_refptr<WrappedI420Buffer> img_wrapped_buffer(
      new rtc::RefCountedObject<webrtc::WrappedI420Buffer>(
          img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
          img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U],
          img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V],
          img->stride[VPX_PLANE_V],
          // WrappedI420Buffer's mechanism for allowing the release of its
          // frame buffer is through a callback function. This is where we
          // should release |img_buffer|.
          rtc::KeepRefUntilDone(img_buffer)));

  VideoFrame decoded_image;
  decoded_image.set_video_frame_buffer(img_wrapped_buffer);
  decoded_image.set_timestamp(timestamp);
  int ret = decode_complete_callback_->Decoded(decoded_image);
  if (ret != 0)
    return ret;
  return WEBRTC_VIDEO_CODEC_OK;
}

int VP9DecoderImpl::RegisterDecodeCompleteCallback(
    DecodedImageCallback* callback) {
  decode_complete_callback_ = callback;
  return WEBRTC_VIDEO_CODEC_OK;
}

int VP9DecoderImpl::Release() {
  if (decoder_ != NULL) {
    // When a codec is destroyed libvpx will release any buffers of
    // |frame_buffer_pool_| it is currently using.
    if (vpx_codec_destroy(decoder_)) {
      return WEBRTC_VIDEO_CODEC_MEMORY;
    }
    delete decoder_;
    decoder_ = NULL;
  }
  // Release buffers from the pool. Any buffers not in use are deleted.
  // Buffers still referenced externally are deleted once they are fully
  // released, without returning to the pool.
  frame_buffer_pool_.ClearPool();
  inited_ = false;
  return WEBRTC_VIDEO_CODEC_OK;
}

const char* VP9DecoderImpl::ImplementationName() const {
  return "libvpx";
}

}  // namespace webrtc
