modules/video_coding/qm_select.h

/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_
#define WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_

#include "webrtc/common_types.h"
#include "webrtc/typedefs.h"

/******************************************************/
/* Quality Modes: Resolution and Robustness settings  */
/******************************************************/

namespace webrtc {
struct VideoContentMetrics;

struct VCMResolutionScale {
  VCMResolutionScale()
      : codec_width(640),
        codec_height(480),
        frame_rate(30.0f),
        spatial_width_fact(1.0f),
        spatial_height_fact(1.0f),
        temporal_fact(1.0f),
        change_resolution_spatial(false),
        change_resolution_temporal(false) {}
  uint16_t codec_width;
  uint16_t codec_height;
  float frame_rate;
  float spatial_width_fact;
  float spatial_height_fact;
  float temporal_fact;
  bool change_resolution_spatial;
  bool change_resolution_temporal;
};

enum ImageType {
  kQCIF = 0,  // 176x144
  kHCIF,      // 264x216 = half(~3/4x3/4) CIF.
  kQVGA,      // 320x240 = quarter VGA.
  kCIF,       // 352x288
  kHVGA,      // 480x360 = half(~3/4x3/4) VGA.
  kVGA,       // 640x480
  kQFULLHD,   // 960x540 = quarter FULLHD, and half(~3/4x3/4) WHD.
  kWHD,       // 1280x720
  kFULLHD,    // 1920x1080
  kNumImageTypes
};

const uint32_t kSizeOfImageType[kNumImageTypes] = {
    25344, 57024, 76800, 101376, 172800, 307200, 518400, 921600, 2073600};

enum FrameRateLevelClass {
  kFrameRateLow,
  kFrameRateMiddle1,
  kFrameRateMiddle2,
  kFrameRateHigh
};

enum ContentLevelClass { kLow, kHigh, kDefault };

struct VCMContFeature {
  VCMContFeature() : value(0.0f), level(kDefault) {}
  void Reset() {
    value = 0.0f;
    level = kDefault;
  }
  float value;
  ContentLevelClass level;
};

enum UpDownAction { kUpResolution, kDownResolution };

enum SpatialAction {
  kNoChangeSpatial,
  kOneHalfSpatialUniform,     // 3/4 x 3/4: 9/6 ~1/2 pixel reduction.
  kOneQuarterSpatialUniform,  // 1/2 x 1/2: 1/4 pixel reduction.
  kNumModesSpatial
};

enum TemporalAction {
  kNoChangeTemporal,
  kTwoThirdsTemporal,  // 2/3 frame rate reduction
  kOneHalfTemporal,    // 1/2 frame rate reduction
  kNumModesTemporal
};

struct ResolutionAction {
  ResolutionAction() : spatial(kNoChangeSpatial), temporal(kNoChangeTemporal) {}
  SpatialAction spatial;
  TemporalAction temporal;
};

// Down-sampling factors for spatial (width and height), and temporal.
const float kFactorWidthSpatial[kNumModesSpatial] = {1.0f, 4.0f / 3.0f, 2.0f};

const float kFactorHeightSpatial[kNumModesSpatial] = {1.0f, 4.0f / 3.0f, 2.0f};

const float kFactorTemporal[kNumModesTemporal] = {1.0f, 1.5f, 2.0f};

enum EncoderState {
  kStableEncoding,    // Low rate mis-match, stable buffer levels.
  kStressedEncoding,  // Significant over-shooting of target rate,
                      // Buffer under-flow, etc.
  kEasyEncoding       // Significant under-shooting of target rate.
};

// QmMethod class: main class for resolution and robustness settings

class VCMQmMethod {
 public:
  VCMQmMethod();
  virtual ~VCMQmMethod();

  // Reset values
  void ResetQM();
  virtual void Reset() = 0;

  // Compute content class.
  uint8_t ComputeContentClass();

  // Update with the content metrics.
  void UpdateContent(const VideoContentMetrics* content_metrics);

  // Compute spatial texture magnitude and level.
  // Spatial texture is a spatial prediction error measure.
  void ComputeSpatial();

  // Compute motion magnitude and level for NFD metric.
  // NFD is normalized frame difference (normalized by spatial variance).
  void ComputeMotionNFD();

  // Get the imageType (CIF, VGA, HD, etc) for the system width/height.
  ImageType GetImageType(uint16_t width, uint16_t height);

  // Return the closest image type.
  ImageType FindClosestImageType(uint16_t width, uint16_t height);

  // Get the frame rate level.
  FrameRateLevelClass FrameRateLevel(float frame_rate);

 protected:
  // Content Data.
  const VideoContentMetrics* content_metrics_;

  // Encoder frame sizes and native frame sizes.
  uint16_t width_;
  uint16_t height_;
  float user_frame_rate_;
  uint16_t native_width_;
  uint16_t native_height_;
  float native_frame_rate_;
  float aspect_ratio_;
  // Image type and frame rate leve, for the current encoder resolution.
  ImageType image_type_;
  FrameRateLevelClass framerate_level_;
  // Content class data.
  VCMContFeature motion_;
  VCMContFeature spatial_;
  uint8_t content_class_;
  bool init_;
};

// Resolution settings class

class VCMQmResolution : public VCMQmMethod {
 public:
  VCMQmResolution();
  virtual ~VCMQmResolution();

  // Reset all quantities.
  virtual void Reset();

  // Reset rate quantities and counters after every SelectResolution() call.
  void ResetRates();

  // Reset down-sampling state.
  void ResetDownSamplingState();

  // Get the encoder state.
  EncoderState GetEncoderState();

  // Initialize after SetEncodingData in media_opt.
  int Initialize(float bitrate,
                 float user_framerate,
                 uint16_t width,
                 uint16_t height,
                 int num_layers);

  // Update the encoder frame size.
  void UpdateCodecParameters(float frame_rate, uint16_t width, uint16_t height);

  // Update with actual bit rate (size of the latest encoded frame)
  // and frame type, after every encoded frame.
  void UpdateEncodedSize(size_t encoded_size);

  // Update with new target bitrate, actual encoder sent rate, frame_rate,
  // loss rate: every ~1 sec from SetTargetRates in media_opt.
  void UpdateRates(float target_bitrate,
                   float encoder_sent_rate,
                   float incoming_framerate,
                   uint8_t packet_loss);

  // Extract ST (spatio-temporal) resolution action.
  // Inputs: qm: Reference to the quality modes pointer.
  // Output: the spatial and/or temporal scale change.
  int SelectResolution(VCMResolutionScale** qm);

 private:
  // Set the default resolution action.
  void SetDefaultAction();

  // Compute rates for the selection of down-sampling action.
  void ComputeRatesForSelection();

  // Compute the encoder state.
  void ComputeEncoderState();

  // Return true if the action is to go back up in resolution.
  bool GoingUpResolution();

  // Return true if the action is to go down in resolution.
  bool GoingDownResolution();

  // Check the condition for going up in resolution by the scale factors:
  // |facWidth|, |facHeight|, |facTemp|.
  // |scaleFac| is a scale factor for the transition rate.
  bool ConditionForGoingUp(float fac_width,
                           float fac_height,
                           float fac_temp,
                           float scale_fac);

  // Get the bitrate threshold for the resolution action.
  // The case |facWidth|=|facHeight|=|facTemp|==1 is for down-sampling action.
  // |scaleFac| is a scale factor for the transition rate.
  float GetTransitionRate(float fac_width,
                          float fac_height,
                          float fac_temp,
                          float scale_fac);

  // Update the down-sampling state.
  void UpdateDownsamplingState(UpDownAction up_down);

  // Update the codec frame size and frame rate.
  void UpdateCodecResolution();

  // Return a state based on average target rate relative transition rate.
  uint8_t RateClass(float transition_rate);

  // Adjust the action selected from the table.
  void AdjustAction();

  // Covert 2 stages of 3/4 (=9/16) spatial decimation to 1/2.
  void ConvertSpatialFractionalToWhole();

  // Returns true if the new frame sizes, under the selected spatial action,
  // are of even size.
  bool EvenFrameSize();

  // Insert latest down-sampling action into the history list.
  void InsertLatestDownAction();

  // Remove the last (first element) down-sampling action from the list.
  void RemoveLastDownAction();

  // Check constraints on the amount of down-sampling allowed.
  void ConstrainAmountOfDownSampling();

  // For going up in resolution: pick spatial or temporal action,
  // if both actions were separately selected.
  void PickSpatialOrTemporal();

  // Select the directional (1x2 or 2x1) spatial down-sampling action.
  void SelectSpatialDirectionMode(float transition_rate);

  enum { kDownActionHistorySize = 10 };

  VCMResolutionScale* qm_;
  // Encoder rate control parameters.
  float target_bitrate_;
  float incoming_framerate_;
  float per_frame_bandwidth_;
  float buffer_level_;

  // Data accumulated every ~1sec from MediaOpt.
  float sum_target_rate_;
  float sum_incoming_framerate_;
  float sum_rate_MM_;
  float sum_rate_MM_sgn_;
  float sum_packet_loss_;
  // Counters.
  uint32_t frame_cnt_;
  uint32_t frame_cnt_delta_;
  uint32_t update_rate_cnt_;
  uint32_t low_buffer_cnt_;

  // Resolution state parameters.
  float state_dec_factor_spatial_;
  float state_dec_factor_temporal_;

  // Quantities used for selection.
  float avg_target_rate_;
  float avg_incoming_framerate_;
  float avg_ratio_buffer_low_;
  float avg_rate_mismatch_;
  float avg_rate_mismatch_sgn_;
  float avg_packet_loss_;
  EncoderState encoder_state_;
  ResolutionAction action_;
  // Short history of the down-sampling actions from the Initialize() state.
  // This is needed for going up in resolution. Since the total amount of
  // down-sampling actions are constrained, the length of the list need not be
  // large: i.e., (4/3) ^{kDownActionHistorySize} <= kMaxDownSample.
  ResolutionAction down_action_history_[kDownActionHistorySize];
  int num_layers_;
};

// Robustness settings class.

class VCMQmRobustness : public VCMQmMethod {
 public:
  VCMQmRobustness();
  ~VCMQmRobustness();

  virtual void Reset();

  // Adjust FEC rate based on content: every ~1 sec from SetTargetRates.
  // Returns an adjustment factor.
  float AdjustFecFactor(uint8_t code_rate_delta,
                        float total_rate,
                        float framerate,
                        int64_t rtt_time,
                        uint8_t packet_loss);

  // Set the UEP protection on/off.
  bool SetUepProtection(uint8_t code_rate_delta,
                        float total_rate,
                        uint8_t packet_loss,
                        bool frame_type);

 private:
  // Previous state of network parameters.
  float prev_total_rate_;
  int64_t prev_rtt_time_;
  uint8_t prev_packet_loss_;
  uint8_t prev_code_rate_delta_;
};
}  // namespace webrtc
#endif  // WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_