1/*
2 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#ifndef WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_
12#define WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_
13
14#include "webrtc/common_types.h"
15#include "webrtc/typedefs.h"
16
17/******************************************************/
18/* Quality Modes: Resolution and Robustness settings  */
19/******************************************************/
20
21namespace webrtc {
22struct VideoContentMetrics;
23
// Codec frame size / frame rate together with the spatial and temporal scale
// factors and the flags signalling a resolution change.
struct VCMResolutionScale {
  // Codec frame dimensions and frame rate.
  uint16_t codec_width;
  uint16_t codec_height;
  float frame_rate;

  // Scale factors for width, height and frame rate (1.0 means unscaled).
  float spatial_width_fact;
  float spatial_height_fact;
  float temporal_fact;

  // Flags marking a spatial and/or temporal resolution change.
  bool change_resolution_spatial;
  bool change_resolution_temporal;

  // Defaults: 640x480 at a frame rate of 30, unit scale factors, and no
  // resolution change flagged.
  VCMResolutionScale()
      : codec_width(640), codec_height(480), frame_rate(30.0f),
        spatial_width_fact(1.0f), spatial_height_fact(1.0f),
        temporal_fact(1.0f),
        change_resolution_spatial(false), change_resolution_temporal(false) {}
};
44
// Frame-size classes, listed from the smallest to the largest resolution.
// The enumerator value is used as an index into per-type tables, so the
// numbering must stay contiguous and start at 0.
enum ImageType {
  kQCIF = 0,     // 176x144
  kHCIF = 1,     // 264x216: half (~3/4 x 3/4) of CIF.
  kQVGA = 2,     // 320x240: quarter VGA.
  kCIF = 3,      // 352x288
  kHVGA = 4,     // 480x360: half (~3/4 x 3/4) of VGA.
  kVGA = 5,      // 640x480
  kQFULLHD = 6,  // 960x540: quarter FULLHD, and half (~3/4 x 3/4) of WHD.
  kWHD = 7,      // 1280x720
  kFULLHD = 8,   // 1920x1080
  kNumImageTypes  // Number of image types; must remain the last entry.
};
57
58const uint32_t kSizeOfImageType[kNumImageTypes] =
59{ 25344, 57024, 76800, 101376, 172800, 307200, 518400, 921600, 2073600 };
60
// Frame rate level classes, ordered from the lowest to the highest level.
enum FrameRateLevelClass {
  kFrameRateLow = 0,
  kFrameRateMiddle1 = 1,
  kFrameRateMiddle2 = 2,
  kFrameRateHigh = 3
};
67
// Level assigned to a content feature. kDefault is the value a
// VCMContFeature holds after construction and after Reset().
enum ContentLevelClass {
  kLow = 0,
  kHigh = 1,
  kDefault = 2
};
73
74struct VCMContFeature {
75  VCMContFeature()
76      : value(0.0f),
77        level(kDefault) {
78  }
79  void Reset() {
80    value = 0.0f;
81    level = kDefault;
82  }
83  float value;
84  ContentLevelClass level;
85};
86
// Direction of a resolution change.
enum UpDownAction {
  kUpResolution = 0,   // Go (back) up in resolution.
  kDownResolution = 1  // Go down in resolution.
};
91
// Spatial down-sampling modes. The enumerator value indexes the
// kFactorWidthSpatial / kFactorHeightSpatial tables.
enum SpatialAction {
  kNoChangeSpatial = 0,
  kOneHalfSpatialUniform = 1,     // 3/4 x 3/4: 9/16 (~1/2) of the pixels.
  kOneQuarterSpatialUniform = 2,  // 1/2 x 1/2: 1/4 of the pixels.
  kNumModesSpatial  // Number of spatial modes; must remain the last entry.
};
98
// Temporal (frame rate) down-sampling modes. The enumerator value indexes the
// kFactorTemporal table.
enum TemporalAction {
  kNoChangeTemporal = 0,
  kTwoThirdsTemporal = 1,  // 2/3 frame rate reduction.
  kOneHalfTemporal = 2,    // 1/2 frame rate reduction.
  kNumModesTemporal  // Number of temporal modes; must remain the last entry.
};
105
106struct ResolutionAction {
107  ResolutionAction()
108      : spatial(kNoChangeSpatial),
109        temporal(kNoChangeTemporal) {
110  }
111  SpatialAction spatial;
112  TemporalAction temporal;
113};
114
115// Down-sampling factors for spatial (width and height), and temporal.
116const float kFactorWidthSpatial[kNumModesSpatial] =
117    { 1.0f, 4.0f / 3.0f, 2.0f };
118
119const float kFactorHeightSpatial[kNumModesSpatial] =
120    { 1.0f, 4.0f / 3.0f, 2.0f };
121
122const float kFactorTemporal[kNumModesTemporal] =
123    { 1.0f, 1.5f, 2.0f };
124
// Coarse classification of how the encoder is coping with its target rate.
enum EncoderState {
  // Low rate mis-match, stable buffer levels.
  kStableEncoding = 0,
  // Significant over-shooting of target rate, buffer under-flow, etc.
  kStressedEncoding = 1,
  // Significant under-shooting of target rate.
  kEasyEncoding = 2
};
131
132// QmMethod class: main class for resolution and robustness settings
133
134class VCMQmMethod {
135 public:
136  VCMQmMethod();
137  virtual ~VCMQmMethod();
138
139  // Reset values
140  void ResetQM();
141  virtual void Reset() = 0;
142
143  // Compute content class.
144  uint8_t ComputeContentClass();
145
146  // Update with the content metrics.
147  void UpdateContent(const VideoContentMetrics* content_metrics);
148
149  // Compute spatial texture magnitude and level.
150  // Spatial texture is a spatial prediction error measure.
151  void ComputeSpatial();
152
153  // Compute motion magnitude and level for NFD metric.
154  // NFD is normalized frame difference (normalized by spatial variance).
155  void ComputeMotionNFD();
156
157  // Get the imageType (CIF, VGA, HD, etc) for the system width/height.
158  ImageType GetImageType(uint16_t width, uint16_t height);
159
160  // Return the closest image type.
161  ImageType FindClosestImageType(uint16_t width, uint16_t height);
162
163  // Get the frame rate level.
164  FrameRateLevelClass FrameRateLevel(float frame_rate);
165
166 protected:
167  // Content Data.
168  const VideoContentMetrics* content_metrics_;
169
170  // Encoder frame sizes and native frame sizes.
171  uint16_t width_;
172  uint16_t height_;
173  float user_frame_rate_;
174  uint16_t native_width_;
175  uint16_t native_height_;
176  float native_frame_rate_;
177  float aspect_ratio_;
178  // Image type and frame rate leve, for the current encoder resolution.
179  ImageType image_type_;
180  FrameRateLevelClass framerate_level_;
181  // Content class data.
182  VCMContFeature motion_;
183  VCMContFeature spatial_;
184  uint8_t content_class_;
185  bool init_;
186};
187
188// Resolution settings class
189
190class VCMQmResolution : public VCMQmMethod {
191 public:
192  VCMQmResolution();
193  virtual ~VCMQmResolution();
194
195  // Reset all quantities.
196  virtual void Reset();
197
198  // Reset rate quantities and counters after every SelectResolution() call.
199  void ResetRates();
200
201  // Reset down-sampling state.
202  void ResetDownSamplingState();
203
204  // Get the encoder state.
205  EncoderState GetEncoderState();
206
207  // Initialize after SetEncodingData in media_opt.
208  int Initialize(float bitrate,
209                 float user_framerate,
210                 uint16_t width,
211                 uint16_t height,
212                 int num_layers);
213
214  // Update the encoder frame size.
215  void UpdateCodecParameters(float frame_rate, uint16_t width, uint16_t height);
216
217  // Update with actual bit rate (size of the latest encoded frame)
218  // and frame type, after every encoded frame.
219  void UpdateEncodedSize(int encoded_size,
220                         FrameType encoded_frame_type);
221
222  // Update with new target bitrate, actual encoder sent rate, frame_rate,
223  // loss rate: every ~1 sec from SetTargetRates in media_opt.
224  void UpdateRates(float target_bitrate,
225                   float encoder_sent_rate,
226                   float incoming_framerate,
227                   uint8_t packet_loss);
228
229  // Extract ST (spatio-temporal) resolution action.
230  // Inputs: qm: Reference to the quality modes pointer.
231  // Output: the spatial and/or temporal scale change.
232  int SelectResolution(VCMResolutionScale** qm);
233
234 private:
235  // Set the default resolution action.
236  void SetDefaultAction();
237
238  // Compute rates for the selection of down-sampling action.
239  void ComputeRatesForSelection();
240
241  // Compute the encoder state.
242  void ComputeEncoderState();
243
244  // Return true if the action is to go back up in resolution.
245  bool GoingUpResolution();
246
247  // Return true if the action is to go down in resolution.
248  bool GoingDownResolution();
249
250  // Check the condition for going up in resolution by the scale factors:
251  // |facWidth|, |facHeight|, |facTemp|.
252  // |scaleFac| is a scale factor for the transition rate.
253  bool ConditionForGoingUp(float fac_width,
254                           float fac_height,
255                           float fac_temp,
256                           float scale_fac);
257
258  // Get the bitrate threshold for the resolution action.
259  // The case |facWidth|=|facHeight|=|facTemp|==1 is for down-sampling action.
260  // |scaleFac| is a scale factor for the transition rate.
261  float GetTransitionRate(float fac_width,
262                          float fac_height,
263                          float fac_temp,
264                          float scale_fac);
265
266  // Update the down-sampling state.
267  void UpdateDownsamplingState(UpDownAction up_down);
268
269  // Update the codec frame size and frame rate.
270  void UpdateCodecResolution();
271
272  // Return a state based on average target rate relative transition rate.
273  uint8_t RateClass(float transition_rate);
274
275  // Adjust the action selected from the table.
276  void AdjustAction();
277
278  // Covert 2 stages of 3/4 (=9/16) spatial decimation to 1/2.
279  void ConvertSpatialFractionalToWhole();
280
281  // Returns true if the new frame sizes, under the selected spatial action,
282  // are of even size.
283  bool EvenFrameSize();
284
285  // Insert latest down-sampling action into the history list.
286  void InsertLatestDownAction();
287
288  // Remove the last (first element) down-sampling action from the list.
289  void RemoveLastDownAction();
290
291  // Check constraints on the amount of down-sampling allowed.
292  void ConstrainAmountOfDownSampling();
293
294  // For going up in resolution: pick spatial or temporal action,
295  // if both actions were separately selected.
296  void PickSpatialOrTemporal();
297
298  // Select the directional (1x2 or 2x1) spatial down-sampling action.
299  void SelectSpatialDirectionMode(float transition_rate);
300
301  enum { kDownActionHistorySize = 10};
302
303  VCMResolutionScale* qm_;
304  // Encoder rate control parameters.
305  float target_bitrate_;
306  float incoming_framerate_;
307  float per_frame_bandwidth_;
308  float buffer_level_;
309
310  // Data accumulated every ~1sec from MediaOpt.
311  float sum_target_rate_;
312  float sum_incoming_framerate_;
313  float sum_rate_MM_;
314  float sum_rate_MM_sgn_;
315  float sum_packet_loss_;
316  // Counters.
317  uint32_t frame_cnt_;
318  uint32_t frame_cnt_delta_;
319  uint32_t update_rate_cnt_;
320  uint32_t low_buffer_cnt_;
321
322  // Resolution state parameters.
323  float state_dec_factor_spatial_;
324  float state_dec_factor_temporal_;
325
326  // Quantities used for selection.
327  float avg_target_rate_;
328  float avg_incoming_framerate_;
329  float avg_ratio_buffer_low_;
330  float avg_rate_mismatch_;
331  float avg_rate_mismatch_sgn_;
332  float avg_packet_loss_;
333  EncoderState encoder_state_;
334  ResolutionAction action_;
335  // Short history of the down-sampling actions from the Initialize() state.
336  // This is needed for going up in resolution. Since the total amount of
337  // down-sampling actions are constrained, the length of the list need not be
338  // large: i.e., (4/3) ^{kDownActionHistorySize} <= kMaxDownSample.
339  ResolutionAction down_action_history_[kDownActionHistorySize];
340  int num_layers_;
341};
342
343// Robustness settings class.
344
345class VCMQmRobustness : public VCMQmMethod {
346 public:
347  VCMQmRobustness();
348  ~VCMQmRobustness();
349
350  virtual void Reset();
351
352  // Adjust FEC rate based on content: every ~1 sec from SetTargetRates.
353  // Returns an adjustment factor.
354  float AdjustFecFactor(uint8_t code_rate_delta,
355                        float total_rate,
356                        float framerate,
357                        uint32_t rtt_time,
358                        uint8_t packet_loss);
359
360  // Set the UEP protection on/off.
361  bool SetUepProtection(uint8_t code_rate_delta,
362                        float total_rate,
363                        uint8_t packet_loss,
364                        bool frame_type);
365
366 private:
367  // Previous state of network parameters.
368  float prev_total_rate_;
369  uint32_t prev_rtt_time_;
370  uint8_t prev_packet_loss_;
371  uint8_t prev_code_rate_delta_;
372};
373}  // namespace webrtc
374#endif  // WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_
375