1/*
2 *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h"
12
13#include "webrtc/base/bitbuffer.h"
14#include "webrtc/base/bytebuffer.h"
15#include "webrtc/base/logging.h"
16
// Makes the enclosing function return false when |x| evaluates to false.
// The body is wrapped in do { } while (0) so that an invocation followed by a
// semicolon forms exactly one statement; a bare if-block would break (or
// silently rebind the else) when used inside an unbraced if/else.
#define RETURN_FALSE_ON_FAIL(x) \
  do {                          \
    if (!(x)) {                 \
      return false;             \
    }                           \
  } while (0)
21
22namespace webrtc {
23
24H264SpsParser::H264SpsParser(const uint8_t* sps, size_t byte_length)
25    : sps_(sps), byte_length_(byte_length), width_(), height_() {
26}
27
28bool H264SpsParser::Parse() {
29  // General note: this is based off the 02/2014 version of the H.264 standard.
30  // You can find it on this page:
31  // http://www.itu.int/rec/T-REC-H.264
32
33  const char* sps_bytes = reinterpret_cast<const char*>(sps_);
34  // First, parse out rbsp, which is basically the source buffer minus emulation
35  // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in
36  // section 7.3.1 of the H.264 standard.
37  rtc::ByteBuffer rbsp_buffer;
38  for (size_t i = 0; i < byte_length_;) {
39    // Be careful about over/underflow here. byte_length_ - 3 can underflow, and
40    // i + 3 can overflow, but byte_length_ - i can't, because i < byte_length_
41    // above, and that expression will produce the number of bytes left in
42    // the stream including the byte at i.
43    if (byte_length_ - i >= 3 && sps_[i] == 0 && sps_[i + 1] == 0 &&
44        sps_[i + 2] == 3) {
45      // Two rbsp bytes + the emulation byte.
46      rbsp_buffer.WriteBytes(sps_bytes + i, 2);
47      i += 3;
48    } else {
49      // Single rbsp byte.
50      rbsp_buffer.WriteBytes(sps_bytes + i, 1);
51      i++;
52    }
53  }
54
55  // Now, we need to use a bit buffer to parse through the actual AVC SPS
56  // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the
57  // H.264 standard for a complete description.
58  // Since we only care about resolution, we ignore the majority of fields, but
59  // we still have to actively parse through a lot of the data, since many of
60  // the fields have variable size.
61  // We're particularly interested in:
62  // chroma_format_idc -> affects crop units
63  // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16).
64  // frame_crop_*_offset -> crop information
65  rtc::BitBuffer parser(reinterpret_cast<const uint8_t*>(rbsp_buffer.Data()),
66                        rbsp_buffer.Length());
67
68  // The golomb values we have to read, not just consume.
69  uint32_t golomb_ignored;
70
71  // separate_colour_plane_flag is optional (assumed 0), but has implications
72  // about the ChromaArrayType, which modifies how we treat crop coordinates.
73  uint32_t separate_colour_plane_flag = 0;
74  // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is
75  // 0. It defaults to 1, when not specified.
76  uint32_t chroma_format_idc = 1;
77
78  // profile_idc: u(8). We need it to determine if we need to read/skip chroma
79  // formats.
80  uint8_t profile_idc;
81  RETURN_FALSE_ON_FAIL(parser.ReadUInt8(&profile_idc));
82  // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits
83  // 1 bit each for the flags + 2 bits = 8 bits = 1 byte.
84  RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1));
85  // level_idc: u(8)
86  RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1));
87  // seq_parameter_set_id: ue(v)
88  RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
89  // See if profile_idc has chroma format information.
90  if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 ||
91      profile_idc == 244 || profile_idc == 44 || profile_idc == 83 ||
92      profile_idc == 86 || profile_idc == 118 || profile_idc == 128 ||
93      profile_idc == 138 || profile_idc == 139 || profile_idc == 134) {
94    // chroma_format_idc: ue(v)
95    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&chroma_format_idc));
96    if (chroma_format_idc == 3) {
97      // separate_colour_plane_flag: u(1)
98      RETURN_FALSE_ON_FAIL(parser.ReadBits(&separate_colour_plane_flag, 1));
99    }
100    // bit_depth_luma_minus8: ue(v)
101    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
102    // bit_depth_chroma_minus8: ue(v)
103    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
104    // qpprime_y_zero_transform_bypass_flag: u(1)
105    RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
106    // seq_scaling_matrix_present_flag: u(1)
107    uint32_t seq_scaling_matrix_present_flag;
108    RETURN_FALSE_ON_FAIL(parser.ReadBits(&seq_scaling_matrix_present_flag, 1));
109    if (seq_scaling_matrix_present_flag) {
110      // seq_scaling_list_present_flags. Either 8 or 12, depending on
111      // chroma_format_idc.
112      uint32_t seq_scaling_list_present_flags;
113      if (chroma_format_idc != 3) {
114        RETURN_FALSE_ON_FAIL(
115            parser.ReadBits(&seq_scaling_list_present_flags, 8));
116      } else {
117        RETURN_FALSE_ON_FAIL(
118            parser.ReadBits(&seq_scaling_list_present_flags, 12));
119      }
120      // We don't support reading the sequence scaling list, and we don't really
121      // see/use them in practice, so we'll just reject the full sps if we see
122      // any provided.
123      if (seq_scaling_list_present_flags > 0) {
124        LOG(LS_WARNING) << "SPS contains scaling lists, which are unsupported.";
125        return false;
126      }
127    }
128  }
129  // log2_max_frame_num_minus4: ue(v)
130  RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
131  // pic_order_cnt_type: ue(v)
132  uint32_t pic_order_cnt_type;
133  RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_order_cnt_type));
134  if (pic_order_cnt_type == 0) {
135    // log2_max_pic_order_cnt_lsb_minus4: ue(v)
136    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
137  } else if (pic_order_cnt_type == 1) {
138    // delta_pic_order_always_zero_flag: u(1)
139    RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
140    // offset_for_non_ref_pic: se(v)
141    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
142    // offset_for_top_to_bottom_field: se(v)
143    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
144    // num_ref_frames_in_pic_order_cnt_cycle: ue(v)
145    uint32_t num_ref_frames_in_pic_order_cnt_cycle;
146    RETURN_FALSE_ON_FAIL(
147        parser.ReadExponentialGolomb(&num_ref_frames_in_pic_order_cnt_cycle));
148    for (size_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) {
149      // offset_for_ref_frame[i]: se(v)
150      RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
151    }
152  }
153  // max_num_ref_frames: ue(v)
154  RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
155  // gaps_in_frame_num_value_allowed_flag: u(1)
156  RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
157  //
158  // IMPORTANT ONES! Now we're getting to resolution. First we read the pic
159  // width/height in macroblocks (16x16), which gives us the base resolution,
160  // and then we continue on until we hit the frame crop offsets, which are used
161  // to signify resolutions that aren't multiples of 16.
162  //
163  // pic_width_in_mbs_minus1: ue(v)
164  uint32_t pic_width_in_mbs_minus1;
165  RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_width_in_mbs_minus1));
166  // pic_height_in_map_units_minus1: ue(v)
167  uint32_t pic_height_in_map_units_minus1;
168  RETURN_FALSE_ON_FAIL(
169      parser.ReadExponentialGolomb(&pic_height_in_map_units_minus1));
170  // frame_mbs_only_flag: u(1)
171  uint32_t frame_mbs_only_flag;
172  RETURN_FALSE_ON_FAIL(parser.ReadBits(&frame_mbs_only_flag, 1));
173  if (!frame_mbs_only_flag) {
174    // mb_adaptive_frame_field_flag: u(1)
175    RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
176  }
177  // direct_8x8_inference_flag: u(1)
178  RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
179  //
180  // MORE IMPORTANT ONES! Now we're at the frame crop information.
181  //
182  // frame_cropping_flag: u(1)
183  uint32_t frame_cropping_flag;
184  uint32_t frame_crop_left_offset = 0;
185  uint32_t frame_crop_right_offset = 0;
186  uint32_t frame_crop_top_offset = 0;
187  uint32_t frame_crop_bottom_offset = 0;
188  RETURN_FALSE_ON_FAIL(parser.ReadBits(&frame_cropping_flag, 1));
189  if (frame_cropping_flag) {
190    // frame_crop_{left, right, top, bottom}_offset: ue(v)
191    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&frame_crop_left_offset));
192    RETURN_FALSE_ON_FAIL(
193        parser.ReadExponentialGolomb(&frame_crop_right_offset));
194    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&frame_crop_top_offset));
195    RETURN_FALSE_ON_FAIL(
196        parser.ReadExponentialGolomb(&frame_crop_bottom_offset));
197  }
198
199  // Far enough! We don't use the rest of the SPS.
200
201  // Start with the resolution determined by the pic_width/pic_height fields.
202  int width = 16 * (pic_width_in_mbs_minus1 + 1);
203  int height =
204      16 * (2 - frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1);
205
206  // Figure out the crop units in pixels. That's based on the chroma format's
207  // sampling, which is indicated by chroma_format_idc.
208  if (separate_colour_plane_flag || chroma_format_idc == 0) {
209    frame_crop_bottom_offset *= (2 - frame_mbs_only_flag);
210    frame_crop_top_offset *= (2 - frame_mbs_only_flag);
211  } else if (!separate_colour_plane_flag && chroma_format_idc > 0) {
212    // Width multipliers for formats 1 (4:2:0) and 2 (4:2:2).
213    if (chroma_format_idc == 1 || chroma_format_idc == 2) {
214      frame_crop_left_offset *= 2;
215      frame_crop_right_offset *= 2;
216    }
217    // Height multipliers for format 1 (4:2:0).
218    if (chroma_format_idc == 1) {
219      frame_crop_top_offset *= 2;
220      frame_crop_bottom_offset *= 2;
221    }
222  }
223  // Subtract the crop for each dimension.
224  width -= (frame_crop_left_offset + frame_crop_right_offset);
225  height -= (frame_crop_top_offset + frame_crop_bottom_offset);
226
227  width_ = width;
228  height_ = height;
229  return true;
230}
231
232}  // namespace webrtc
233