avc_utils.cpp revision d411b4ca2945cd8974a3a78199fce94646950128
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "avc_utils"
19#include <utils/Log.h>
20
21#include "include/avc_utils.h"
22
23#include <media/stagefright/foundation/ABitReader.h>
24#include <media/stagefright/foundation/ADebug.h>
25#include <media/stagefright/foundation/hexdump.h>
26#include <media/stagefright/MediaDefs.h>
27#include <media/stagefright/MediaErrors.h>
28#include <media/stagefright/MetaData.h>
29
30namespace android {
31
32unsigned parseUE(ABitReader *br) {
33    unsigned numZeroes = 0;
34    while (br->getBits(1) == 0) {
35        ++numZeroes;
36    }
37
38    unsigned x = br->getBits(numZeroes);
39
40    return x + (1u << numZeroes) - 1;
41}
42
43// Determine video dimensions from the sequence parameterset.
44void FindAVCDimensions(
45        const sp<ABuffer> &seqParamSet,
46        int32_t *width, int32_t *height,
47        int32_t *sarWidth, int32_t *sarHeight) {
48    ABitReader br(seqParamSet->data() + 1, seqParamSet->size() - 1);
49
50    unsigned profile_idc = br.getBits(8);
51    br.skipBits(16);
52    parseUE(&br);  // seq_parameter_set_id
53
54    unsigned chroma_format_idc = 1;  // 4:2:0 chroma format
55
56    if (profile_idc == 100 || profile_idc == 110
57            || profile_idc == 122 || profile_idc == 244
58            || profile_idc == 44 || profile_idc == 83 || profile_idc == 86) {
59        chroma_format_idc = parseUE(&br);
60        if (chroma_format_idc == 3) {
61            br.skipBits(1);  // residual_colour_transform_flag
62        }
63        parseUE(&br);  // bit_depth_luma_minus8
64        parseUE(&br);  // bit_depth_chroma_minus8
65        br.skipBits(1);  // qpprime_y_zero_transform_bypass_flag
66        CHECK_EQ(br.getBits(1), 0u);  // seq_scaling_matrix_present_flag
67    }
68
69    parseUE(&br);  // log2_max_frame_num_minus4
70    unsigned pic_order_cnt_type = parseUE(&br);
71
72    if (pic_order_cnt_type == 0) {
73        parseUE(&br);  // log2_max_pic_order_cnt_lsb_minus4
74    } else if (pic_order_cnt_type == 1) {
75        // offset_for_non_ref_pic, offset_for_top_to_bottom_field and
76        // offset_for_ref_frame are technically se(v), but since we are
77        // just skipping over them the midpoint does not matter.
78
79        br.getBits(1);  // delta_pic_order_always_zero_flag
80        parseUE(&br);  // offset_for_non_ref_pic
81        parseUE(&br);  // offset_for_top_to_bottom_field
82
83        unsigned num_ref_frames_in_pic_order_cnt_cycle = parseUE(&br);
84        for (unsigned i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) {
85            parseUE(&br);  // offset_for_ref_frame
86        }
87    }
88
89    parseUE(&br);  // num_ref_frames
90    br.getBits(1);  // gaps_in_frame_num_value_allowed_flag
91
92    unsigned pic_width_in_mbs_minus1 = parseUE(&br);
93    unsigned pic_height_in_map_units_minus1 = parseUE(&br);
94    unsigned frame_mbs_only_flag = br.getBits(1);
95
96    *width = pic_width_in_mbs_minus1 * 16 + 16;
97
98    *height = (2 - frame_mbs_only_flag)
99        * (pic_height_in_map_units_minus1 * 16 + 16);
100
101    if (!frame_mbs_only_flag) {
102        br.getBits(1);  // mb_adaptive_frame_field_flag
103    }
104
105    br.getBits(1);  // direct_8x8_inference_flag
106
107    if (br.getBits(1)) {  // frame_cropping_flag
108        unsigned frame_crop_left_offset = parseUE(&br);
109        unsigned frame_crop_right_offset = parseUE(&br);
110        unsigned frame_crop_top_offset = parseUE(&br);
111        unsigned frame_crop_bottom_offset = parseUE(&br);
112
113        unsigned cropUnitX, cropUnitY;
114        if (chroma_format_idc == 0  /* monochrome */) {
115            cropUnitX = 1;
116            cropUnitY = 2 - frame_mbs_only_flag;
117        } else {
118            unsigned subWidthC = (chroma_format_idc == 3) ? 1 : 2;
119            unsigned subHeightC = (chroma_format_idc == 1) ? 2 : 1;
120
121            cropUnitX = subWidthC;
122            cropUnitY = subHeightC * (2 - frame_mbs_only_flag);
123        }
124
125        ALOGV("frame_crop = (%u, %u, %u, %u), cropUnitX = %u, cropUnitY = %u",
126             frame_crop_left_offset, frame_crop_right_offset,
127             frame_crop_top_offset, frame_crop_bottom_offset,
128             cropUnitX, cropUnitY);
129
130        *width -=
131            (frame_crop_left_offset + frame_crop_right_offset) * cropUnitX;
132        *height -=
133            (frame_crop_top_offset + frame_crop_bottom_offset) * cropUnitY;
134    }
135
136    if (sarWidth != NULL) {
137        *sarWidth = 0;
138    }
139
140    if (sarHeight != NULL) {
141        *sarHeight = 0;
142    }
143
144    if (br.getBits(1)) {  // vui_parameters_present_flag
145        unsigned sar_width = 0, sar_height = 0;
146
147        if (br.getBits(1)) {  // aspect_ratio_info_present_flag
148            unsigned aspect_ratio_idc = br.getBits(8);
149
150            if (aspect_ratio_idc == 255 /* extendedSAR */) {
151                sar_width = br.getBits(16);
152                sar_height = br.getBits(16);
153            } else if (aspect_ratio_idc > 0 && aspect_ratio_idc < 14) {
154                static const int32_t kFixedSARWidth[] = {
155                    1, 12, 10, 16, 40, 24, 20, 32, 80, 18, 15, 64, 160
156                };
157
158                static const int32_t kFixedSARHeight[] = {
159                    1, 11, 11, 11, 33, 11, 11, 11, 33, 11, 11, 33, 99
160                };
161
162                sar_width = kFixedSARWidth[aspect_ratio_idc - 1];
163                sar_height = kFixedSARHeight[aspect_ratio_idc - 1];
164            }
165        }
166
167        ALOGV("sample aspect ratio = %u : %u", sar_width, sar_height);
168
169        if (sarWidth != NULL) {
170            *sarWidth = sar_width;
171        }
172
173        if (sarHeight != NULL) {
174            *sarHeight = sar_height;
175        }
176    }
177}
178
179status_t getNextNALUnit(
180        const uint8_t **_data, size_t *_size,
181        const uint8_t **nalStart, size_t *nalSize,
182        bool startCodeFollows) {
183    const uint8_t *data = *_data;
184    size_t size = *_size;
185
186    *nalStart = NULL;
187    *nalSize = 0;
188
189    if (size == 0) {
190        return -EAGAIN;
191    }
192
193    // Skip any number of leading 0x00.
194
195    size_t offset = 0;
196    while (offset < size && data[offset] == 0x00) {
197        ++offset;
198    }
199
200    if (offset == size) {
201        return -EAGAIN;
202    }
203
204    // A valid startcode consists of at least two 0x00 bytes followed by 0x01.
205
206    if (offset < 2 || data[offset] != 0x01) {
207        return ERROR_MALFORMED;
208    }
209
210    ++offset;
211
212    size_t startOffset = offset;
213
214    for (;;) {
215        while (offset < size && data[offset] != 0x01) {
216            ++offset;
217        }
218
219        if (offset == size) {
220            if (startCodeFollows) {
221                offset = size + 2;
222                break;
223            }
224
225            return -EAGAIN;
226        }
227
228        if (data[offset - 1] == 0x00 && data[offset - 2] == 0x00) {
229            break;
230        }
231
232        ++offset;
233    }
234
235    size_t endOffset = offset - 2;
236    while (endOffset > startOffset + 1 && data[endOffset - 1] == 0x00) {
237        --endOffset;
238    }
239
240    *nalStart = &data[startOffset];
241    *nalSize = endOffset - startOffset;
242
243    if (offset + 2 < size) {
244        *_data = &data[offset - 2];
245        *_size = size - offset + 2;
246    } else {
247        *_data = NULL;
248        *_size = 0;
249    }
250
251    return OK;
252}
253
254static sp<ABuffer> FindNAL(const uint8_t *data, size_t size, unsigned nalType) {
255    const uint8_t *nalStart;
256    size_t nalSize;
257    while (getNextNALUnit(&data, &size, &nalStart, &nalSize, true) == OK) {
258        if ((nalStart[0] & 0x1f) == nalType) {
259            sp<ABuffer> buffer = new ABuffer(nalSize);
260            memcpy(buffer->data(), nalStart, nalSize);
261            return buffer;
262        }
263    }
264
265    return NULL;
266}
267
268const char *AVCProfileToString(uint8_t profile) {
269    switch (profile) {
270        case kAVCProfileBaseline:
271            return "Baseline";
272        case kAVCProfileMain:
273            return "Main";
274        case kAVCProfileExtended:
275            return "Extended";
276        case kAVCProfileHigh:
277            return "High";
278        case kAVCProfileHigh10:
279            return "High 10";
280        case kAVCProfileHigh422:
281            return "High 422";
282        case kAVCProfileHigh444:
283            return "High 444";
284        case kAVCProfileCAVLC444Intra:
285            return "CAVLC 444 Intra";
286        default:   return "Unknown";
287    }
288}
289
290sp<MetaData> MakeAVCCodecSpecificData(const sp<ABuffer> &accessUnit) {
291    const uint8_t *data = accessUnit->data();
292    size_t size = accessUnit->size();
293
294    sp<ABuffer> seqParamSet = FindNAL(data, size, 7);
295    if (seqParamSet == NULL) {
296        return NULL;
297    }
298
299    int32_t width, height;
300    int32_t sarWidth, sarHeight;
301    FindAVCDimensions(
302            seqParamSet, &width, &height, &sarWidth, &sarHeight);
303
304    sp<ABuffer> picParamSet = FindNAL(data, size, 8);
305    CHECK(picParamSet != NULL);
306
307    size_t csdSize =
308        1 + 3 + 1 + 1
309        + 2 * 1 + seqParamSet->size()
310        + 1 + 2 * 1 + picParamSet->size();
311
312    sp<ABuffer> csd = new ABuffer(csdSize);
313    uint8_t *out = csd->data();
314
315    *out++ = 0x01;  // configurationVersion
316    memcpy(out, seqParamSet->data() + 1, 3);  // profile/level...
317
318    uint8_t profile = out[0];
319    uint8_t level = out[2];
320
321    out += 3;
322    *out++ = (0x3f << 2) | 1;  // lengthSize == 2 bytes
323    *out++ = 0xe0 | 1;
324
325    *out++ = seqParamSet->size() >> 8;
326    *out++ = seqParamSet->size() & 0xff;
327    memcpy(out, seqParamSet->data(), seqParamSet->size());
328    out += seqParamSet->size();
329
330    *out++ = 1;
331
332    *out++ = picParamSet->size() >> 8;
333    *out++ = picParamSet->size() & 0xff;
334    memcpy(out, picParamSet->data(), picParamSet->size());
335
336#if 0
337    ALOGI("AVC seq param set");
338    hexdump(seqParamSet->data(), seqParamSet->size());
339#endif
340
341    sp<MetaData> meta = new MetaData;
342    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_AVC);
343
344    meta->setData(kKeyAVCC, kTypeAVCC, csd->data(), csd->size());
345    meta->setInt32(kKeyWidth, width);
346    meta->setInt32(kKeyHeight, height);
347
348    if (sarWidth > 1 || sarHeight > 1) {
349        // We treat 0:0 (unspecified) as 1:1.
350
351        meta->setInt32(kKeySARWidth, sarWidth);
352        meta->setInt32(kKeySARHeight, sarHeight);
353
354        ALOGI("found AVC codec config (%d x %d, %s-profile level %d.%d) "
355              "SAR %d : %d",
356             width,
357             height,
358             AVCProfileToString(profile),
359             level / 10,
360             level % 10,
361             sarWidth,
362             sarHeight);
363    } else {
364        ALOGI("found AVC codec config (%d x %d, %s-profile level %d.%d)",
365             width,
366             height,
367             AVCProfileToString(profile),
368             level / 10,
369             level % 10);
370    }
371
372    return meta;
373}
374
375bool IsIDR(const sp<ABuffer> &buffer) {
376    const uint8_t *data = buffer->data();
377    size_t size = buffer->size();
378
379    bool foundIDR = false;
380
381    const uint8_t *nalStart;
382    size_t nalSize;
383    while (getNextNALUnit(&data, &size, &nalStart, &nalSize, true) == OK) {
384        CHECK_GT(nalSize, 0u);
385
386        unsigned nalType = nalStart[0] & 0x1f;
387
388        if (nalType == 5) {
389            foundIDR = true;
390            break;
391        }
392    }
393
394    return foundIDR;
395}
396
397bool IsAVCReferenceFrame(const sp<ABuffer> &accessUnit) {
398    const uint8_t *data = accessUnit->data();
399    size_t size = accessUnit->size();
400
401    const uint8_t *nalStart;
402    size_t nalSize;
403    while (getNextNALUnit(&data, &size, &nalStart, &nalSize, true) == OK) {
404        CHECK_GT(nalSize, 0u);
405
406        unsigned nalType = nalStart[0] & 0x1f;
407
408        if (nalType == 5) {
409            return true;
410        } else if (nalType == 1) {
411            unsigned nal_ref_idc = (nalStart[0] >> 5) & 3;
412            return nal_ref_idc != 0;
413        }
414    }
415
416    return true;
417}
418
419sp<MetaData> MakeAACCodecSpecificData(
420        unsigned profile, unsigned sampling_freq_index,
421        unsigned channel_configuration) {
422    sp<MetaData> meta = new MetaData;
423    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AAC);
424
425    CHECK_LE(sampling_freq_index, 11u);
426    static const int32_t kSamplingFreq[] = {
427        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
428        16000, 12000, 11025, 8000
429    };
430    meta->setInt32(kKeySampleRate, kSamplingFreq[sampling_freq_index]);
431    meta->setInt32(kKeyChannelCount, channel_configuration);
432
433    static const uint8_t kStaticESDS[] = {
434        0x03, 22,
435        0x00, 0x00,     // ES_ID
436        0x00,           // streamDependenceFlag, URL_Flag, OCRstreamFlag
437
438        0x04, 17,
439        0x40,                       // Audio ISO/IEC 14496-3
440        0x00, 0x00, 0x00, 0x00,
441        0x00, 0x00, 0x00, 0x00,
442        0x00, 0x00, 0x00, 0x00,
443
444        0x05, 2,
445        // AudioSpecificInfo follows
446
447        // oooo offf fccc c000
448        // o - audioObjectType
449        // f - samplingFreqIndex
450        // c - channelConfig
451    };
452    sp<ABuffer> csd = new ABuffer(sizeof(kStaticESDS) + 2);
453    memcpy(csd->data(), kStaticESDS, sizeof(kStaticESDS));
454
455    csd->data()[sizeof(kStaticESDS)] =
456        ((profile + 1) << 3) | (sampling_freq_index >> 1);
457
458    csd->data()[sizeof(kStaticESDS) + 1] =
459        ((sampling_freq_index << 7) & 0x80) | (channel_configuration << 3);
460
461    meta->setData(kKeyESDS, 0, csd->data(), csd->size());
462
463    return meta;
464}
465
466bool ExtractDimensionsFromVOLHeader(
467        const uint8_t *data, size_t size, int32_t *width, int32_t *height) {
468    ABitReader br(&data[4], size - 4);
469    br.skipBits(1);  // random_accessible_vol
470    unsigned video_object_type_indication = br.getBits(8);
471
472    CHECK_NE(video_object_type_indication,
473             0x21u /* Fine Granularity Scalable */);
474
475    unsigned video_object_layer_verid;
476    unsigned video_object_layer_priority;
477    if (br.getBits(1)) {
478        video_object_layer_verid = br.getBits(4);
479        video_object_layer_priority = br.getBits(3);
480    }
481    unsigned aspect_ratio_info = br.getBits(4);
482    if (aspect_ratio_info == 0x0f /* extended PAR */) {
483        br.skipBits(8);  // par_width
484        br.skipBits(8);  // par_height
485    }
486    if (br.getBits(1)) {  // vol_control_parameters
487        br.skipBits(2);  // chroma_format
488        br.skipBits(1);  // low_delay
489        if (br.getBits(1)) {  // vbv_parameters
490            br.skipBits(15);  // first_half_bit_rate
491            CHECK(br.getBits(1));  // marker_bit
492            br.skipBits(15);  // latter_half_bit_rate
493            CHECK(br.getBits(1));  // marker_bit
494            br.skipBits(15);  // first_half_vbv_buffer_size
495            CHECK(br.getBits(1));  // marker_bit
496            br.skipBits(3);  // latter_half_vbv_buffer_size
497            br.skipBits(11);  // first_half_vbv_occupancy
498            CHECK(br.getBits(1));  // marker_bit
499            br.skipBits(15);  // latter_half_vbv_occupancy
500            CHECK(br.getBits(1));  // marker_bit
501        }
502    }
503    unsigned video_object_layer_shape = br.getBits(2);
504    CHECK_EQ(video_object_layer_shape, 0x00u /* rectangular */);
505
506    CHECK(br.getBits(1));  // marker_bit
507    unsigned vop_time_increment_resolution = br.getBits(16);
508    CHECK(br.getBits(1));  // marker_bit
509
510    if (br.getBits(1)) {  // fixed_vop_rate
511        // range [0..vop_time_increment_resolution)
512
513        // vop_time_increment_resolution
514        // 2 => 0..1, 1 bit
515        // 3 => 0..2, 2 bits
516        // 4 => 0..3, 2 bits
517        // 5 => 0..4, 3 bits
518        // ...
519
520        CHECK_GT(vop_time_increment_resolution, 0u);
521        --vop_time_increment_resolution;
522
523        unsigned numBits = 0;
524        while (vop_time_increment_resolution > 0) {
525            ++numBits;
526            vop_time_increment_resolution >>= 1;
527        }
528
529        br.skipBits(numBits);  // fixed_vop_time_increment
530    }
531
532    CHECK(br.getBits(1));  // marker_bit
533    unsigned video_object_layer_width = br.getBits(13);
534    CHECK(br.getBits(1));  // marker_bit
535    unsigned video_object_layer_height = br.getBits(13);
536    CHECK(br.getBits(1));  // marker_bit
537
538    unsigned interlaced = br.getBits(1);
539
540    *width = video_object_layer_width;
541    *height = video_object_layer_height;
542
543    return true;
544}
545
// Decodes a 32-bit MPEG audio frame header (MPEG-1, MPEG-2 or MPEG-2.5,
// layers I to III).
//
// On success returns true and stores the frame length in bytes in
// |*frame_size|. The remaining out parameters are optional (may be NULL) and
// receive the sampling rate in Hz, the channel count (1 or 2), the bitrate in
// kbit/s and the number of samples per frame.
//
// Returns false when the sync word is missing or when the version, layer,
// bitrate index (including the "free" bitrate) or sampling rate index is not
// acceptable. In that case |*frame_size| is 0, the sampling rate, channel
// count and bitrate outputs are 0 and the sample count output is 1152.
bool GetMPEGAudioFrameSize(
        uint32_t header, size_t *frame_size,
        int *out_sampling_rate, int *out_channels,
        int *out_bitrate, int *out_num_samples) {
    // MPEG-1 sampling rates; MPEG-2 halves and MPEG-2.5 quarters them.
    static const int kBaseSamplingRate[] = { 44100, 48000, 32000 };

    // Bitrates in kbit/s, all indexed by (bitrate_index - 1).
    static const int kLayer1RateV1[] = {
        32, 64, 96, 128, 160, 192, 224, 256,
        288, 320, 352, 384, 416, 448
    };
    static const int kLayer1RateV2[] = {
        32, 48, 56, 64, 80, 96, 112, 128,
        144, 160, 176, 192, 224, 256
    };
    static const int kLayer2RateV1[] = {
        32, 48, 56, 64, 80, 96, 112, 128,
        160, 192, 224, 256, 320, 384
    };
    static const int kLayer3RateV1[] = {
        32, 40, 48, 56, 64, 80, 96, 112,
        128, 160, 192, 224, 256, 320
    };
    static const int kLayer23RateV2[] = {
        8, 16, 24, 32, 40, 48, 56, 64,
        80, 96, 112, 128, 144, 160
    };

    // Put every output into its "failure" state first.
    *frame_size = 0;
    if (out_sampling_rate != NULL) {
        *out_sampling_rate = 0;
    }
    if (out_channels != NULL) {
        *out_channels = 0;
    }
    if (out_bitrate != NULL) {
        *out_bitrate = 0;
    }
    if (out_num_samples != NULL) {
        *out_num_samples = 1152;
    }

    // Eleven set bits form the sync word.
    if ((header & 0xffe00000) != 0xffe00000) {
        return false;
    }

    // Split the header into its fields.
    const unsigned version = (header >> 19) & 3;              // 3: V1, 2: V2, 0: V2.5
    const unsigned layer = (header >> 17) & 3;                // 3: I, 2: II, 1: III
    const unsigned bitrate_index = (header >> 12) & 0x0f;
    const unsigned sampling_rate_index = (header >> 10) & 3;
    const unsigned padding = (header >> 9) & 1;

    // Reject reserved field values; index 0 is the "free" bitrate, which is
    // disallowed as well.
    if (version == 0x01
            || layer == 0x00
            || bitrate_index == 0 || bitrate_index == 0x0f
            || sampling_rate_index == 3) {
        return false;
    }

    const bool isV1 = (version == 3);
    const unsigned rateSlot = bitrate_index - 1;

    int sampling_rate = kBaseSamplingRate[sampling_rate_index];
    switch (version) {
        case 2:  // V2
            sampling_rate /= 2;
            break;
        case 0:  // V2.5
            sampling_rate /= 4;
            break;
        default:
            break;
    }

    if (layer == 3) {
        // layer I: frames are counted in 4-byte slots

        int bitrate = isV1 ? kLayer1RateV1[rateSlot] : kLayer1RateV2[rateSlot];

        if (out_bitrate != NULL) {
            *out_bitrate = bitrate;
        }

        *frame_size = (12000 * bitrate / sampling_rate + padding) * 4;

        if (out_num_samples != NULL) {
            *out_num_samples = 384;
        }
    } else {
        // layer II or III

        const bool isLayer3 = (layer == 1);

        int bitrate;
        int num_samples;
        if (isV1) {
            bitrate = (layer == 2 /* L2 */)
                ? kLayer2RateV1[rateSlot]
                : kLayer3RateV1[rateSlot];
            num_samples = 1152;
        } else {
            // V2 (or 2.5): layer III frames carry half as many samples
            bitrate = kLayer23RateV2[rateSlot];
            num_samples = isLayer3 ? 576 : 1152;
        }

        if (out_num_samples != NULL) {
            *out_num_samples = num_samples;
        }

        if (out_bitrate != NULL) {
            *out_bitrate = bitrate;
        }

        if (isV1) {
            *frame_size = 144000 * bitrate / sampling_rate + padding;
        } else {
            // V2 or V2.5
            size_t tmp = isLayer3 ? 72000 : 144000;
            *frame_size = tmp * bitrate / sampling_rate + padding;
        }
    }

    if (out_sampling_rate != NULL) {
        *out_sampling_rate = sampling_rate;
    }

    if (out_channels != NULL) {
        // Channel mode 3 is single channel; every other mode has two.
        int channel_mode = (header >> 6) & 3;

        *out_channels = (channel_mode == 3) ? 1 : 2;
    }

    return true;
}
697
698}  // namespace android
699
700