avc_utils.cpp revision df64d15042bbd5e0e4933ac49bf3c177dd94752c
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "avc_utils"
19#include <utils/Log.h>
20
21#include "include/avc_utils.h"
22
23#include <media/stagefright/foundation/ABitReader.h>
24#include <media/stagefright/foundation/ADebug.h>
25#include <media/stagefright/MediaDefs.h>
26#include <media/stagefright/MediaErrors.h>
27#include <media/stagefright/MetaData.h>
28
29namespace android {
30
31unsigned parseUE(ABitReader *br) {
32    unsigned numZeroes = 0;
33    while (br->getBits(1) == 0) {
34        ++numZeroes;
35    }
36
37    unsigned x = br->getBits(numZeroes);
38
39    return x + (1u << numZeroes) - 1;
40}
41
42// Determine video dimensions from the sequence parameterset.
43void FindAVCDimensions(
44        const sp<ABuffer> &seqParamSet, int32_t *width, int32_t *height) {
45    ABitReader br(seqParamSet->data() + 1, seqParamSet->size() - 1);
46
47    unsigned profile_idc = br.getBits(8);
48    br.skipBits(16);
49    parseUE(&br);  // seq_parameter_set_id
50
51    unsigned chroma_format_idc = 1;  // 4:2:0 chroma format
52
53    if (profile_idc == 100 || profile_idc == 110
54            || profile_idc == 122 || profile_idc == 244
55            || profile_idc == 44 || profile_idc == 83 || profile_idc == 86) {
56        chroma_format_idc = parseUE(&br);
57        if (chroma_format_idc == 3) {
58            br.skipBits(1);  // residual_colour_transform_flag
59        }
60        parseUE(&br);  // bit_depth_luma_minus8
61        parseUE(&br);  // bit_depth_chroma_minus8
62        br.skipBits(1);  // qpprime_y_zero_transform_bypass_flag
63        CHECK_EQ(br.getBits(1), 0u);  // seq_scaling_matrix_present_flag
64    }
65
66    parseUE(&br);  // log2_max_frame_num_minus4
67    unsigned pic_order_cnt_type = parseUE(&br);
68
69    if (pic_order_cnt_type == 0) {
70        parseUE(&br);  // log2_max_pic_order_cnt_lsb_minus4
71    } else if (pic_order_cnt_type == 1) {
72        // offset_for_non_ref_pic, offset_for_top_to_bottom_field and
73        // offset_for_ref_frame are technically se(v), but since we are
74        // just skipping over them the midpoint does not matter.
75
76        br.getBits(1);  // delta_pic_order_always_zero_flag
77        parseUE(&br);  // offset_for_non_ref_pic
78        parseUE(&br);  // offset_for_top_to_bottom_field
79
80        unsigned num_ref_frames_in_pic_order_cnt_cycle = parseUE(&br);
81        for (unsigned i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) {
82            parseUE(&br);  // offset_for_ref_frame
83        }
84    }
85
86    parseUE(&br);  // num_ref_frames
87    br.getBits(1);  // gaps_in_frame_num_value_allowed_flag
88
89    unsigned pic_width_in_mbs_minus1 = parseUE(&br);
90    unsigned pic_height_in_map_units_minus1 = parseUE(&br);
91    unsigned frame_mbs_only_flag = br.getBits(1);
92
93    *width = pic_width_in_mbs_minus1 * 16 + 16;
94
95    *height = (2 - frame_mbs_only_flag)
96        * (pic_height_in_map_units_minus1 * 16 + 16);
97
98    if (!frame_mbs_only_flag) {
99        br.getBits(1);  // mb_adaptive_frame_field_flag
100    }
101
102    br.getBits(1);  // direct_8x8_inference_flag
103
104    if (br.getBits(1)) {  // frame_cropping_flag
105        unsigned frame_crop_left_offset = parseUE(&br);
106        unsigned frame_crop_right_offset = parseUE(&br);
107        unsigned frame_crop_top_offset = parseUE(&br);
108        unsigned frame_crop_bottom_offset = parseUE(&br);
109
110        unsigned cropUnitX, cropUnitY;
111        if (chroma_format_idc == 0  /* monochrome */) {
112            cropUnitX = 1;
113            cropUnitY = 2 - frame_mbs_only_flag;
114        } else {
115            unsigned subWidthC = (chroma_format_idc == 3) ? 1 : 2;
116            unsigned subHeightC = (chroma_format_idc == 1) ? 2 : 1;
117
118            cropUnitX = subWidthC;
119            cropUnitY = subHeightC * (2 - frame_mbs_only_flag);
120        }
121
122        ALOGV("frame_crop = (%u, %u, %u, %u), cropUnitX = %u, cropUnitY = %u",
123             frame_crop_left_offset, frame_crop_right_offset,
124             frame_crop_top_offset, frame_crop_bottom_offset,
125             cropUnitX, cropUnitY);
126
127        *width -=
128            (frame_crop_left_offset + frame_crop_right_offset) * cropUnitX;
129        *height -=
130            (frame_crop_top_offset + frame_crop_bottom_offset) * cropUnitY;
131    }
132}
133
134status_t getNextNALUnit(
135        const uint8_t **_data, size_t *_size,
136        const uint8_t **nalStart, size_t *nalSize,
137        bool startCodeFollows) {
138    const uint8_t *data = *_data;
139    size_t size = *_size;
140
141    *nalStart = NULL;
142    *nalSize = 0;
143
144    if (size == 0) {
145        return -EAGAIN;
146    }
147
148    // Skip any number of leading 0x00.
149
150    size_t offset = 0;
151    while (offset < size && data[offset] == 0x00) {
152        ++offset;
153    }
154
155    if (offset == size) {
156        return -EAGAIN;
157    }
158
159    // A valid startcode consists of at least two 0x00 bytes followed by 0x01.
160
161    if (offset < 2 || data[offset] != 0x01) {
162        return ERROR_MALFORMED;
163    }
164
165    ++offset;
166
167    size_t startOffset = offset;
168
169    for (;;) {
170        while (offset < size && data[offset] != 0x01) {
171            ++offset;
172        }
173
174        if (offset == size) {
175            if (startCodeFollows) {
176                offset = size + 2;
177                break;
178            }
179
180            return -EAGAIN;
181        }
182
183        if (data[offset - 1] == 0x00 && data[offset - 2] == 0x00) {
184            break;
185        }
186
187        ++offset;
188    }
189
190    size_t endOffset = offset - 2;
191    while (endOffset > startOffset + 1 && data[endOffset - 1] == 0x00) {
192        --endOffset;
193    }
194
195    *nalStart = &data[startOffset];
196    *nalSize = endOffset - startOffset;
197
198    if (offset + 2 < size) {
199        *_data = &data[offset - 2];
200        *_size = size - offset + 2;
201    } else {
202        *_data = NULL;
203        *_size = 0;
204    }
205
206    return OK;
207}
208
209static sp<ABuffer> FindNAL(
210        const uint8_t *data, size_t size, unsigned nalType,
211        size_t *stopOffset) {
212    const uint8_t *nalStart;
213    size_t nalSize;
214    while (getNextNALUnit(&data, &size, &nalStart, &nalSize, true) == OK) {
215        if ((nalStart[0] & 0x1f) == nalType) {
216            sp<ABuffer> buffer = new ABuffer(nalSize);
217            memcpy(buffer->data(), nalStart, nalSize);
218            return buffer;
219        }
220    }
221
222    return NULL;
223}
224
225const char *AVCProfileToString(uint8_t profile) {
226    switch (profile) {
227        case kAVCProfileBaseline:
228            return "Baseline";
229        case kAVCProfileMain:
230            return "Main";
231        case kAVCProfileExtended:
232            return "Extended";
233        case kAVCProfileHigh:
234            return "High";
235        case kAVCProfileHigh10:
236            return "High 10";
237        case kAVCProfileHigh422:
238            return "High 422";
239        case kAVCProfileHigh444:
240            return "High 444";
241        case kAVCProfileCAVLC444Intra:
242            return "CAVLC 444 Intra";
243        default:   return "Unknown";
244    }
245}
246
247sp<MetaData> MakeAVCCodecSpecificData(const sp<ABuffer> &accessUnit) {
248    const uint8_t *data = accessUnit->data();
249    size_t size = accessUnit->size();
250
251    sp<ABuffer> seqParamSet = FindNAL(data, size, 7, NULL);
252    if (seqParamSet == NULL) {
253        return NULL;
254    }
255
256    int32_t width, height;
257    FindAVCDimensions(seqParamSet, &width, &height);
258
259    size_t stopOffset;
260    sp<ABuffer> picParamSet = FindNAL(data, size, 8, &stopOffset);
261    CHECK(picParamSet != NULL);
262
263    size_t csdSize =
264        1 + 3 + 1 + 1
265        + 2 * 1 + seqParamSet->size()
266        + 1 + 2 * 1 + picParamSet->size();
267
268    sp<ABuffer> csd = new ABuffer(csdSize);
269    uint8_t *out = csd->data();
270
271    *out++ = 0x01;  // configurationVersion
272    memcpy(out, seqParamSet->data() + 1, 3);  // profile/level...
273
274    uint8_t profile = out[0];
275    uint8_t level = out[2];
276
277    out += 3;
278    *out++ = (0x3f << 2) | 1;  // lengthSize == 2 bytes
279    *out++ = 0xe0 | 1;
280
281    *out++ = seqParamSet->size() >> 8;
282    *out++ = seqParamSet->size() & 0xff;
283    memcpy(out, seqParamSet->data(), seqParamSet->size());
284    out += seqParamSet->size();
285
286    *out++ = 1;
287
288    *out++ = picParamSet->size() >> 8;
289    *out++ = picParamSet->size() & 0xff;
290    memcpy(out, picParamSet->data(), picParamSet->size());
291
292#if 0
293    ALOGI("AVC seq param set");
294    hexdump(seqParamSet->data(), seqParamSet->size());
295#endif
296
297    sp<MetaData> meta = new MetaData;
298    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_AVC);
299
300    meta->setData(kKeyAVCC, kTypeAVCC, csd->data(), csd->size());
301    meta->setInt32(kKeyWidth, width);
302    meta->setInt32(kKeyHeight, height);
303
304    ALOGI("found AVC codec config (%d x %d, %s-profile level %d.%d)",
305         width, height, AVCProfileToString(profile), level / 10, level % 10);
306
307    return meta;
308}
309
310bool IsIDR(const sp<ABuffer> &buffer) {
311    const uint8_t *data = buffer->data();
312    size_t size = buffer->size();
313
314    bool foundIDR = false;
315
316    const uint8_t *nalStart;
317    size_t nalSize;
318    while (getNextNALUnit(&data, &size, &nalStart, &nalSize, true) == OK) {
319        CHECK_GT(nalSize, 0u);
320
321        unsigned nalType = nalStart[0] & 0x1f;
322
323        if (nalType == 5) {
324            foundIDR = true;
325            break;
326        }
327    }
328
329    return foundIDR;
330}
331
332bool IsAVCReferenceFrame(const sp<ABuffer> &accessUnit) {
333    const uint8_t *data = accessUnit->data();
334    size_t size = accessUnit->size();
335
336    const uint8_t *nalStart;
337    size_t nalSize;
338    while (getNextNALUnit(&data, &size, &nalStart, &nalSize, true) == OK) {
339        CHECK_GT(nalSize, 0u);
340
341        unsigned nalType = nalStart[0] & 0x1f;
342
343        if (nalType == 5) {
344            return true;
345        } else if (nalType == 1) {
346            unsigned nal_ref_idc = (nalStart[0] >> 5) & 3;
347            return nal_ref_idc != 0;
348        }
349    }
350
351    return true;
352}
353
354sp<MetaData> MakeAACCodecSpecificData(
355        unsigned profile, unsigned sampling_freq_index,
356        unsigned channel_configuration) {
357    sp<MetaData> meta = new MetaData;
358    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AAC);
359
360    CHECK_LE(sampling_freq_index, 11u);
361    static const int32_t kSamplingFreq[] = {
362        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
363        16000, 12000, 11025, 8000
364    };
365    meta->setInt32(kKeySampleRate, kSamplingFreq[sampling_freq_index]);
366    meta->setInt32(kKeyChannelCount, channel_configuration);
367
368    static const uint8_t kStaticESDS[] = {
369        0x03, 22,
370        0x00, 0x00,     // ES_ID
371        0x00,           // streamDependenceFlag, URL_Flag, OCRstreamFlag
372
373        0x04, 17,
374        0x40,                       // Audio ISO/IEC 14496-3
375        0x00, 0x00, 0x00, 0x00,
376        0x00, 0x00, 0x00, 0x00,
377        0x00, 0x00, 0x00, 0x00,
378
379        0x05, 2,
380        // AudioSpecificInfo follows
381
382        // oooo offf fccc c000
383        // o - audioObjectType
384        // f - samplingFreqIndex
385        // c - channelConfig
386    };
387    sp<ABuffer> csd = new ABuffer(sizeof(kStaticESDS) + 2);
388    memcpy(csd->data(), kStaticESDS, sizeof(kStaticESDS));
389
390    csd->data()[sizeof(kStaticESDS)] =
391        ((profile + 1) << 3) | (sampling_freq_index >> 1);
392
393    csd->data()[sizeof(kStaticESDS) + 1] =
394        ((sampling_freq_index << 7) & 0x80) | (channel_configuration << 3);
395
396    meta->setData(kKeyESDS, 0, csd->data(), csd->size());
397
398    return meta;
399}
400
401bool ExtractDimensionsFromVOLHeader(
402        const uint8_t *data, size_t size, int32_t *width, int32_t *height) {
403    ABitReader br(&data[4], size - 4);
404    br.skipBits(1);  // random_accessible_vol
405    unsigned video_object_type_indication = br.getBits(8);
406
407    CHECK_NE(video_object_type_indication,
408             0x21u /* Fine Granularity Scalable */);
409
410    unsigned video_object_layer_verid;
411    unsigned video_object_layer_priority;
412    if (br.getBits(1)) {
413        video_object_layer_verid = br.getBits(4);
414        video_object_layer_priority = br.getBits(3);
415    }
416    unsigned aspect_ratio_info = br.getBits(4);
417    if (aspect_ratio_info == 0x0f /* extended PAR */) {
418        br.skipBits(8);  // par_width
419        br.skipBits(8);  // par_height
420    }
421    if (br.getBits(1)) {  // vol_control_parameters
422        br.skipBits(2);  // chroma_format
423        br.skipBits(1);  // low_delay
424        if (br.getBits(1)) {  // vbv_parameters
425            br.skipBits(15);  // first_half_bit_rate
426            CHECK(br.getBits(1));  // marker_bit
427            br.skipBits(15);  // latter_half_bit_rate
428            CHECK(br.getBits(1));  // marker_bit
429            br.skipBits(15);  // first_half_vbv_buffer_size
430            CHECK(br.getBits(1));  // marker_bit
431            br.skipBits(3);  // latter_half_vbv_buffer_size
432            br.skipBits(11);  // first_half_vbv_occupancy
433            CHECK(br.getBits(1));  // marker_bit
434            br.skipBits(15);  // latter_half_vbv_occupancy
435            CHECK(br.getBits(1));  // marker_bit
436        }
437    }
438    unsigned video_object_layer_shape = br.getBits(2);
439    CHECK_EQ(video_object_layer_shape, 0x00u /* rectangular */);
440
441    CHECK(br.getBits(1));  // marker_bit
442    unsigned vop_time_increment_resolution = br.getBits(16);
443    CHECK(br.getBits(1));  // marker_bit
444
445    if (br.getBits(1)) {  // fixed_vop_rate
446        // range [0..vop_time_increment_resolution)
447
448        // vop_time_increment_resolution
449        // 2 => 0..1, 1 bit
450        // 3 => 0..2, 2 bits
451        // 4 => 0..3, 2 bits
452        // 5 => 0..4, 3 bits
453        // ...
454
455        CHECK_GT(vop_time_increment_resolution, 0u);
456        --vop_time_increment_resolution;
457
458        unsigned numBits = 0;
459        while (vop_time_increment_resolution > 0) {
460            ++numBits;
461            vop_time_increment_resolution >>= 1;
462        }
463
464        br.skipBits(numBits);  // fixed_vop_time_increment
465    }
466
467    CHECK(br.getBits(1));  // marker_bit
468    unsigned video_object_layer_width = br.getBits(13);
469    CHECK(br.getBits(1));  // marker_bit
470    unsigned video_object_layer_height = br.getBits(13);
471    CHECK(br.getBits(1));  // marker_bit
472
473    unsigned interlaced = br.getBits(1);
474
475    *width = video_object_layer_width;
476    *height = video_object_layer_height;
477
478    return true;
479}
480
// Decodes a 32-bit MPEG audio frame header.
//
// Header fields used, by bit position (bit 31 is the most significant):
//   31..21  sync word, all ones
//   20..19  version: 3 = V1, 2 = V2, 0 = V2.5, 1 = rejected
//   18..17  layer:   3 = Layer I, 2 = Layer II, 1 = Layer III, 0 = rejected
//   15..12  bitrate index (0 "free" and 15 are rejected)
//   11..10  sampling rate index (3 is rejected)
//        9  padding
//    7..6   channel mode (3 = mono, anything else = two channels)
//
// On success returns true and stores the frame length in bytes in
// |*frame_size|. The remaining output pointers are optional (may be NULL):
// |*out_sampling_rate| in Hz, |*out_channels|, |*out_bitrate| in kbit/s and
// |*out_num_samples| (samples per frame).
//
// On failure returns false with |*frame_size| == 0, the optional rate,
// channel and bitrate outputs zeroed and |*out_num_samples| set to 1152.
bool GetMPEGAudioFrameSize(
        uint32_t header, size_t *frame_size,
        int *out_sampling_rate, int *out_channels,
        int *out_bitrate, int *out_num_samples) {
    *frame_size = 0;

    if (out_sampling_rate) {
        *out_sampling_rate = 0;
    }

    if (out_channels) {
        *out_channels = 0;
    }

    if (out_bitrate) {
        *out_bitrate = 0;
    }

    if (out_num_samples) {
        *out_num_samples = 1152;
    }

    if ((header & 0xffe00000) != 0xffe00000) {
        return false;
    }

    unsigned version = (header >> 19) & 3;

    if (version == 0x01) {
        return false;
    }

    unsigned layer = (header >> 17) & 3;

    if (layer == 0x00) {
        return false;
    }

    unsigned bitrate_index = (header >> 12) & 0x0f;

    if (bitrate_index == 0 || bitrate_index == 0x0f) {
        // Disallow "free" bitrate.
        return false;
    }

    unsigned sampling_rate_index = (header >> 10) & 3;

    if (sampling_rate_index == 3) {
        return false;
    }

    static const int kSamplingRateV1[] = { 44100, 48000, 32000 };
    int sampling_rate = kSamplingRateV1[sampling_rate_index];
    if (version == 2 /* V2 */) {
        sampling_rate /= 2;
    } else if (version == 0 /* V2.5 */) {
        sampling_rate /= 4;
    }

    unsigned padding = (header >> 9) & 1;

    if (layer == 3) {
        // layer I

        static const int kBitrateV1[] = {
            32, 64, 96, 128, 160, 192, 224, 256,
            288, 320, 352, 384, 416, 448
        };

        static const int kBitrateV2[] = {
            32, 48, 56, 64, 80, 96, 112, 128,
            144, 160, 176, 192, 224, 256
        };

        int bitrate =
            (version == 3 /* V1 */)
                ? kBitrateV1[bitrate_index - 1]
                : kBitrateV2[bitrate_index - 1];

        if (out_bitrate) {
            *out_bitrate = bitrate;
        }

        // Layer I frames are made of 4-byte slots; padding adds one slot.
        *frame_size = (12000 * bitrate / sampling_rate + padding) * 4;

        if (out_num_samples) {
            *out_num_samples = 384;
        }
    } else {
        // layer II or III

        static const int kBitrateV1L2[] = {
            32, 48, 56, 64, 80, 96, 112, 128,
            160, 192, 224, 256, 320, 384
        };

        static const int kBitrateV1L3[] = {
            32, 40, 48, 56, 64, 80, 96, 112,
            128, 160, 192, 224, 256, 320
        };

        static const int kBitrateV2[] = {
            8, 16, 24, 32, 40, 48, 56, 64,
            80, 96, 112, 128, 144, 160
        };

        // Only Layer III shrinks to 576 samples per frame in V2 / V2.5;
        // Layer II keeps 1152 samples in every version. The byte count per
        // frame scales with the sample count, hence the halved coefficient
        // for V2 / V2.5 Layer III only.
        const bool half_size_frame =
            (version != 3 /* V2 or V2.5 */) && (layer == 1 /* L3 */);

        int bitrate;
        if (version == 3 /* V1 */) {
            bitrate = (layer == 2 /* L2 */)
                ? kBitrateV1L2[bitrate_index - 1]
                : kBitrateV1L3[bitrate_index - 1];
        } else {
            // V2 (or 2.5)
            bitrate = kBitrateV2[bitrate_index - 1];
        }

        if (out_num_samples) {
            *out_num_samples = half_size_frame ? 576 : 1152;
        }

        if (out_bitrate) {
            *out_bitrate = bitrate;
        }

        const int coefficient = half_size_frame ? 72000 : 144000;
        *frame_size = coefficient * bitrate / sampling_rate + padding;
    }

    if (out_sampling_rate) {
        *out_sampling_rate = sampling_rate;
    }

    if (out_channels) {
        int channel_mode = (header >> 6) & 3;

        *out_channels = (channel_mode == 3) ? 1 : 2;
    }

    return true;
}
631
632}  // namespace android
633
634