ESQueue.cpp revision 666c96d37203fd91ec319b8b499442696f70fd53
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "ESQueue"
19#include <media/stagefright/foundation/ADebug.h>
20
21#include "ESQueue.h"
22
23#include <media/stagefright/foundation/hexdump.h>
24#include <media/stagefright/foundation/ABitReader.h>
25#include <media/stagefright/foundation/ABuffer.h>
26#include <media/stagefright/foundation/AMessage.h>
27#include <media/stagefright/MediaErrors.h>
28#include <media/stagefright/MediaDefs.h>
29#include <media/stagefright/MetaData.h>
30#include <media/stagefright/Utils.h>
31
32#include "include/avc_utils.h"
33
34#include <inttypes.h>
35#include <netinet/in.h>
36
37namespace android {
38
39ElementaryStreamQueue::ElementaryStreamQueue(Mode mode, uint32_t flags)
40    : mMode(mode),
41      mFlags(flags) {
42}
43
// Returns the current value of mFormat. The dequeue routines in this file
// fill it in the first time they find it NULL, and clear(true) resets it, so
// the result may be NULL.
sp<MetaData> ElementaryStreamQueue::getFormat() {
    return mFormat;
}
47
48void ElementaryStreamQueue::clear(bool clearFormat) {
49    if (mBuffer != NULL) {
50        mBuffer->setRange(0, 0);
51    }
52
53    mRangeInfos.clear();
54
55    if (clearFormat) {
56        mFormat.clear();
57    }
58}
59
60// Parse AC3 header assuming the current ptr is start position of syncframe,
61// update metadata only applicable, and return the payload size
62static unsigned parseAC3SyncFrame(
63        const uint8_t *ptr, size_t size, sp<MetaData> *metaData) {
64    static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
65    static const unsigned samplingRateTable[] = {48000, 44100, 32000};
66    static const unsigned rates[] = {32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256,
67            320, 384, 448, 512, 576, 640};
68
69    static const unsigned frameSizeTable[19][3] = {
70        { 64, 69, 96 },
71        { 80, 87, 120 },
72        { 96, 104, 144 },
73        { 112, 121, 168 },
74        { 128, 139, 192 },
75        { 160, 174, 240 },
76        { 192, 208, 288 },
77        { 224, 243, 336 },
78        { 256, 278, 384 },
79        { 320, 348, 480 },
80        { 384, 417, 576 },
81        { 448, 487, 672 },
82        { 512, 557, 768 },
83        { 640, 696, 960 },
84        { 768, 835, 1152 },
85        { 896, 975, 1344 },
86        { 1024, 1114, 1536 },
87        { 1152, 1253, 1728 },
88        { 1280, 1393, 1920 },
89    };
90
91    ABitReader bits(ptr, size);
92    unsigned syncStartPos = 0;  // in bytes
93    if (bits.numBitsLeft() < 16) {
94        return 0;
95    }
96    if (bits.getBits(16) != 0x0B77) {
97        return 0;
98    }
99
100    if (bits.numBitsLeft() < 16 + 2 + 6 + 5 + 3 + 3) {
101        ALOGV("Not enough bits left for further parsing");
102        return 0;
103    }
104    bits.skipBits(16);  // crc1
105
106    unsigned fscod = bits.getBits(2);
107    if (fscod == 3) {
108        ALOGW("Incorrect fscod in AC3 header");
109        return 0;
110    }
111
112    unsigned frmsizecod = bits.getBits(6);
113    if (frmsizecod > 37) {
114        ALOGW("Incorrect frmsizecod in AC3 header");
115        return 0;
116    }
117
118    unsigned bsid = bits.getBits(5);
119    if (bsid > 8) {
120        ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
121        return 0;
122    }
123
124    unsigned bsmod = bits.getBits(3);
125    unsigned acmod = bits.getBits(3);
126    unsigned cmixlev = 0;
127    unsigned surmixlev = 0;
128    unsigned dsurmod = 0;
129
130    if ((acmod & 1) > 0 && acmod != 1) {
131        if (bits.numBitsLeft() < 2) {
132            return 0;
133        }
134        cmixlev = bits.getBits(2);
135    }
136    if ((acmod & 4) > 0) {
137        if (bits.numBitsLeft() < 2) {
138            return 0;
139        }
140        surmixlev = bits.getBits(2);
141    }
142    if (acmod == 2) {
143        if (bits.numBitsLeft() < 2) {
144            return 0;
145        }
146        dsurmod = bits.getBits(2);
147    }
148
149    if (bits.numBitsLeft() < 1) {
150        return 0;
151    }
152    unsigned lfeon = bits.getBits(1);
153
154    unsigned samplingRate = samplingRateTable[fscod];
155    unsigned payloadSize = frameSizeTable[frmsizecod >> 1][fscod];
156    if (fscod == 1) {
157        payloadSize += frmsizecod & 1;
158    }
159    payloadSize <<= 1;  // convert from 16-bit words to bytes
160
161    unsigned channelCount = channelCountTable[acmod] + lfeon;
162
163    if (metaData != NULL) {
164        (*metaData)->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3);
165        (*metaData)->setInt32(kKeyChannelCount, channelCount);
166        (*metaData)->setInt32(kKeySampleRate, samplingRate);
167    }
168
169    return payloadSize;
170}
171
172static bool IsSeeminglyValidAC3Header(const uint8_t *ptr, size_t size) {
173    return parseAC3SyncFrame(ptr, size, NULL) > 0;
174}
175
// Cheap plausibility check for an ADTS header at |ptr|. Only the first three
// bytes are inspected: the 12-bit 0xfff syncword, a layer field of 0, and a
// profile that is not the reserved value 3 when the ID bit is set.
// Returns false if fewer than three bytes are available.
static bool IsSeeminglyValidADTSHeader(const uint8_t *ptr, size_t size) {
    if (size < 3) {
        // Too short to look at every field checked below.
        return false;
    }

    const uint8_t second = ptr[1];

    const bool hasSyncword = (ptr[0] == 0xff) && ((second & 0xf0) == 0xf0);
    if (!hasSyncword) {
        return false;
    }

    const unsigned layerBits = (second >> 1) & 3;
    if (layerBits != 0) {
        return false;
    }

    const bool idBitSet = ((second >> 3) & 1) == 1;
    const unsigned profileBits = ptr[2] >> 6;

    // MPEG-2 profile 3 is reserved.
    return !(idBitSet && profileBits == 3);
}
202
// Cheap plausibility check for an MPEG audio frame header at |ptr|. Looks at
// the first three bytes only: the 11-bit sync pattern must be present and the
// ID, layer, bitrate index and sampling rate index must not hold their
// reserved values. Returns false if fewer than three bytes are available.
static bool IsSeeminglyValidMPEGAudioHeader(const uint8_t *ptr, size_t size) {
    if (size < 3) {
        // Too short to look at every field checked below.
        return false;
    }

    const bool hasSync = (ptr[0] == 0xff) && ((ptr[1] & 0xe0) == 0xe0);
    if (!hasSync) {
        return false;
    }

    const unsigned idBits = (ptr[1] >> 3) & 3;
    const unsigned layerBits = (ptr[1] >> 1) & 3;
    const unsigned bitrateBits = ptr[2] >> 4;
    const unsigned rateBits = (ptr[2] >> 2) & 3;

    // Each comparison below rules out one reserved field value.
    return idBits != 1
            && layerBits != 0
            && bitrateBits != 0x0f
            && rateBits != 3;
}
239
240status_t ElementaryStreamQueue::appendData(
241        const void *data, size_t size, int64_t timeUs) {
242    if (mBuffer == NULL || mBuffer->size() == 0) {
243        switch (mMode) {
244            case H264:
245            case MPEG_VIDEO:
246            {
247#if 0
248                if (size < 4 || memcmp("\x00\x00\x00\x01", data, 4)) {
249                    return ERROR_MALFORMED;
250                }
251#else
252                uint8_t *ptr = (uint8_t *)data;
253
254                ssize_t startOffset = -1;
255                for (size_t i = 0; i + 3 < size; ++i) {
256                    if (!memcmp("\x00\x00\x00\x01", &ptr[i], 4)) {
257                        startOffset = i;
258                        break;
259                    }
260                }
261
262                if (startOffset < 0) {
263                    return ERROR_MALFORMED;
264                }
265
266                if (startOffset > 0) {
267                    ALOGI("found something resembling an H.264/MPEG syncword "
268                          "at offset %zd",
269                          startOffset);
270                }
271
272                data = &ptr[startOffset];
273                size -= startOffset;
274#endif
275                break;
276            }
277
278            case MPEG4_VIDEO:
279            {
280#if 0
281                if (size < 3 || memcmp("\x00\x00\x01", data, 3)) {
282                    return ERROR_MALFORMED;
283                }
284#else
285                uint8_t *ptr = (uint8_t *)data;
286
287                ssize_t startOffset = -1;
288                for (size_t i = 0; i + 2 < size; ++i) {
289                    if (!memcmp("\x00\x00\x01", &ptr[i], 3)) {
290                        startOffset = i;
291                        break;
292                    }
293                }
294
295                if (startOffset < 0) {
296                    return ERROR_MALFORMED;
297                }
298
299                if (startOffset > 0) {
300                    ALOGI("found something resembling an H.264/MPEG syncword "
301                          "at offset %zd",
302                          startOffset);
303                }
304
305                data = &ptr[startOffset];
306                size -= startOffset;
307#endif
308                break;
309            }
310
311            case AAC:
312            {
313                uint8_t *ptr = (uint8_t *)data;
314
315#if 0
316                if (size < 2 || ptr[0] != 0xff || (ptr[1] >> 4) != 0x0f) {
317                    return ERROR_MALFORMED;
318                }
319#else
320                ssize_t startOffset = -1;
321                for (size_t i = 0; i < size; ++i) {
322                    if (IsSeeminglyValidADTSHeader(&ptr[i], size - i)) {
323                        startOffset = i;
324                        break;
325                    }
326                }
327
328                if (startOffset < 0) {
329                    return ERROR_MALFORMED;
330                }
331
332                if (startOffset > 0) {
333                    ALOGI("found something resembling an AAC syncword at "
334                          "offset %zd",
335                          startOffset);
336                }
337
338                data = &ptr[startOffset];
339                size -= startOffset;
340#endif
341                break;
342            }
343
344            case AC3:
345            {
346                uint8_t *ptr = (uint8_t *)data;
347
348                ssize_t startOffset = -1;
349                for (size_t i = 0; i < size; ++i) {
350                    if (IsSeeminglyValidAC3Header(&ptr[i], size - i)) {
351                        startOffset = i;
352                        break;
353                    }
354                }
355
356                if (startOffset < 0) {
357                    return ERROR_MALFORMED;
358                }
359
360                if (startOffset > 0) {
361                    ALOGI("found something resembling an AC3 syncword at "
362                          "offset %zd",
363                          startOffset);
364                }
365
366                data = &ptr[startOffset];
367                size -= startOffset;
368                break;
369            }
370
371            case MPEG_AUDIO:
372            {
373                uint8_t *ptr = (uint8_t *)data;
374
375                ssize_t startOffset = -1;
376                for (size_t i = 0; i < size; ++i) {
377                    if (IsSeeminglyValidMPEGAudioHeader(&ptr[i], size - i)) {
378                        startOffset = i;
379                        break;
380                    }
381                }
382
383                if (startOffset < 0) {
384                    return ERROR_MALFORMED;
385                }
386
387                if (startOffset > 0) {
388                    ALOGI("found something resembling an MPEG audio "
389                          "syncword at offset %zd",
390                          startOffset);
391                }
392
393                data = &ptr[startOffset];
394                size -= startOffset;
395                break;
396            }
397
398            case PCM_AUDIO:
399            {
400                break;
401            }
402
403            default:
404                TRESPASS();
405                break;
406        }
407    }
408
409    size_t neededSize = (mBuffer == NULL ? 0 : mBuffer->size()) + size;
410    if (mBuffer == NULL || neededSize > mBuffer->capacity()) {
411        neededSize = (neededSize + 65535) & ~65535;
412
413        ALOGV("resizing buffer to size %zu", neededSize);
414
415        sp<ABuffer> buffer = new ABuffer(neededSize);
416        if (mBuffer != NULL) {
417            memcpy(buffer->data(), mBuffer->data(), mBuffer->size());
418            buffer->setRange(0, mBuffer->size());
419        } else {
420            buffer->setRange(0, 0);
421        }
422
423        mBuffer = buffer;
424    }
425
426    memcpy(mBuffer->data() + mBuffer->size(), data, size);
427    mBuffer->setRange(0, mBuffer->size() + size);
428
429    RangeInfo info;
430    info.mLength = size;
431    info.mTimestampUs = timeUs;
432    mRangeInfos.push_back(info);
433
434#if 0
435    if (mMode == AAC) {
436        ALOGI("size = %zu, timeUs = %.2f secs", size, timeUs / 1E6);
437        hexdump(data, size);
438    }
439#endif
440
441    return OK;
442}
443
444sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnit() {
445    if ((mFlags & kFlag_AlignedData) && mMode == H264) {
446        if (mRangeInfos.empty()) {
447            return NULL;
448        }
449
450        RangeInfo info = *mRangeInfos.begin();
451        mRangeInfos.erase(mRangeInfos.begin());
452
453        sp<ABuffer> accessUnit = new ABuffer(info.mLength);
454        memcpy(accessUnit->data(), mBuffer->data(), info.mLength);
455        accessUnit->meta()->setInt64("timeUs", info.mTimestampUs);
456
457        memmove(mBuffer->data(),
458                mBuffer->data() + info.mLength,
459                mBuffer->size() - info.mLength);
460
461        mBuffer->setRange(0, mBuffer->size() - info.mLength);
462
463        if (mFormat == NULL) {
464            mFormat = MakeAVCCodecSpecificData(accessUnit);
465        }
466
467        return accessUnit;
468    }
469
470    switch (mMode) {
471        case H264:
472            return dequeueAccessUnitH264();
473        case AAC:
474            return dequeueAccessUnitAAC();
475        case AC3:
476            return dequeueAccessUnitAC3();
477        case MPEG_VIDEO:
478            return dequeueAccessUnitMPEGVideo();
479        case MPEG4_VIDEO:
480            return dequeueAccessUnitMPEG4Video();
481        case PCM_AUDIO:
482            return dequeueAccessUnitPCMAudio();
483        default:
484            CHECK_EQ((unsigned)mMode, (unsigned)MPEG_AUDIO);
485            return dequeueAccessUnitMPEGAudio();
486    }
487}
488
489sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitAC3() {
490    unsigned syncStartPos = 0;  // in bytes
491    unsigned payloadSize = 0;
492    sp<MetaData> format = new MetaData;
493    while (true) {
494        if (syncStartPos + 2 >= mBuffer->size()) {
495            return NULL;
496        }
497
498        payloadSize = parseAC3SyncFrame(
499                mBuffer->data() + syncStartPos,
500                mBuffer->size() - syncStartPos,
501                &format);
502        if (payloadSize > 0) {
503            break;
504        }
505        ++syncStartPos;
506    }
507
508    if (mBuffer->size() < syncStartPos + payloadSize) {
509        ALOGV("Not enough buffer size for AC3");
510        return NULL;
511    }
512
513    if (mFormat == NULL) {
514        mFormat = format;
515    }
516
517    sp<ABuffer> accessUnit = new ABuffer(syncStartPos + payloadSize);
518    memcpy(accessUnit->data(), mBuffer->data(), syncStartPos + payloadSize);
519
520    int64_t timeUs = fetchTimestamp(syncStartPos + payloadSize);
521    CHECK_GE(timeUs, 0ll);
522    accessUnit->meta()->setInt64("timeUs", timeUs);
523
524    memmove(
525            mBuffer->data(),
526            mBuffer->data() + syncStartPos + payloadSize,
527            mBuffer->size() - syncStartPos - payloadSize);
528
529    mBuffer->setRange(0, mBuffer->size() - syncStartPos - payloadSize);
530
531    return accessUnit;
532}
533
534sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitPCMAudio() {
535    if (mBuffer->size() < 4) {
536        return NULL;
537    }
538
539    ABitReader bits(mBuffer->data(), 4);
540    CHECK_EQ(bits.getBits(8), 0xa0);
541    unsigned numAUs = bits.getBits(8);
542    bits.skipBits(8);
543    unsigned quantization_word_length = bits.getBits(2);
544    unsigned audio_sampling_frequency = bits.getBits(3);
545    unsigned num_channels = bits.getBits(3);
546
547    CHECK_EQ(audio_sampling_frequency, 2);  // 48kHz
548    CHECK_EQ(num_channels, 1u);  // stereo!
549
550    if (mFormat == NULL) {
551        mFormat = new MetaData;
552        mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
553        mFormat->setInt32(kKeyChannelCount, 2);
554        mFormat->setInt32(kKeySampleRate, 48000);
555    }
556
557    static const size_t kFramesPerAU = 80;
558    size_t frameSize = 2 /* numChannels */ * sizeof(int16_t);
559
560    size_t payloadSize = numAUs * frameSize * kFramesPerAU;
561
562    if (mBuffer->size() < 4 + payloadSize) {
563        return NULL;
564    }
565
566    sp<ABuffer> accessUnit = new ABuffer(payloadSize);
567    memcpy(accessUnit->data(), mBuffer->data() + 4, payloadSize);
568
569    int64_t timeUs = fetchTimestamp(payloadSize + 4);
570    CHECK_GE(timeUs, 0ll);
571    accessUnit->meta()->setInt64("timeUs", timeUs);
572
573    int16_t *ptr = (int16_t *)accessUnit->data();
574    for (size_t i = 0; i < payloadSize / sizeof(int16_t); ++i) {
575        ptr[i] = ntohs(ptr[i]);
576    }
577
578    memmove(
579            mBuffer->data(),
580            mBuffer->data() + 4 + payloadSize,
581            mBuffer->size() - 4 - payloadSize);
582
583    mBuffer->setRange(0, mBuffer->size() - 4 - payloadSize);
584
585    return accessUnit;
586}
587
588sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitAAC() {
589    if (mBuffer->size() == 0) {
590        return NULL;
591    }
592
593    CHECK(!mRangeInfos.empty());
594
595    const RangeInfo &info = *mRangeInfos.begin();
596    if (mBuffer->size() < info.mLength) {
597        return NULL;
598    }
599
600    CHECK_GE(info.mTimestampUs, 0ll);
601
602    // The idea here is consume all AAC frames starting at offsets before
603    // info.mLength so we can assign a meaningful timestamp without
604    // having to interpolate.
605    // The final AAC frame may well extend into the next RangeInfo but
606    // that's ok.
607    // TODO: the logic commented above is skipped because codec cannot take
608    // arbitrary sized input buffers;
609    size_t offset = 0;
610    while (offset < info.mLength) {
611        if (offset + 7 > mBuffer->size()) {
612            return NULL;
613        }
614
615        ABitReader bits(mBuffer->data() + offset, mBuffer->size() - offset);
616
617        // adts_fixed_header
618
619        CHECK_EQ(bits.getBits(12), 0xfffu);
620        bits.skipBits(3);  // ID, layer
621        bool protection_absent = bits.getBits(1) != 0;
622
623        if (mFormat == NULL) {
624            unsigned profile = bits.getBits(2);
625            CHECK_NE(profile, 3u);
626            unsigned sampling_freq_index = bits.getBits(4);
627            bits.getBits(1);  // private_bit
628            unsigned channel_configuration = bits.getBits(3);
629            CHECK_NE(channel_configuration, 0u);
630            bits.skipBits(2);  // original_copy, home
631
632            mFormat = MakeAACCodecSpecificData(
633                    profile, sampling_freq_index, channel_configuration);
634
635            mFormat->setInt32(kKeyIsADTS, true);
636
637            int32_t sampleRate;
638            int32_t numChannels;
639            CHECK(mFormat->findInt32(kKeySampleRate, &sampleRate));
640            CHECK(mFormat->findInt32(kKeyChannelCount, &numChannels));
641
642            ALOGI("found AAC codec config (%d Hz, %d channels)",
643                 sampleRate, numChannels);
644        } else {
645            // profile_ObjectType, sampling_frequency_index, private_bits,
646            // channel_configuration, original_copy, home
647            bits.skipBits(12);
648        }
649
650        // adts_variable_header
651
652        // copyright_identification_bit, copyright_identification_start
653        bits.skipBits(2);
654
655        unsigned aac_frame_length = bits.getBits(13);
656
657        bits.skipBits(11);  // adts_buffer_fullness
658
659        unsigned number_of_raw_data_blocks_in_frame = bits.getBits(2);
660
661        if (number_of_raw_data_blocks_in_frame != 0) {
662            // To be implemented.
663            TRESPASS();
664        }
665
666        if (offset + aac_frame_length > mBuffer->size()) {
667            return NULL;
668        }
669
670        size_t headerSize = protection_absent ? 7 : 9;
671
672        offset += aac_frame_length;
673        // TODO: move back to concatenation when codec can support arbitrary input buffers.
674        // For now only queue a single buffer
675        break;
676    }
677
678    int64_t timeUs = fetchTimestampAAC(offset);
679
680    sp<ABuffer> accessUnit = new ABuffer(offset);
681    memcpy(accessUnit->data(), mBuffer->data(), offset);
682
683    memmove(mBuffer->data(), mBuffer->data() + offset,
684            mBuffer->size() - offset);
685    mBuffer->setRange(0, mBuffer->size() - offset);
686
687    accessUnit->meta()->setInt64("timeUs", timeUs);
688
689    return accessUnit;
690}
691
692int64_t ElementaryStreamQueue::fetchTimestamp(size_t size) {
693    int64_t timeUs = -1;
694    bool first = true;
695
696    while (size > 0) {
697        CHECK(!mRangeInfos.empty());
698
699        RangeInfo *info = &*mRangeInfos.begin();
700
701        if (first) {
702            timeUs = info->mTimestampUs;
703            first = false;
704        }
705
706        if (info->mLength > size) {
707            info->mLength -= size;
708            size = 0;
709        } else {
710            size -= info->mLength;
711
712            mRangeInfos.erase(mRangeInfos.begin());
713            info = NULL;
714        }
715
716    }
717
718    if (timeUs == 0ll) {
719        ALOGV("Returning 0 timestamp");
720    }
721
722    return timeUs;
723}
724
725// TODO: avoid interpolating timestamps once codec supports arbitrary sized input buffers
726int64_t ElementaryStreamQueue::fetchTimestampAAC(size_t size) {
727    int64_t timeUs = -1;
728    bool first = true;
729
730    size_t samplesize = size;
731    while (size > 0) {
732        CHECK(!mRangeInfos.empty());
733
734        RangeInfo *info = &*mRangeInfos.begin();
735
736        if (first) {
737            timeUs = info->mTimestampUs;
738            first = false;
739        }
740
741        if (info->mLength > size) {
742            int32_t sampleRate;
743            CHECK(mFormat->findInt32(kKeySampleRate, &sampleRate));
744            info->mLength -= size;
745            size_t numSamples = 1024 * size / samplesize;
746            info->mTimestampUs += numSamples * 1000000ll / sampleRate;
747            size = 0;
748        } else {
749            size -= info->mLength;
750
751            mRangeInfos.erase(mRangeInfos.begin());
752            info = NULL;
753        }
754
755    }
756
757    if (timeUs == 0ll) {
758        ALOGV("Returning 0 timestamp");
759    }
760
761    return timeUs;
762}
763
// Location of one NAL unit inside mBuffer, as recorded by
// dequeueAccessUnitH264() while it scans for an access unit boundary.
struct NALPosition {
    // Byte offset of the NAL unit from mBuffer->data().
    size_t nalOffset;
    // Length of the NAL unit in bytes, as reported by getNextNALUnit().
    size_t nalSize;
};
768
769sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitH264() {
770    const uint8_t *data = mBuffer->data();
771
772    size_t size = mBuffer->size();
773    Vector<NALPosition> nals;
774
775    size_t totalSize = 0;
776
777    status_t err;
778    const uint8_t *nalStart;
779    size_t nalSize;
780    bool foundSlice = false;
781    while ((err = getNextNALUnit(&data, &size, &nalStart, &nalSize)) == OK) {
782        if (nalSize == 0) continue;
783
784        unsigned nalType = nalStart[0] & 0x1f;
785        bool flush = false;
786
787        if (nalType == 1 || nalType == 5) {
788            if (foundSlice) {
789                ABitReader br(nalStart + 1, nalSize);
790                unsigned first_mb_in_slice = parseUE(&br);
791
792                if (first_mb_in_slice == 0) {
793                    // This slice starts a new frame.
794
795                    flush = true;
796                }
797            }
798
799            foundSlice = true;
800        } else if ((nalType == 9 || nalType == 7) && foundSlice) {
801            // Access unit delimiter and SPS will be associated with the
802            // next frame.
803
804            flush = true;
805        }
806
807        if (flush) {
808            // The access unit will contain all nal units up to, but excluding
809            // the current one, separated by 0x00 0x00 0x00 0x01 startcodes.
810
811            size_t auSize = 4 * nals.size() + totalSize;
812            sp<ABuffer> accessUnit = new ABuffer(auSize);
813
814#if !LOG_NDEBUG
815            AString out;
816#endif
817
818            size_t dstOffset = 0;
819            for (size_t i = 0; i < nals.size(); ++i) {
820                const NALPosition &pos = nals.itemAt(i);
821
822                unsigned nalType = mBuffer->data()[pos.nalOffset] & 0x1f;
823
824                if (nalType == 6) {
825                    sp<ABuffer> sei = new ABuffer(pos.nalSize);
826                    memcpy(sei->data(), mBuffer->data() + pos.nalOffset, pos.nalSize);
827                    accessUnit->meta()->setBuffer("sei", sei);
828                }
829
830#if !LOG_NDEBUG
831                char tmp[128];
832                sprintf(tmp, "0x%02x", nalType);
833                if (i > 0) {
834                    out.append(", ");
835                }
836                out.append(tmp);
837#endif
838
839                memcpy(accessUnit->data() + dstOffset, "\x00\x00\x00\x01", 4);
840
841                memcpy(accessUnit->data() + dstOffset + 4,
842                       mBuffer->data() + pos.nalOffset,
843                       pos.nalSize);
844
845                dstOffset += pos.nalSize + 4;
846            }
847
848#if !LOG_NDEBUG
849            ALOGV("accessUnit contains nal types %s", out.c_str());
850#endif
851
852            const NALPosition &pos = nals.itemAt(nals.size() - 1);
853            size_t nextScan = pos.nalOffset + pos.nalSize;
854
855            memmove(mBuffer->data(),
856                    mBuffer->data() + nextScan,
857                    mBuffer->size() - nextScan);
858
859            mBuffer->setRange(0, mBuffer->size() - nextScan);
860
861            int64_t timeUs = fetchTimestamp(nextScan);
862            CHECK_GE(timeUs, 0ll);
863
864            accessUnit->meta()->setInt64("timeUs", timeUs);
865
866            if (mFormat == NULL) {
867                mFormat = MakeAVCCodecSpecificData(accessUnit);
868            }
869
870            return accessUnit;
871        }
872
873        NALPosition pos;
874        pos.nalOffset = nalStart - mBuffer->data();
875        pos.nalSize = nalSize;
876
877        nals.push(pos);
878
879        totalSize += nalSize;
880    }
881    CHECK_EQ(err, (status_t)-EAGAIN);
882
883    return NULL;
884}
885
886sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEGAudio() {
887    const uint8_t *data = mBuffer->data();
888    size_t size = mBuffer->size();
889
890    if (size < 4) {
891        return NULL;
892    }
893
894    uint32_t header = U32_AT(data);
895
896    size_t frameSize;
897    int samplingRate, numChannels, bitrate, numSamples;
898    CHECK(GetMPEGAudioFrameSize(
899                header, &frameSize, &samplingRate, &numChannels,
900                &bitrate, &numSamples));
901
902    if (size < frameSize) {
903        return NULL;
904    }
905
906    unsigned layer = 4 - ((header >> 17) & 3);
907
908    sp<ABuffer> accessUnit = new ABuffer(frameSize);
909    memcpy(accessUnit->data(), data, frameSize);
910
911    memmove(mBuffer->data(),
912            mBuffer->data() + frameSize,
913            mBuffer->size() - frameSize);
914
915    mBuffer->setRange(0, mBuffer->size() - frameSize);
916
917    int64_t timeUs = fetchTimestamp(frameSize);
918    CHECK_GE(timeUs, 0ll);
919
920    accessUnit->meta()->setInt64("timeUs", timeUs);
921
922    if (mFormat == NULL) {
923        mFormat = new MetaData;
924
925        switch (layer) {
926            case 1:
927                mFormat->setCString(
928                        kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_I);
929                break;
930            case 2:
931                mFormat->setCString(
932                        kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_II);
933                break;
934            case 3:
935                mFormat->setCString(
936                        kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
937                break;
938            default:
939                TRESPASS();
940        }
941
942        mFormat->setInt32(kKeySampleRate, samplingRate);
943        mFormat->setInt32(kKeyChannelCount, numChannels);
944    }
945
946    return accessUnit;
947}
948
949static void EncodeSize14(uint8_t **_ptr, size_t size) {
950    CHECK_LE(size, 0x3fff);
951
952    uint8_t *ptr = *_ptr;
953
954    *ptr++ = 0x80 | (size >> 7);
955    *ptr++ = size & 0x7f;
956
957    *_ptr = ptr;
958}
959
960static sp<ABuffer> MakeMPEGVideoESDS(const sp<ABuffer> &csd) {
961    sp<ABuffer> esds = new ABuffer(csd->size() + 25);
962
963    uint8_t *ptr = esds->data();
964    *ptr++ = 0x03;
965    EncodeSize14(&ptr, 22 + csd->size());
966
967    *ptr++ = 0x00;  // ES_ID
968    *ptr++ = 0x00;
969
970    *ptr++ = 0x00;  // streamDependenceFlag, URL_Flag, OCRstreamFlag
971
972    *ptr++ = 0x04;
973    EncodeSize14(&ptr, 16 + csd->size());
974
975    *ptr++ = 0x40;  // Audio ISO/IEC 14496-3
976
977    for (size_t i = 0; i < 12; ++i) {
978        *ptr++ = 0x00;
979    }
980
981    *ptr++ = 0x05;
982    EncodeSize14(&ptr, csd->size());
983
984    memcpy(ptr, csd->data(), csd->size());
985
986    return esds;
987}
988
// Scans the buffer for 00 00 01 start codes and works in two phases:
//
//  1. While mFormat is NULL, it looks for a sequence header (start code
//     0xb3), discards everything in front of it, and once the header
//     (optionally followed by a 0xb5 extension) is terminated by another
//     start code, builds mFormat from it. The header bytes are consumed and
//     NULL is returned for that call.
//  2. Once mFormat is set, everything up to (excluding) the second picture
//     start code (0x00) seen in the buffer is returned as one access unit.
//
// Returns NULL whenever no complete access unit is available yet.
sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEGVideo() {
    // |data| stays valid across the memmove() calls below because they shift
    // bytes within the same buffer rather than reallocating it.
    const uint8_t *data = mBuffer->data();
    size_t size = mBuffer->size();

    bool sawPictureStart = false;
    // Sliding window over the last three start code values seen (-1 = none).
    int pprevStartCode = -1;
    int prevStartCode = -1;
    int currentStartCode = -1;

    size_t offset = 0;
    // Needs the 3-byte prefix plus the start code value byte at offset + 3.
    while (offset + 3 < size) {
        if (memcmp(&data[offset], "\x00\x00\x01", 3)) {
            ++offset;
            continue;
        }

        pprevStartCode = prevStartCode;
        prevStartCode = currentStartCode;
        currentStartCode = data[offset + 3];

        if (currentStartCode == 0xb3 && mFormat == NULL) {
            // No format yet: drop everything in front of this start code so
            // that it sits at offset 0, and discard the matching timestamps.
            memmove(mBuffer->data(), mBuffer->data() + offset, size - offset);
            size -= offset;
            (void)fetchTimestamp(offset);
            offset = 0;
            mBuffer->setRange(0, size);
        }

        if ((prevStartCode == 0xb3 && currentStartCode != 0xb5)
                || (pprevStartCode == 0xb3 && prevStartCode == 0xb5)) {
            // seqHeader without/with extension

            if (mFormat == NULL) {
                CHECK_GE(size, 7u);

                // Bytes 4..6 follow the 4-byte start code at the front of the
                // buffer and hold two 12-bit values: width, then height.
                unsigned width =
                    (data[4] << 4) | data[5] >> 4;

                unsigned height =
                    ((data[5] & 0x0f) << 8) | data[6];

                mFormat = new MetaData;
                mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
                mFormat->setInt32(kKeyWidth, width);
                mFormat->setInt32(kKeyHeight, height);

                ALOGI("found MPEG2 video codec config (%d x %d)", width, height);

                // Everything up to the current start code is the codec
                // specific data.
                sp<ABuffer> csd = new ABuffer(offset);
                memcpy(csd->data(), data, offset);

                memmove(mBuffer->data(),
                        mBuffer->data() + offset,
                        mBuffer->size() - offset);

                mBuffer->setRange(0, mBuffer->size() - offset);
                size -= offset;
                (void)fetchTimestamp(offset);
                offset = 0;

                // hexdump(csd->data(), csd->size());

                sp<ABuffer> esds = MakeMPEGVideoESDS(csd);
                mFormat->setData(
                        kKeyESDS, kTypeESDS, esds->data(), esds->size());

                // The header is consumed; the caller has to call again to
                // get an access unit.
                return NULL;
            }
        }

        if (mFormat != NULL && currentStartCode == 0x00) {
            // Picture start

            if (!sawPictureStart) {
                sawPictureStart = true;
            } else {
                // Second picture start: everything before it is one access
                // unit.
                sp<ABuffer> accessUnit = new ABuffer(offset);
                memcpy(accessUnit->data(), data, offset);

                memmove(mBuffer->data(),
                        mBuffer->data() + offset,
                        mBuffer->size() - offset);

                mBuffer->setRange(0, mBuffer->size() - offset);

                int64_t timeUs = fetchTimestamp(offset);
                CHECK_GE(timeUs, 0ll);

                offset = 0;

                accessUnit->meta()->setInt64("timeUs", timeUs);

                ALOGV("returning MPEG video access unit at time %" PRId64 " us",
                      timeUs);

                // hexdump(accessUnit->data(), accessUnit->size());

                return accessUnit;
            }
        }

        ++offset;
    }

    return NULL;
}
1095
// Returns the size in bytes of the chunk that begins at |data|, i.e. the
// distance from the 00 00 01 start-code prefix at data[0] to the next
// start-code prefix.
//
// A chunk consists of the 3-byte prefix, one start-code value byte and an
// optional payload. The search for the next prefix therefore begins after the
// value byte, so that a value of 0x00 is never mistaken for the first byte of
// the following prefix.
//
// Returns -EAGAIN when
//   - fewer than 7 bytes are available (prefix + value + next prefix),
//   - |data| does not begin with a start-code prefix, or
//   - no further prefix is found within |size| bytes.
// Callers only treat a positive return value as a chunk.
static ssize_t getNextChunkSize(
        const uint8_t *data, size_t size) {
    static const char kStartCode[] = "\x00\x00\x01";

    enum {
        kPrefixSize = 3,
        kHeaderSize = kPrefixSize + 1  // prefix + start-code value byte
    };

    if (size < kHeaderSize + kPrefixSize) {
        return -EAGAIN;
    }

    if (memcmp(kStartCode, data, kPrefixSize)) {
        // Not positioned on a start code. Report "no chunk" rather than
        // aborting the process on unexpected input.
        return -EAGAIN;
    }

    for (size_t offset = kHeaderSize; offset + kPrefixSize <= size; ++offset) {
        if (!memcmp(&data[offset], kStartCode, kPrefixSize)) {
            return offset;
        }
    }

    return -EAGAIN;
}
1119
1120sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEG4Video() {
1121    uint8_t *data = mBuffer->data();
1122    size_t size = mBuffer->size();
1123
1124    enum {
1125        SKIP_TO_VISUAL_OBJECT_SEQ_START,
1126        EXPECT_VISUAL_OBJECT_START,
1127        EXPECT_VO_START,
1128        EXPECT_VOL_START,
1129        WAIT_FOR_VOP_START,
1130        SKIP_TO_VOP_START,
1131
1132    } state;
1133
1134    if (mFormat == NULL) {
1135        state = SKIP_TO_VISUAL_OBJECT_SEQ_START;
1136    } else {
1137        state = SKIP_TO_VOP_START;
1138    }
1139
1140    int32_t width = -1, height = -1;
1141
1142    size_t offset = 0;
1143    ssize_t chunkSize;
1144    while ((chunkSize = getNextChunkSize(
1145                    &data[offset], size - offset)) > 0) {
1146        bool discard = false;
1147
1148        unsigned chunkType = data[offset + 3];
1149
1150        switch (state) {
1151            case SKIP_TO_VISUAL_OBJECT_SEQ_START:
1152            {
1153                if (chunkType == 0xb0) {
1154                    // Discard anything before this marker.
1155
1156                    state = EXPECT_VISUAL_OBJECT_START;
1157                } else {
1158                    discard = true;
1159                }
1160                break;
1161            }
1162
1163            case EXPECT_VISUAL_OBJECT_START:
1164            {
1165                CHECK_EQ(chunkType, 0xb5);
1166                state = EXPECT_VO_START;
1167                break;
1168            }
1169
1170            case EXPECT_VO_START:
1171            {
1172                CHECK_LE(chunkType, 0x1f);
1173                state = EXPECT_VOL_START;
1174                break;
1175            }
1176
1177            case EXPECT_VOL_START:
1178            {
1179                CHECK((chunkType & 0xf0) == 0x20);
1180
1181                CHECK(ExtractDimensionsFromVOLHeader(
1182                            &data[offset], chunkSize,
1183                            &width, &height));
1184
1185                state = WAIT_FOR_VOP_START;
1186                break;
1187            }
1188
1189            case WAIT_FOR_VOP_START:
1190            {
1191                if (chunkType == 0xb3 || chunkType == 0xb6) {
1192                    // group of VOP or VOP start.
1193
1194                    mFormat = new MetaData;
1195                    mFormat->setCString(
1196                            kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG4);
1197
1198                    mFormat->setInt32(kKeyWidth, width);
1199                    mFormat->setInt32(kKeyHeight, height);
1200
1201                    ALOGI("found MPEG4 video codec config (%d x %d)",
1202                         width, height);
1203
1204                    sp<ABuffer> csd = new ABuffer(offset);
1205                    memcpy(csd->data(), data, offset);
1206
1207                    // hexdump(csd->data(), csd->size());
1208
1209                    sp<ABuffer> esds = MakeMPEGVideoESDS(csd);
1210                    mFormat->setData(
1211                            kKeyESDS, kTypeESDS,
1212                            esds->data(), esds->size());
1213
1214                    discard = true;
1215                    state = SKIP_TO_VOP_START;
1216                }
1217
1218                break;
1219            }
1220
1221            case SKIP_TO_VOP_START:
1222            {
1223                if (chunkType == 0xb6) {
1224                    offset += chunkSize;
1225
1226                    sp<ABuffer> accessUnit = new ABuffer(offset);
1227                    memcpy(accessUnit->data(), data, offset);
1228
1229                    memmove(data, &data[offset], size - offset);
1230                    size -= offset;
1231                    mBuffer->setRange(0, size);
1232
1233                    int64_t timeUs = fetchTimestamp(offset);
1234                    CHECK_GE(timeUs, 0ll);
1235
1236                    offset = 0;
1237
1238                    accessUnit->meta()->setInt64("timeUs", timeUs);
1239
1240                    ALOGV("returning MPEG4 video access unit at time %" PRId64 " us",
1241                         timeUs);
1242
1243                    // hexdump(accessUnit->data(), accessUnit->size());
1244
1245                    return accessUnit;
1246                } else if (chunkType != 0xb3) {
1247                    offset += chunkSize;
1248                    discard = true;
1249                }
1250
1251                break;
1252            }
1253
1254            default:
1255                TRESPASS();
1256        }
1257
1258        if (discard) {
1259            (void)fetchTimestamp(offset);
1260            memmove(data, &data[offset], size - offset);
1261            size -= offset;
1262            offset = 0;
1263            mBuffer->setRange(0, size);
1264        } else {
1265            offset += chunkSize;
1266        }
1267    }
1268
1269    return NULL;
1270}
1271
1272}  // namespace android
1273