1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "ESQueue"
19#include <media/stagefright/foundation/ADebug.h>
20
21#include "ESQueue.h"
22
23#include <media/stagefright/foundation/hexdump.h>
24#include <media/stagefright/foundation/ABitReader.h>
25#include <media/stagefright/foundation/ABuffer.h>
26#include <media/stagefright/foundation/AMessage.h>
27#include <media/stagefright/MediaErrors.h>
28#include <media/stagefright/MediaDefs.h>
29#include <media/stagefright/MetaData.h>
30#include <media/stagefright/Utils.h>
31
32#include "include/avc_utils.h"
33
34#include <inttypes.h>
35#include <netinet/in.h>
36
37namespace android {
38
39ElementaryStreamQueue::ElementaryStreamQueue(Mode mode, uint32_t flags)
40    : mMode(mode),
41      mFlags(flags) {
42}
43
44sp<MetaData> ElementaryStreamQueue::getFormat() {
45    return mFormat;
46}
47
48void ElementaryStreamQueue::clear(bool clearFormat) {
49    if (mBuffer != NULL) {
50        mBuffer->setRange(0, 0);
51    }
52
53    mRangeInfos.clear();
54
55    if (clearFormat) {
56        mFormat.clear();
57    }
58}
59
60// Parse AC3 header assuming the current ptr is start position of syncframe,
61// update metadata only applicable, and return the payload size
62static unsigned parseAC3SyncFrame(
63        const uint8_t *ptr, size_t size, sp<MetaData> *metaData) {
64    static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
65    static const unsigned samplingRateTable[] = {48000, 44100, 32000};
66    static const unsigned rates[] = {32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256,
67            320, 384, 448, 512, 576, 640};
68
69    static const unsigned frameSizeTable[19][3] = {
70        { 64, 69, 96 },
71        { 80, 87, 120 },
72        { 96, 104, 144 },
73        { 112, 121, 168 },
74        { 128, 139, 192 },
75        { 160, 174, 240 },
76        { 192, 208, 288 },
77        { 224, 243, 336 },
78        { 256, 278, 384 },
79        { 320, 348, 480 },
80        { 384, 417, 576 },
81        { 448, 487, 672 },
82        { 512, 557, 768 },
83        { 640, 696, 960 },
84        { 768, 835, 1152 },
85        { 896, 975, 1344 },
86        { 1024, 1114, 1536 },
87        { 1152, 1253, 1728 },
88        { 1280, 1393, 1920 },
89    };
90
91    ABitReader bits(ptr, size);
92    unsigned syncStartPos = 0;  // in bytes
93    if (bits.numBitsLeft() < 16) {
94        return 0;
95    }
96    if (bits.getBits(16) != 0x0B77) {
97        return 0;
98    }
99
100    if (bits.numBitsLeft() < 16 + 2 + 6 + 5 + 3 + 3) {
101        ALOGV("Not enough bits left for further parsing");
102        return 0;
103    }
104    bits.skipBits(16);  // crc1
105
106    unsigned fscod = bits.getBits(2);
107    if (fscod == 3) {
108        ALOGW("Incorrect fscod in AC3 header");
109        return 0;
110    }
111
112    unsigned frmsizecod = bits.getBits(6);
113    if (frmsizecod > 37) {
114        ALOGW("Incorrect frmsizecod in AC3 header");
115        return 0;
116    }
117
118    unsigned bsid = bits.getBits(5);
119    if (bsid > 8) {
120        ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
121        return 0;
122    }
123
124    unsigned bsmod = bits.getBits(3);
125    unsigned acmod = bits.getBits(3);
126    unsigned cmixlev = 0;
127    unsigned surmixlev = 0;
128    unsigned dsurmod = 0;
129
130    if ((acmod & 1) > 0 && acmod != 1) {
131        if (bits.numBitsLeft() < 2) {
132            return 0;
133        }
134        cmixlev = bits.getBits(2);
135    }
136    if ((acmod & 4) > 0) {
137        if (bits.numBitsLeft() < 2) {
138            return 0;
139        }
140        surmixlev = bits.getBits(2);
141    }
142    if (acmod == 2) {
143        if (bits.numBitsLeft() < 2) {
144            return 0;
145        }
146        dsurmod = bits.getBits(2);
147    }
148
149    if (bits.numBitsLeft() < 1) {
150        return 0;
151    }
152    unsigned lfeon = bits.getBits(1);
153
154    unsigned samplingRate = samplingRateTable[fscod];
155    unsigned payloadSize = frameSizeTable[frmsizecod >> 1][fscod];
156    if (fscod == 1) {
157        payloadSize += frmsizecod & 1;
158    }
159    payloadSize <<= 1;  // convert from 16-bit words to bytes
160
161    unsigned channelCount = channelCountTable[acmod] + lfeon;
162
163    if (metaData != NULL) {
164        (*metaData)->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3);
165        (*metaData)->setInt32(kKeyChannelCount, channelCount);
166        (*metaData)->setInt32(kKeySampleRate, samplingRate);
167    }
168
169    return payloadSize;
170}
171
172static bool IsSeeminglyValidAC3Header(const uint8_t *ptr, size_t size) {
173    return parseAC3SyncFrame(ptr, size, NULL) > 0;
174}
175
// Checks whether the |size| bytes at |ptr| plausibly start with an ADTS
// header followed by a complete frame.
//
// On success returns true and stores the frame length taken from the header
// in |*frameLength|. Returns false (leaving |*frameLength| untouched) when
//   - fewer than 7 bytes are available,
//   - the 12-bit sync pattern is missing or the layer bits are not 0,
//   - ID == 1 is combined with profile 3 (reserved),
//   - the frame length is smaller than the 7 header bytes just inspected
//     (such a frame cannot exist and would yield an empty access unit), or
//   - the frame length exceeds |size|.
static bool IsSeeminglyValidADTSHeader(
        const uint8_t *ptr, size_t size, size_t *frameLength) {
    // Number of header bytes examined here.
    static const size_t kMinHeaderSize = 7;

    if (size < kMinHeaderSize) {
        // Not enough data to verify header.
        return false;
    }

    if (ptr[0] != 0xff || (ptr[1] >> 4) != 0x0f) {
        return false;
    }

    unsigned layer = (ptr[1] >> 1) & 3;

    if (layer != 0) {
        return false;
    }

    unsigned ID = (ptr[1] >> 3) & 1;
    unsigned profile_ObjectType = ptr[2] >> 6;

    if (ID == 1 && profile_ObjectType == 3) {
        // MPEG-2 profile 3 is reserved.
        return false;
    }

    // 13-bit frame length spread over bytes 3..5; it counts the header too.
    size_t frameLengthInHeader =
            ((ptr[3] & 3) << 11) + (ptr[4] << 3) + ((ptr[5] >> 5) & 7);
    if (frameLengthInHeader < kMinHeaderSize) {
        // Shorter than its own header: not a real frame.
        return false;
    }
    if (frameLengthInHeader > size) {
        return false;
    }

    *frameLength = frameLengthInHeader;
    return true;
}
210
// Returns true if the first three bytes at |ptr| look like the start of an
// MPEG audio frame header: the 11-bit sync pattern is present and none of the
// version, layer, bitrate index or sampling rate index fields holds the value
// treated as reserved below. Returns false if fewer than 3 bytes are
// available.
static bool IsSeeminglyValidMPEGAudioHeader(const uint8_t *ptr, size_t size) {
    if (size < 3) {
        // Too little data to judge.
        return false;
    }

    const bool hasSync = ptr[0] == 0xff && (ptr[1] >> 5) == 0x07;
    if (!hasSync) {
        return false;
    }

    const unsigned versionBits = (ptr[1] >> 3) & 3;
    const unsigned layerBits = (ptr[1] >> 1) & 3;
    const unsigned bitrateBits = ptr[2] >> 4;
    const unsigned sampleRateBits = (ptr[2] >> 2) & 3;

    // Each of these values is reserved for its field.
    return versionBits != 1
            && layerBits != 0
            && bitrateBits != 0x0f
            && sampleRateBits != 3;
}
247
248status_t ElementaryStreamQueue::appendData(
249        const void *data, size_t size, int64_t timeUs) {
250    if (mBuffer == NULL || mBuffer->size() == 0) {
251        switch (mMode) {
252            case H264:
253            case MPEG_VIDEO:
254            {
255#if 0
256                if (size < 4 || memcmp("\x00\x00\x00\x01", data, 4)) {
257                    return ERROR_MALFORMED;
258                }
259#else
260                uint8_t *ptr = (uint8_t *)data;
261
262                ssize_t startOffset = -1;
263                for (size_t i = 0; i + 3 < size; ++i) {
264                    if (!memcmp("\x00\x00\x00\x01", &ptr[i], 4)) {
265                        startOffset = i;
266                        break;
267                    }
268                }
269
270                if (startOffset < 0) {
271                    return ERROR_MALFORMED;
272                }
273
274                if (startOffset > 0) {
275                    ALOGI("found something resembling an H.264/MPEG syncword "
276                          "at offset %zd",
277                          startOffset);
278                }
279
280                data = &ptr[startOffset];
281                size -= startOffset;
282#endif
283                break;
284            }
285
286            case MPEG4_VIDEO:
287            {
288#if 0
289                if (size < 3 || memcmp("\x00\x00\x01", data, 3)) {
290                    return ERROR_MALFORMED;
291                }
292#else
293                uint8_t *ptr = (uint8_t *)data;
294
295                ssize_t startOffset = -1;
296                for (size_t i = 0; i + 2 < size; ++i) {
297                    if (!memcmp("\x00\x00\x01", &ptr[i], 3)) {
298                        startOffset = i;
299                        break;
300                    }
301                }
302
303                if (startOffset < 0) {
304                    return ERROR_MALFORMED;
305                }
306
307                if (startOffset > 0) {
308                    ALOGI("found something resembling an H.264/MPEG syncword "
309                          "at offset %zd",
310                          startOffset);
311                }
312
313                data = &ptr[startOffset];
314                size -= startOffset;
315#endif
316                break;
317            }
318
319            case AAC:
320            {
321                uint8_t *ptr = (uint8_t *)data;
322
323#if 0
324                if (size < 2 || ptr[0] != 0xff || (ptr[1] >> 4) != 0x0f) {
325                    return ERROR_MALFORMED;
326                }
327#else
328                ssize_t startOffset = -1;
329                size_t frameLength;
330                for (size_t i = 0; i < size; ++i) {
331                    if (IsSeeminglyValidADTSHeader(
332                            &ptr[i], size - i, &frameLength)) {
333                        startOffset = i;
334                        break;
335                    }
336                }
337
338                if (startOffset < 0) {
339                    return ERROR_MALFORMED;
340                }
341
342                if (startOffset > 0) {
343                    ALOGI("found something resembling an AAC syncword at "
344                          "offset %zd",
345                          startOffset);
346                }
347
348                if (frameLength != size - startOffset) {
349                    ALOGV("First ADTS AAC frame length is %zd bytes, "
350                          "while the buffer size is %zd bytes.",
351                          frameLength, size - startOffset);
352                }
353
354                data = &ptr[startOffset];
355                size -= startOffset;
356#endif
357                break;
358            }
359
360            case AC3:
361            {
362                uint8_t *ptr = (uint8_t *)data;
363
364                ssize_t startOffset = -1;
365                for (size_t i = 0; i < size; ++i) {
366                    if (IsSeeminglyValidAC3Header(&ptr[i], size - i)) {
367                        startOffset = i;
368                        break;
369                    }
370                }
371
372                if (startOffset < 0) {
373                    return ERROR_MALFORMED;
374                }
375
376                if (startOffset > 0) {
377                    ALOGI("found something resembling an AC3 syncword at "
378                          "offset %zd",
379                          startOffset);
380                }
381
382                data = &ptr[startOffset];
383                size -= startOffset;
384                break;
385            }
386
387            case MPEG_AUDIO:
388            {
389                uint8_t *ptr = (uint8_t *)data;
390
391                ssize_t startOffset = -1;
392                for (size_t i = 0; i < size; ++i) {
393                    if (IsSeeminglyValidMPEGAudioHeader(&ptr[i], size - i)) {
394                        startOffset = i;
395                        break;
396                    }
397                }
398
399                if (startOffset < 0) {
400                    return ERROR_MALFORMED;
401                }
402
403                if (startOffset > 0) {
404                    ALOGI("found something resembling an MPEG audio "
405                          "syncword at offset %zd",
406                          startOffset);
407                }
408
409                data = &ptr[startOffset];
410                size -= startOffset;
411                break;
412            }
413
414            case PCM_AUDIO:
415            {
416                break;
417            }
418
419            default:
420                TRESPASS();
421                break;
422        }
423    }
424
425    size_t neededSize = (mBuffer == NULL ? 0 : mBuffer->size()) + size;
426    if (mBuffer == NULL || neededSize > mBuffer->capacity()) {
427        neededSize = (neededSize + 65535) & ~65535;
428
429        ALOGV("resizing buffer to size %zu", neededSize);
430
431        sp<ABuffer> buffer = new ABuffer(neededSize);
432        if (mBuffer != NULL) {
433            memcpy(buffer->data(), mBuffer->data(), mBuffer->size());
434            buffer->setRange(0, mBuffer->size());
435        } else {
436            buffer->setRange(0, 0);
437        }
438
439        mBuffer = buffer;
440    }
441
442    memcpy(mBuffer->data() + mBuffer->size(), data, size);
443    mBuffer->setRange(0, mBuffer->size() + size);
444
445    RangeInfo info;
446    info.mLength = size;
447    info.mTimestampUs = timeUs;
448    mRangeInfos.push_back(info);
449
450#if 0
451    if (mMode == AAC) {
452        ALOGI("size = %zu, timeUs = %.2f secs", size, timeUs / 1E6);
453        hexdump(data, size);
454    }
455#endif
456
457    return OK;
458}
459
460sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnit() {
461    if ((mFlags & kFlag_AlignedData) && mMode == H264) {
462        if (mRangeInfos.empty()) {
463            return NULL;
464        }
465
466        RangeInfo info = *mRangeInfos.begin();
467        mRangeInfos.erase(mRangeInfos.begin());
468
469        sp<ABuffer> accessUnit = new ABuffer(info.mLength);
470        memcpy(accessUnit->data(), mBuffer->data(), info.mLength);
471        accessUnit->meta()->setInt64("timeUs", info.mTimestampUs);
472
473        memmove(mBuffer->data(),
474                mBuffer->data() + info.mLength,
475                mBuffer->size() - info.mLength);
476
477        mBuffer->setRange(0, mBuffer->size() - info.mLength);
478
479        if (mFormat == NULL) {
480            mFormat = MakeAVCCodecSpecificData(accessUnit);
481        }
482
483        return accessUnit;
484    }
485
486    switch (mMode) {
487        case H264:
488            return dequeueAccessUnitH264();
489        case AAC:
490            return dequeueAccessUnitAAC();
491        case AC3:
492            return dequeueAccessUnitAC3();
493        case MPEG_VIDEO:
494            return dequeueAccessUnitMPEGVideo();
495        case MPEG4_VIDEO:
496            return dequeueAccessUnitMPEG4Video();
497        case PCM_AUDIO:
498            return dequeueAccessUnitPCMAudio();
499        default:
500            CHECK_EQ((unsigned)mMode, (unsigned)MPEG_AUDIO);
501            return dequeueAccessUnitMPEGAudio();
502    }
503}
504
505sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitAC3() {
506    unsigned syncStartPos = 0;  // in bytes
507    unsigned payloadSize = 0;
508    sp<MetaData> format = new MetaData;
509    while (true) {
510        if (syncStartPos + 2 >= mBuffer->size()) {
511            return NULL;
512        }
513
514        payloadSize = parseAC3SyncFrame(
515                mBuffer->data() + syncStartPos,
516                mBuffer->size() - syncStartPos,
517                &format);
518        if (payloadSize > 0) {
519            break;
520        }
521        ++syncStartPos;
522    }
523
524    if (mBuffer->size() < syncStartPos + payloadSize) {
525        ALOGV("Not enough buffer size for AC3");
526        return NULL;
527    }
528
529    if (mFormat == NULL) {
530        mFormat = format;
531    }
532
533    sp<ABuffer> accessUnit = new ABuffer(syncStartPos + payloadSize);
534    memcpy(accessUnit->data(), mBuffer->data(), syncStartPos + payloadSize);
535
536    int64_t timeUs = fetchTimestamp(syncStartPos + payloadSize);
537    CHECK_GE(timeUs, 0ll);
538    accessUnit->meta()->setInt64("timeUs", timeUs);
539
540    memmove(
541            mBuffer->data(),
542            mBuffer->data() + syncStartPos + payloadSize,
543            mBuffer->size() - syncStartPos - payloadSize);
544
545    mBuffer->setRange(0, mBuffer->size() - syncStartPos - payloadSize);
546
547    return accessUnit;
548}
549
550sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitPCMAudio() {
551    if (mBuffer->size() < 4) {
552        return NULL;
553    }
554
555    ABitReader bits(mBuffer->data(), 4);
556    CHECK_EQ(bits.getBits(8), 0xa0);
557    unsigned numAUs = bits.getBits(8);
558    bits.skipBits(8);
559    unsigned quantization_word_length = bits.getBits(2);
560    unsigned audio_sampling_frequency = bits.getBits(3);
561    unsigned num_channels = bits.getBits(3);
562
563    CHECK_EQ(audio_sampling_frequency, 2);  // 48kHz
564    CHECK_EQ(num_channels, 1u);  // stereo!
565
566    if (mFormat == NULL) {
567        mFormat = new MetaData;
568        mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
569        mFormat->setInt32(kKeyChannelCount, 2);
570        mFormat->setInt32(kKeySampleRate, 48000);
571    }
572
573    static const size_t kFramesPerAU = 80;
574    size_t frameSize = 2 /* numChannels */ * sizeof(int16_t);
575
576    size_t payloadSize = numAUs * frameSize * kFramesPerAU;
577
578    if (mBuffer->size() < 4 + payloadSize) {
579        return NULL;
580    }
581
582    sp<ABuffer> accessUnit = new ABuffer(payloadSize);
583    memcpy(accessUnit->data(), mBuffer->data() + 4, payloadSize);
584
585    int64_t timeUs = fetchTimestamp(payloadSize + 4);
586    CHECK_GE(timeUs, 0ll);
587    accessUnit->meta()->setInt64("timeUs", timeUs);
588
589    int16_t *ptr = (int16_t *)accessUnit->data();
590    for (size_t i = 0; i < payloadSize / sizeof(int16_t); ++i) {
591        ptr[i] = ntohs(ptr[i]);
592    }
593
594    memmove(
595            mBuffer->data(),
596            mBuffer->data() + 4 + payloadSize,
597            mBuffer->size() - 4 - payloadSize);
598
599    mBuffer->setRange(0, mBuffer->size() - 4 - payloadSize);
600
601    return accessUnit;
602}
603
604sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitAAC() {
605    if (mBuffer->size() == 0) {
606        return NULL;
607    }
608
609    CHECK(!mRangeInfos.empty());
610
611    const RangeInfo &info = *mRangeInfos.begin();
612    if (mBuffer->size() < info.mLength) {
613        return NULL;
614    }
615
616    CHECK_GE(info.mTimestampUs, 0ll);
617
618    // The idea here is consume all AAC frames starting at offsets before
619    // info.mLength so we can assign a meaningful timestamp without
620    // having to interpolate.
621    // The final AAC frame may well extend into the next RangeInfo but
622    // that's ok.
623    // TODO: the logic commented above is skipped because codec cannot take
624    // arbitrary sized input buffers;
625    size_t offset = 0;
626    while (offset < info.mLength) {
627        if (offset + 7 > mBuffer->size()) {
628            return NULL;
629        }
630
631        ABitReader bits(mBuffer->data() + offset, mBuffer->size() - offset);
632
633        // adts_fixed_header
634
635        CHECK_EQ(bits.getBits(12), 0xfffu);
636        bits.skipBits(3);  // ID, layer
637        bool protection_absent = bits.getBits(1) != 0;
638
639        if (mFormat == NULL) {
640            unsigned profile = bits.getBits(2);
641            CHECK_NE(profile, 3u);
642            unsigned sampling_freq_index = bits.getBits(4);
643            bits.getBits(1);  // private_bit
644            unsigned channel_configuration = bits.getBits(3);
645            CHECK_NE(channel_configuration, 0u);
646            bits.skipBits(2);  // original_copy, home
647
648            mFormat = MakeAACCodecSpecificData(
649                    profile, sampling_freq_index, channel_configuration);
650
651            mFormat->setInt32(kKeyIsADTS, true);
652
653            int32_t sampleRate;
654            int32_t numChannels;
655            CHECK(mFormat->findInt32(kKeySampleRate, &sampleRate));
656            CHECK(mFormat->findInt32(kKeyChannelCount, &numChannels));
657
658            ALOGI("found AAC codec config (%d Hz, %d channels)",
659                 sampleRate, numChannels);
660        } else {
661            // profile_ObjectType, sampling_frequency_index, private_bits,
662            // channel_configuration, original_copy, home
663            bits.skipBits(12);
664        }
665
666        // adts_variable_header
667
668        // copyright_identification_bit, copyright_identification_start
669        bits.skipBits(2);
670
671        unsigned aac_frame_length = bits.getBits(13);
672
673        bits.skipBits(11);  // adts_buffer_fullness
674
675        unsigned number_of_raw_data_blocks_in_frame = bits.getBits(2);
676
677        if (number_of_raw_data_blocks_in_frame != 0) {
678            // To be implemented.
679            TRESPASS();
680        }
681
682        if (offset + aac_frame_length > mBuffer->size()) {
683            return NULL;
684        }
685
686        size_t headerSize = protection_absent ? 7 : 9;
687
688        offset += aac_frame_length;
689        // TODO: move back to concatenation when codec can support arbitrary input buffers.
690        // For now only queue a single buffer
691        break;
692    }
693
694    int64_t timeUs = fetchTimestampAAC(offset);
695
696    sp<ABuffer> accessUnit = new ABuffer(offset);
697    memcpy(accessUnit->data(), mBuffer->data(), offset);
698
699    memmove(mBuffer->data(), mBuffer->data() + offset,
700            mBuffer->size() - offset);
701    mBuffer->setRange(0, mBuffer->size() - offset);
702
703    accessUnit->meta()->setInt64("timeUs", timeUs);
704
705    return accessUnit;
706}
707
708int64_t ElementaryStreamQueue::fetchTimestamp(size_t size) {
709    int64_t timeUs = -1;
710    bool first = true;
711
712    while (size > 0) {
713        CHECK(!mRangeInfos.empty());
714
715        RangeInfo *info = &*mRangeInfos.begin();
716
717        if (first) {
718            timeUs = info->mTimestampUs;
719            first = false;
720        }
721
722        if (info->mLength > size) {
723            info->mLength -= size;
724            size = 0;
725        } else {
726            size -= info->mLength;
727
728            mRangeInfos.erase(mRangeInfos.begin());
729            info = NULL;
730        }
731
732    }
733
734    if (timeUs == 0ll) {
735        ALOGV("Returning 0 timestamp");
736    }
737
738    return timeUs;
739}
740
741// TODO: avoid interpolating timestamps once codec supports arbitrary sized input buffers
742int64_t ElementaryStreamQueue::fetchTimestampAAC(size_t size) {
743    int64_t timeUs = -1;
744    bool first = true;
745
746    size_t samplesize = size;
747    while (size > 0) {
748        CHECK(!mRangeInfos.empty());
749
750        RangeInfo *info = &*mRangeInfos.begin();
751
752        if (first) {
753            timeUs = info->mTimestampUs;
754            first = false;
755        }
756
757        if (info->mLength > size) {
758            int32_t sampleRate;
759            CHECK(mFormat->findInt32(kKeySampleRate, &sampleRate));
760            info->mLength -= size;
761            size_t numSamples = 1024 * size / samplesize;
762            info->mTimestampUs += numSamples * 1000000ll / sampleRate;
763            size = 0;
764        } else {
765            size -= info->mLength;
766
767            mRangeInfos.erase(mRangeInfos.begin());
768            info = NULL;
769        }
770
771    }
772
773    if (timeUs == 0ll) {
774        ALOGV("Returning 0 timestamp");
775    }
776
777    return timeUs;
778}
779
// Location of one NAL unit inside the queue's buffer.
struct NALPosition {
    // Byte offset of the NAL unit from the start of the buffer.
    size_t nalOffset;

    // Length of the NAL unit in bytes.
    size_t nalSize;
};
784
785sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitH264() {
786    const uint8_t *data = mBuffer->data();
787
788    size_t size = mBuffer->size();
789    Vector<NALPosition> nals;
790
791    size_t totalSize = 0;
792
793    status_t err;
794    const uint8_t *nalStart;
795    size_t nalSize;
796    bool foundSlice = false;
797    while ((err = getNextNALUnit(&data, &size, &nalStart, &nalSize)) == OK) {
798        if (nalSize == 0) continue;
799
800        unsigned nalType = nalStart[0] & 0x1f;
801        bool flush = false;
802
803        if (nalType == 1 || nalType == 5) {
804            if (foundSlice) {
805                ABitReader br(nalStart + 1, nalSize);
806                unsigned first_mb_in_slice = parseUE(&br);
807
808                if (first_mb_in_slice == 0) {
809                    // This slice starts a new frame.
810
811                    flush = true;
812                }
813            }
814
815            foundSlice = true;
816        } else if ((nalType == 9 || nalType == 7) && foundSlice) {
817            // Access unit delimiter and SPS will be associated with the
818            // next frame.
819
820            flush = true;
821        }
822
823        if (flush) {
824            // The access unit will contain all nal units up to, but excluding
825            // the current one, separated by 0x00 0x00 0x00 0x01 startcodes.
826
827            size_t auSize = 4 * nals.size() + totalSize;
828            sp<ABuffer> accessUnit = new ABuffer(auSize);
829
830#if !LOG_NDEBUG
831            AString out;
832#endif
833
834            size_t dstOffset = 0;
835            for (size_t i = 0; i < nals.size(); ++i) {
836                const NALPosition &pos = nals.itemAt(i);
837
838                unsigned nalType = mBuffer->data()[pos.nalOffset] & 0x1f;
839
840                if (nalType == 6) {
841                    sp<ABuffer> sei = new ABuffer(pos.nalSize);
842                    memcpy(sei->data(), mBuffer->data() + pos.nalOffset, pos.nalSize);
843                    accessUnit->meta()->setBuffer("sei", sei);
844                }
845
846#if !LOG_NDEBUG
847                char tmp[128];
848                sprintf(tmp, "0x%02x", nalType);
849                if (i > 0) {
850                    out.append(", ");
851                }
852                out.append(tmp);
853#endif
854
855                memcpy(accessUnit->data() + dstOffset, "\x00\x00\x00\x01", 4);
856
857                memcpy(accessUnit->data() + dstOffset + 4,
858                       mBuffer->data() + pos.nalOffset,
859                       pos.nalSize);
860
861                dstOffset += pos.nalSize + 4;
862            }
863
864#if !LOG_NDEBUG
865            ALOGV("accessUnit contains nal types %s", out.c_str());
866#endif
867
868            const NALPosition &pos = nals.itemAt(nals.size() - 1);
869            size_t nextScan = pos.nalOffset + pos.nalSize;
870
871            memmove(mBuffer->data(),
872                    mBuffer->data() + nextScan,
873                    mBuffer->size() - nextScan);
874
875            mBuffer->setRange(0, mBuffer->size() - nextScan);
876
877            int64_t timeUs = fetchTimestamp(nextScan);
878            CHECK_GE(timeUs, 0ll);
879
880            accessUnit->meta()->setInt64("timeUs", timeUs);
881
882            if (mFormat == NULL) {
883                mFormat = MakeAVCCodecSpecificData(accessUnit);
884            }
885
886            return accessUnit;
887        }
888
889        NALPosition pos;
890        pos.nalOffset = nalStart - mBuffer->data();
891        pos.nalSize = nalSize;
892
893        nals.push(pos);
894
895        totalSize += nalSize;
896    }
897    CHECK_EQ(err, (status_t)-EAGAIN);
898
899    return NULL;
900}
901
902sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEGAudio() {
903    const uint8_t *data = mBuffer->data();
904    size_t size = mBuffer->size();
905
906    if (size < 4) {
907        return NULL;
908    }
909
910    uint32_t header = U32_AT(data);
911
912    size_t frameSize;
913    int samplingRate, numChannels, bitrate, numSamples;
914    CHECK(GetMPEGAudioFrameSize(
915                header, &frameSize, &samplingRate, &numChannels,
916                &bitrate, &numSamples));
917
918    if (size < frameSize) {
919        return NULL;
920    }
921
922    unsigned layer = 4 - ((header >> 17) & 3);
923
924    sp<ABuffer> accessUnit = new ABuffer(frameSize);
925    memcpy(accessUnit->data(), data, frameSize);
926
927    memmove(mBuffer->data(),
928            mBuffer->data() + frameSize,
929            mBuffer->size() - frameSize);
930
931    mBuffer->setRange(0, mBuffer->size() - frameSize);
932
933    int64_t timeUs = fetchTimestamp(frameSize);
934    CHECK_GE(timeUs, 0ll);
935
936    accessUnit->meta()->setInt64("timeUs", timeUs);
937
938    if (mFormat == NULL) {
939        mFormat = new MetaData;
940
941        switch (layer) {
942            case 1:
943                mFormat->setCString(
944                        kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_I);
945                break;
946            case 2:
947                mFormat->setCString(
948                        kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_II);
949                break;
950            case 3:
951                mFormat->setCString(
952                        kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
953                break;
954            default:
955                TRESPASS();
956        }
957
958        mFormat->setInt32(kKeySampleRate, samplingRate);
959        mFormat->setInt32(kKeyChannelCount, numChannels);
960    }
961
962    return accessUnit;
963}
964
965static void EncodeSize14(uint8_t **_ptr, size_t size) {
966    CHECK_LE(size, 0x3fff);
967
968    uint8_t *ptr = *_ptr;
969
970    *ptr++ = 0x80 | (size >> 7);
971    *ptr++ = size & 0x7f;
972
973    *_ptr = ptr;
974}
975
976static sp<ABuffer> MakeMPEGVideoESDS(const sp<ABuffer> &csd) {
977    sp<ABuffer> esds = new ABuffer(csd->size() + 25);
978
979    uint8_t *ptr = esds->data();
980    *ptr++ = 0x03;
981    EncodeSize14(&ptr, 22 + csd->size());
982
983    *ptr++ = 0x00;  // ES_ID
984    *ptr++ = 0x00;
985
986    *ptr++ = 0x00;  // streamDependenceFlag, URL_Flag, OCRstreamFlag
987
988    *ptr++ = 0x04;
989    EncodeSize14(&ptr, 16 + csd->size());
990
991    *ptr++ = 0x40;  // Audio ISO/IEC 14496-3
992
993    for (size_t i = 0; i < 12; ++i) {
994        *ptr++ = 0x00;
995    }
996
997    *ptr++ = 0x05;
998    EncodeSize14(&ptr, csd->size());
999
1000    memcpy(ptr, csd->data(), csd->size());
1001
1002    return esds;
1003}
1004
1005sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEGVideo() {
1006    const uint8_t *data = mBuffer->data();
1007    size_t size = mBuffer->size();
1008
1009    bool sawPictureStart = false;
1010    int pprevStartCode = -1;
1011    int prevStartCode = -1;
1012    int currentStartCode = -1;
1013
1014    size_t offset = 0;
1015    while (offset + 3 < size) {
1016        if (memcmp(&data[offset], "\x00\x00\x01", 3)) {
1017            ++offset;
1018            continue;
1019        }
1020
1021        pprevStartCode = prevStartCode;
1022        prevStartCode = currentStartCode;
1023        currentStartCode = data[offset + 3];
1024
1025        if (currentStartCode == 0xb3 && mFormat == NULL) {
1026            memmove(mBuffer->data(), mBuffer->data() + offset, size - offset);
1027            size -= offset;
1028            (void)fetchTimestamp(offset);
1029            offset = 0;
1030            mBuffer->setRange(0, size);
1031        }
1032
1033        if ((prevStartCode == 0xb3 && currentStartCode != 0xb5)
1034                || (pprevStartCode == 0xb3 && prevStartCode == 0xb5)) {
1035            // seqHeader without/with extension
1036
1037            if (mFormat == NULL) {
1038                CHECK_GE(size, 7u);
1039
1040                unsigned width =
1041                    (data[4] << 4) | data[5] >> 4;
1042
1043                unsigned height =
1044                    ((data[5] & 0x0f) << 8) | data[6];
1045
1046                mFormat = new MetaData;
1047                mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
1048                mFormat->setInt32(kKeyWidth, width);
1049                mFormat->setInt32(kKeyHeight, height);
1050
1051                ALOGI("found MPEG2 video codec config (%d x %d)", width, height);
1052
1053                sp<ABuffer> csd = new ABuffer(offset);
1054                memcpy(csd->data(), data, offset);
1055
1056                memmove(mBuffer->data(),
1057                        mBuffer->data() + offset,
1058                        mBuffer->size() - offset);
1059
1060                mBuffer->setRange(0, mBuffer->size() - offset);
1061                size -= offset;
1062                (void)fetchTimestamp(offset);
1063                offset = 0;
1064
1065                // hexdump(csd->data(), csd->size());
1066
1067                sp<ABuffer> esds = MakeMPEGVideoESDS(csd);
1068                mFormat->setData(
1069                        kKeyESDS, kTypeESDS, esds->data(), esds->size());
1070
1071                return NULL;
1072            }
1073        }
1074
1075        if (mFormat != NULL && currentStartCode == 0x00) {
1076            // Picture start
1077
1078            if (!sawPictureStart) {
1079                sawPictureStart = true;
1080            } else {
1081                sp<ABuffer> accessUnit = new ABuffer(offset);
1082                memcpy(accessUnit->data(), data, offset);
1083
1084                memmove(mBuffer->data(),
1085                        mBuffer->data() + offset,
1086                        mBuffer->size() - offset);
1087
1088                mBuffer->setRange(0, mBuffer->size() - offset);
1089
1090                int64_t timeUs = fetchTimestamp(offset);
1091                CHECK_GE(timeUs, 0ll);
1092
1093                offset = 0;
1094
1095                accessUnit->meta()->setInt64("timeUs", timeUs);
1096
1097                ALOGV("returning MPEG video access unit at time %" PRId64 " us",
1098                      timeUs);
1099
1100                // hexdump(accessUnit->data(), accessUnit->size());
1101
1102                return accessUnit;
1103            }
1104        }
1105
1106        ++offset;
1107    }
1108
1109    return NULL;
1110}
1111
// Returns the size in bytes of the chunk that starts at |data|, i.e. the
// distance from |data| to the next 00 00 01 start code prefix.
//
// |data| must begin with a start code prefix. A chunk consists of at least
// the 3-byte prefix plus the 1-byte start code value, so the search for the
// next prefix begins at offset 4; this keeps a start code value of 0x00 from
// being mistaken for the first byte of the following prefix.
//
// Returns
//   > 0      size of the chunk,
//   -EAGAIN  not enough data to locate the next start code yet,
//   -EINVAL  |data| does not begin with a start code prefix.
static ssize_t getNextChunkSize(
        const uint8_t *data, size_t size) {
    static const char kStartCode[] = "\x00\x00\x01";
    static const size_t kPrefixSize = 3;

    if (size < kPrefixSize) {
        return -EAGAIN;
    }

    if (memcmp(kStartCode, data, kPrefixSize)) {
        // Report malformed input to the caller instead of aborting.
        return -EINVAL;
    }

    // Skip the prefix and the start code value byte.
    size_t offset = kPrefixSize + 1;
    while (offset + 2 < size) {
        if (!memcmp(&data[offset], kStartCode, kPrefixSize)) {
            return static_cast<ssize_t>(offset);
        }

        ++offset;
    }

    return -EAGAIN;
}
1135
1136sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEG4Video() {
1137    uint8_t *data = mBuffer->data();
1138    size_t size = mBuffer->size();
1139
1140    enum {
1141        SKIP_TO_VISUAL_OBJECT_SEQ_START,
1142        EXPECT_VISUAL_OBJECT_START,
1143        EXPECT_VO_START,
1144        EXPECT_VOL_START,
1145        WAIT_FOR_VOP_START,
1146        SKIP_TO_VOP_START,
1147
1148    } state;
1149
1150    if (mFormat == NULL) {
1151        state = SKIP_TO_VISUAL_OBJECT_SEQ_START;
1152    } else {
1153        state = SKIP_TO_VOP_START;
1154    }
1155
1156    int32_t width = -1, height = -1;
1157
1158    size_t offset = 0;
1159    ssize_t chunkSize;
1160    while ((chunkSize = getNextChunkSize(
1161                    &data[offset], size - offset)) > 0) {
1162        bool discard = false;
1163
1164        unsigned chunkType = data[offset + 3];
1165
1166        switch (state) {
1167            case SKIP_TO_VISUAL_OBJECT_SEQ_START:
1168            {
1169                if (chunkType == 0xb0) {
1170                    // Discard anything before this marker.
1171
1172                    state = EXPECT_VISUAL_OBJECT_START;
1173                } else {
1174                    discard = true;
1175                }
1176                break;
1177            }
1178
1179            case EXPECT_VISUAL_OBJECT_START:
1180            {
1181                CHECK_EQ(chunkType, 0xb5);
1182                state = EXPECT_VO_START;
1183                break;
1184            }
1185
1186            case EXPECT_VO_START:
1187            {
1188                CHECK_LE(chunkType, 0x1f);
1189                state = EXPECT_VOL_START;
1190                break;
1191            }
1192
1193            case EXPECT_VOL_START:
1194            {
1195                CHECK((chunkType & 0xf0) == 0x20);
1196
1197                CHECK(ExtractDimensionsFromVOLHeader(
1198                            &data[offset], chunkSize,
1199                            &width, &height));
1200
1201                state = WAIT_FOR_VOP_START;
1202                break;
1203            }
1204
1205            case WAIT_FOR_VOP_START:
1206            {
1207                if (chunkType == 0xb3 || chunkType == 0xb6) {
1208                    // group of VOP or VOP start.
1209
1210                    mFormat = new MetaData;
1211                    mFormat->setCString(
1212                            kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG4);
1213
1214                    mFormat->setInt32(kKeyWidth, width);
1215                    mFormat->setInt32(kKeyHeight, height);
1216
1217                    ALOGI("found MPEG4 video codec config (%d x %d)",
1218                         width, height);
1219
1220                    sp<ABuffer> csd = new ABuffer(offset);
1221                    memcpy(csd->data(), data, offset);
1222
1223                    // hexdump(csd->data(), csd->size());
1224
1225                    sp<ABuffer> esds = MakeMPEGVideoESDS(csd);
1226                    mFormat->setData(
1227                            kKeyESDS, kTypeESDS,
1228                            esds->data(), esds->size());
1229
1230                    discard = true;
1231                    state = SKIP_TO_VOP_START;
1232                }
1233
1234                break;
1235            }
1236
1237            case SKIP_TO_VOP_START:
1238            {
1239                if (chunkType == 0xb6) {
1240                    offset += chunkSize;
1241
1242                    sp<ABuffer> accessUnit = new ABuffer(offset);
1243                    memcpy(accessUnit->data(), data, offset);
1244
1245                    memmove(data, &data[offset], size - offset);
1246                    size -= offset;
1247                    mBuffer->setRange(0, size);
1248
1249                    int64_t timeUs = fetchTimestamp(offset);
1250                    CHECK_GE(timeUs, 0ll);
1251
1252                    offset = 0;
1253
1254                    accessUnit->meta()->setInt64("timeUs", timeUs);
1255
1256                    ALOGV("returning MPEG4 video access unit at time %" PRId64 " us",
1257                         timeUs);
1258
1259                    // hexdump(accessUnit->data(), accessUnit->size());
1260
1261                    return accessUnit;
1262                } else if (chunkType != 0xb3) {
1263                    offset += chunkSize;
1264                    discard = true;
1265                }
1266
1267                break;
1268            }
1269
1270            default:
1271                TRESPASS();
1272        }
1273
1274        if (discard) {
1275            (void)fetchTimestamp(offset);
1276            memmove(data, &data[offset], size - offset);
1277            size -= offset;
1278            offset = 0;
1279            mBuffer->setRange(0, size);
1280        } else {
1281            offset += chunkSize;
1282        }
1283    }
1284
1285    return NULL;
1286}
1287
1288}  // namespace android
1289