ESQueue.cpp revision be9634d071e79b72a42a4504f64eda9e2a0bceb8
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "ESQueue"
19#include <media/stagefright/foundation/ADebug.h>
20
21#include "ESQueue.h"
22
23#include <media/stagefright/foundation/hexdump.h>
24#include <media/stagefright/foundation/ABitReader.h>
25#include <media/stagefright/foundation/ABuffer.h>
26#include <media/stagefright/foundation/AMessage.h>
27#include <media/stagefright/MediaErrors.h>
28#include <media/stagefright/MediaDefs.h>
29#include <media/stagefright/MetaData.h>
30#include <media/stagefright/Utils.h>
31
32#include "include/avc_utils.h"
33
34#include <inttypes.h>
35#include <netinet/in.h>
36
37namespace android {
38
39ElementaryStreamQueue::ElementaryStreamQueue(Mode mode, uint32_t flags)
40    : mMode(mode),
41      mFlags(flags),
42      mEOSReached(false) {
43}
44
45sp<MetaData> ElementaryStreamQueue::getFormat() {
46    return mFormat;
47}
48
49void ElementaryStreamQueue::clear(bool clearFormat) {
50    if (mBuffer != NULL) {
51        mBuffer->setRange(0, 0);
52    }
53
54    mRangeInfos.clear();
55
56    if (clearFormat) {
57        mFormat.clear();
58    }
59}
60
61// Parse AC3 header assuming the current ptr is start position of syncframe,
62// update metadata only applicable, and return the payload size
63static unsigned parseAC3SyncFrame(
64        const uint8_t *ptr, size_t size, sp<MetaData> *metaData) {
65    static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
66    static const unsigned samplingRateTable[] = {48000, 44100, 32000};
67
68    static const unsigned frameSizeTable[19][3] = {
69        { 64, 69, 96 },
70        { 80, 87, 120 },
71        { 96, 104, 144 },
72        { 112, 121, 168 },
73        { 128, 139, 192 },
74        { 160, 174, 240 },
75        { 192, 208, 288 },
76        { 224, 243, 336 },
77        { 256, 278, 384 },
78        { 320, 348, 480 },
79        { 384, 417, 576 },
80        { 448, 487, 672 },
81        { 512, 557, 768 },
82        { 640, 696, 960 },
83        { 768, 835, 1152 },
84        { 896, 975, 1344 },
85        { 1024, 1114, 1536 },
86        { 1152, 1253, 1728 },
87        { 1280, 1393, 1920 },
88    };
89
90    ABitReader bits(ptr, size);
91    if (bits.numBitsLeft() < 16) {
92        return 0;
93    }
94    if (bits.getBits(16) != 0x0B77) {
95        return 0;
96    }
97
98    if (bits.numBitsLeft() < 16 + 2 + 6 + 5 + 3 + 3) {
99        ALOGV("Not enough bits left for further parsing");
100        return 0;
101    }
102    bits.skipBits(16);  // crc1
103
104    unsigned fscod = bits.getBits(2);
105    if (fscod == 3) {
106        ALOGW("Incorrect fscod in AC3 header");
107        return 0;
108    }
109
110    unsigned frmsizecod = bits.getBits(6);
111    if (frmsizecod > 37) {
112        ALOGW("Incorrect frmsizecod in AC3 header");
113        return 0;
114    }
115
116    unsigned bsid = bits.getBits(5);
117    if (bsid > 8) {
118        ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
119        return 0;
120    }
121
122    unsigned bsmod __unused = bits.getBits(3);
123    unsigned acmod = bits.getBits(3);
124    unsigned cmixlev __unused = 0;
125    unsigned surmixlev __unused = 0;
126    unsigned dsurmod __unused = 0;
127
128    if ((acmod & 1) > 0 && acmod != 1) {
129        if (bits.numBitsLeft() < 2) {
130            return 0;
131        }
132        cmixlev = bits.getBits(2);
133    }
134    if ((acmod & 4) > 0) {
135        if (bits.numBitsLeft() < 2) {
136            return 0;
137        }
138        surmixlev = bits.getBits(2);
139    }
140    if (acmod == 2) {
141        if (bits.numBitsLeft() < 2) {
142            return 0;
143        }
144        dsurmod = bits.getBits(2);
145    }
146
147    if (bits.numBitsLeft() < 1) {
148        return 0;
149    }
150    unsigned lfeon = bits.getBits(1);
151
152    unsigned samplingRate = samplingRateTable[fscod];
153    unsigned payloadSize = frameSizeTable[frmsizecod >> 1][fscod];
154    if (fscod == 1) {
155        payloadSize += frmsizecod & 1;
156    }
157    payloadSize <<= 1;  // convert from 16-bit words to bytes
158
159    unsigned channelCount = channelCountTable[acmod] + lfeon;
160
161    if (metaData != NULL) {
162        (*metaData)->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3);
163        (*metaData)->setInt32(kKeyChannelCount, channelCount);
164        (*metaData)->setInt32(kKeySampleRate, samplingRate);
165    }
166
167    return payloadSize;
168}
169
170static bool IsSeeminglyValidAC3Header(const uint8_t *ptr, size_t size) {
171    return parseAC3SyncFrame(ptr, size, NULL) > 0;
172}
173
// Checks whether the |size| bytes at |ptr| plausibly start with an ADTS
// (AAC) frame header.
//
// The header is accepted when:
//   - at least 7 bytes are available,
//   - the 12-bit sync word is 0xfff and the layer field is 0,
//   - the profile is not the reserved value 3 while the ID bit is set,
//   - the frame length stored in the header is at least as large as the
//     header itself (7 bytes, or 9 bytes when a CRC follows because
//     protection_absent is 0) and does not exceed |size|.
//
// On success the frame length from the header is stored in *frameLength
// (if |frameLength| is non-NULL) and true is returned. On failure
// *frameLength is left untouched and false is returned.
static bool IsSeeminglyValidADTSHeader(
        const uint8_t *ptr, size_t size, size_t *frameLength) {
    static const size_t kHeaderSizeWithoutCRC = 7;
    static const size_t kHeaderSizeWithCRC = 9;

    if (size < kHeaderSizeWithoutCRC) {
        // Not enough data to verify header.
        return false;
    }

    if (ptr[0] != 0xff || (ptr[1] >> 4) != 0x0f) {
        return false;
    }

    const unsigned layer = (ptr[1] >> 1) & 3;
    if (layer != 0) {
        return false;
    }

    const unsigned ID = (ptr[1] >> 3) & 1;
    const unsigned profile_ObjectType = ptr[2] >> 6;
    if (ID == 1 && profile_ObjectType == 3) {
        // MPEG-2 profile 3 is reserved.
        return false;
    }

    const bool protectionAbsent = (ptr[1] & 1) != 0;
    const size_t headerSize =
            protectionAbsent ? kHeaderSizeWithoutCRC : kHeaderSizeWithCRC;

    const size_t frameLengthInHeader =
            ((ptr[3] & 3) << 11) + (ptr[4] << 3) + ((ptr[5] >> 5) & 7);

    // The length field covers the header too, so anything shorter than the
    // header cannot be a real frame. Accepting it (in particular a length of
    // 0) would make the caller lock onto a bogus sync point.
    if (frameLengthInHeader < headerSize) {
        return false;
    }

    if (frameLengthInHeader > size) {
        return false;
    }

    if (frameLength != NULL) {
        *frameLength = frameLengthInHeader;
    }
    return true;
}
208
// Checks whether the |size| bytes at |ptr| plausibly start with an MPEG
// audio frame header: at least 3 bytes, an 11-bit sync pattern of all ones,
// and none of the version, layer, bitrate or sampling-rate fields set to
// the value this function treats as reserved.
static bool IsSeeminglyValidMPEGAudioHeader(const uint8_t *ptr, size_t size) {
    // All inspected fields live in the first three header bytes.
    if (size < 3) {
        return false;
    }

    const bool hasSyncWord = ptr[0] == 0xff && (ptr[1] >> 5) == 0x07;
    if (!hasSyncWord) {
        return false;
    }

    const unsigned versionId = (ptr[1] >> 3) & 3;
    const unsigned layerBits = (ptr[1] >> 1) & 3;
    const unsigned bitrateIndex = ptr[2] >> 4;
    const unsigned samplingRateIndex = (ptr[2] >> 2) & 3;

    // Each comparison below rules out a reserved field value.
    return versionId != 1
            && layerBits != 0
            && bitrateIndex != 0x0f
            && samplingRateIndex != 3;
}
245
246status_t ElementaryStreamQueue::appendData(
247        const void *data, size_t size, int64_t timeUs) {
248
249    if (mEOSReached) {
250        ALOGE("appending data after EOS");
251        return ERROR_MALFORMED;
252    }
253    if (mBuffer == NULL || mBuffer->size() == 0) {
254        switch (mMode) {
255            case H264:
256            case MPEG_VIDEO:
257            {
258#if 0
259                if (size < 4 || memcmp("\x00\x00\x00\x01", data, 4)) {
260                    return ERROR_MALFORMED;
261                }
262#else
263                uint8_t *ptr = (uint8_t *)data;
264
265                ssize_t startOffset = -1;
266                for (size_t i = 0; i + 2 < size; ++i) {
267                    if (!memcmp("\x00\x00\x01", &ptr[i], 3)) {
268                        startOffset = i;
269                        break;
270                    }
271                }
272
273                if (startOffset < 0) {
274                    return ERROR_MALFORMED;
275                }
276
277                if (startOffset > 0) {
278                    ALOGI("found something resembling an H.264/MPEG syncword "
279                          "at offset %zd",
280                          startOffset);
281                }
282
283                data = &ptr[startOffset];
284                size -= startOffset;
285#endif
286                break;
287            }
288
289            case MPEG4_VIDEO:
290            {
291#if 0
292                if (size < 3 || memcmp("\x00\x00\x01", data, 3)) {
293                    return ERROR_MALFORMED;
294                }
295#else
296                uint8_t *ptr = (uint8_t *)data;
297
298                ssize_t startOffset = -1;
299                for (size_t i = 0; i + 2 < size; ++i) {
300                    if (!memcmp("\x00\x00\x01", &ptr[i], 3)) {
301                        startOffset = i;
302                        break;
303                    }
304                }
305
306                if (startOffset < 0) {
307                    return ERROR_MALFORMED;
308                }
309
310                if (startOffset > 0) {
311                    ALOGI("found something resembling an H.264/MPEG syncword "
312                          "at offset %zd",
313                          startOffset);
314                }
315
316                data = &ptr[startOffset];
317                size -= startOffset;
318#endif
319                break;
320            }
321
322            case AAC:
323            {
324                uint8_t *ptr = (uint8_t *)data;
325
326#if 0
327                if (size < 2 || ptr[0] != 0xff || (ptr[1] >> 4) != 0x0f) {
328                    return ERROR_MALFORMED;
329                }
330#else
331                ssize_t startOffset = -1;
332                size_t frameLength;
333                for (size_t i = 0; i < size; ++i) {
334                    if (IsSeeminglyValidADTSHeader(
335                            &ptr[i], size - i, &frameLength)) {
336                        startOffset = i;
337                        break;
338                    }
339                }
340
341                if (startOffset < 0) {
342                    return ERROR_MALFORMED;
343                }
344
345                if (startOffset > 0) {
346                    ALOGI("found something resembling an AAC syncword at "
347                          "offset %zd",
348                          startOffset);
349                }
350
351                if (frameLength != size - startOffset) {
352                    ALOGV("First ADTS AAC frame length is %zd bytes, "
353                          "while the buffer size is %zd bytes.",
354                          frameLength, size - startOffset);
355                }
356
357                data = &ptr[startOffset];
358                size -= startOffset;
359#endif
360                break;
361            }
362
363            case AC3:
364            {
365                uint8_t *ptr = (uint8_t *)data;
366
367                ssize_t startOffset = -1;
368                for (size_t i = 0; i < size; ++i) {
369                    if (IsSeeminglyValidAC3Header(&ptr[i], size - i)) {
370                        startOffset = i;
371                        break;
372                    }
373                }
374
375                if (startOffset < 0) {
376                    return ERROR_MALFORMED;
377                }
378
379                if (startOffset > 0) {
380                    ALOGI("found something resembling an AC3 syncword at "
381                          "offset %zd",
382                          startOffset);
383                }
384
385                data = &ptr[startOffset];
386                size -= startOffset;
387                break;
388            }
389
390            case MPEG_AUDIO:
391            {
392                uint8_t *ptr = (uint8_t *)data;
393
394                ssize_t startOffset = -1;
395                for (size_t i = 0; i < size; ++i) {
396                    if (IsSeeminglyValidMPEGAudioHeader(&ptr[i], size - i)) {
397                        startOffset = i;
398                        break;
399                    }
400                }
401
402                if (startOffset < 0) {
403                    return ERROR_MALFORMED;
404                }
405
406                if (startOffset > 0) {
407                    ALOGI("found something resembling an MPEG audio "
408                          "syncword at offset %zd",
409                          startOffset);
410                }
411
412                data = &ptr[startOffset];
413                size -= startOffset;
414                break;
415            }
416
417            case PCM_AUDIO:
418            {
419                break;
420            }
421
422            default:
423                TRESPASS();
424                break;
425        }
426    }
427
428    size_t neededSize = (mBuffer == NULL ? 0 : mBuffer->size()) + size;
429    if (mBuffer == NULL || neededSize > mBuffer->capacity()) {
430        neededSize = (neededSize + 65535) & ~65535;
431
432        ALOGV("resizing buffer to size %zu", neededSize);
433
434        sp<ABuffer> buffer = new ABuffer(neededSize);
435        if (mBuffer != NULL) {
436            memcpy(buffer->data(), mBuffer->data(), mBuffer->size());
437            buffer->setRange(0, mBuffer->size());
438        } else {
439            buffer->setRange(0, 0);
440        }
441
442        mBuffer = buffer;
443    }
444
445    memcpy(mBuffer->data() + mBuffer->size(), data, size);
446    mBuffer->setRange(0, mBuffer->size() + size);
447
448    RangeInfo info;
449    info.mLength = size;
450    info.mTimestampUs = timeUs;
451    mRangeInfos.push_back(info);
452
453#if 0
454    if (mMode == AAC) {
455        ALOGI("size = %zu, timeUs = %.2f secs", size, timeUs / 1E6);
456        hexdump(data, size);
457    }
458#endif
459
460    return OK;
461}
462
463sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnit() {
464    if ((mFlags & kFlag_AlignedData) && mMode == H264) {
465        if (mRangeInfos.empty()) {
466            return NULL;
467        }
468
469        RangeInfo info = *mRangeInfos.begin();
470        mRangeInfos.erase(mRangeInfos.begin());
471
472        sp<ABuffer> accessUnit = new ABuffer(info.mLength);
473        memcpy(accessUnit->data(), mBuffer->data(), info.mLength);
474        accessUnit->meta()->setInt64("timeUs", info.mTimestampUs);
475
476        memmove(mBuffer->data(),
477                mBuffer->data() + info.mLength,
478                mBuffer->size() - info.mLength);
479
480        mBuffer->setRange(0, mBuffer->size() - info.mLength);
481
482        if (mFormat == NULL) {
483            mFormat = MakeAVCCodecSpecificData(accessUnit);
484        }
485
486        return accessUnit;
487    }
488
489    switch (mMode) {
490        case H264:
491            return dequeueAccessUnitH264();
492        case AAC:
493            return dequeueAccessUnitAAC();
494        case AC3:
495            return dequeueAccessUnitAC3();
496        case MPEG_VIDEO:
497            return dequeueAccessUnitMPEGVideo();
498        case MPEG4_VIDEO:
499            return dequeueAccessUnitMPEG4Video();
500        case PCM_AUDIO:
501            return dequeueAccessUnitPCMAudio();
502        default:
503            CHECK_EQ((unsigned)mMode, (unsigned)MPEG_AUDIO);
504            return dequeueAccessUnitMPEGAudio();
505    }
506}
507
508sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitAC3() {
509    unsigned syncStartPos = 0;  // in bytes
510    unsigned payloadSize = 0;
511    sp<MetaData> format = new MetaData;
512    while (true) {
513        if (syncStartPos + 2 >= mBuffer->size()) {
514            return NULL;
515        }
516
517        payloadSize = parseAC3SyncFrame(
518                mBuffer->data() + syncStartPos,
519                mBuffer->size() - syncStartPos,
520                &format);
521        if (payloadSize > 0) {
522            break;
523        }
524        ++syncStartPos;
525    }
526
527    if (mBuffer->size() < syncStartPos + payloadSize) {
528        ALOGV("Not enough buffer size for AC3");
529        return NULL;
530    }
531
532    if (mFormat == NULL) {
533        mFormat = format;
534    }
535
536    sp<ABuffer> accessUnit = new ABuffer(syncStartPos + payloadSize);
537    memcpy(accessUnit->data(), mBuffer->data(), syncStartPos + payloadSize);
538
539    int64_t timeUs = fetchTimestamp(syncStartPos + payloadSize);
540    CHECK_GE(timeUs, 0ll);
541    accessUnit->meta()->setInt64("timeUs", timeUs);
542
543    memmove(
544            mBuffer->data(),
545            mBuffer->data() + syncStartPos + payloadSize,
546            mBuffer->size() - syncStartPos - payloadSize);
547
548    mBuffer->setRange(0, mBuffer->size() - syncStartPos - payloadSize);
549
550    return accessUnit;
551}
552
553sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitPCMAudio() {
554    if (mBuffer->size() < 4) {
555        return NULL;
556    }
557
558    ABitReader bits(mBuffer->data(), 4);
559    CHECK_EQ(bits.getBits(8), 0xa0);
560    unsigned numAUs = bits.getBits(8);
561    bits.skipBits(8);
562    unsigned quantization_word_length __unused = bits.getBits(2);
563    unsigned audio_sampling_frequency = bits.getBits(3);
564    unsigned num_channels = bits.getBits(3);
565
566    CHECK_EQ(audio_sampling_frequency, 2);  // 48kHz
567    CHECK_EQ(num_channels, 1u);  // stereo!
568
569    if (mFormat == NULL) {
570        mFormat = new MetaData;
571        mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
572        mFormat->setInt32(kKeyChannelCount, 2);
573        mFormat->setInt32(kKeySampleRate, 48000);
574    }
575
576    static const size_t kFramesPerAU = 80;
577    size_t frameSize = 2 /* numChannels */ * sizeof(int16_t);
578
579    size_t payloadSize = numAUs * frameSize * kFramesPerAU;
580
581    if (mBuffer->size() < 4 + payloadSize) {
582        return NULL;
583    }
584
585    sp<ABuffer> accessUnit = new ABuffer(payloadSize);
586    memcpy(accessUnit->data(), mBuffer->data() + 4, payloadSize);
587
588    int64_t timeUs = fetchTimestamp(payloadSize + 4);
589    CHECK_GE(timeUs, 0ll);
590    accessUnit->meta()->setInt64("timeUs", timeUs);
591
592    int16_t *ptr = (int16_t *)accessUnit->data();
593    for (size_t i = 0; i < payloadSize / sizeof(int16_t); ++i) {
594        ptr[i] = ntohs(ptr[i]);
595    }
596
597    memmove(
598            mBuffer->data(),
599            mBuffer->data() + 4 + payloadSize,
600            mBuffer->size() - 4 - payloadSize);
601
602    mBuffer->setRange(0, mBuffer->size() - 4 - payloadSize);
603
604    return accessUnit;
605}
606
607sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitAAC() {
608    if (mBuffer->size() == 0) {
609        return NULL;
610    }
611
612    CHECK(!mRangeInfos.empty());
613
614    const RangeInfo &info = *mRangeInfos.begin();
615    if (mBuffer->size() < info.mLength) {
616        return NULL;
617    }
618
619    CHECK_GE(info.mTimestampUs, 0ll);
620
621    // The idea here is consume all AAC frames starting at offsets before
622    // info.mLength so we can assign a meaningful timestamp without
623    // having to interpolate.
624    // The final AAC frame may well extend into the next RangeInfo but
625    // that's ok.
626    // TODO: the logic commented above is skipped because codec cannot take
627    // arbitrary sized input buffers;
628    size_t offset = 0;
629    while (offset < info.mLength) {
630        if (offset + 7 > mBuffer->size()) {
631            return NULL;
632        }
633
634        ABitReader bits(mBuffer->data() + offset, mBuffer->size() - offset);
635
636        // adts_fixed_header
637
638        CHECK_EQ(bits.getBits(12), 0xfffu);
639        bits.skipBits(3);  // ID, layer
640        bool protection_absent __unused = bits.getBits(1) != 0;
641
642        if (mFormat == NULL) {
643            unsigned profile = bits.getBits(2);
644            CHECK_NE(profile, 3u);
645            unsigned sampling_freq_index = bits.getBits(4);
646            bits.getBits(1);  // private_bit
647            unsigned channel_configuration = bits.getBits(3);
648            CHECK_NE(channel_configuration, 0u);
649            bits.skipBits(2);  // original_copy, home
650
651            mFormat = MakeAACCodecSpecificData(
652                    profile, sampling_freq_index, channel_configuration);
653
654            mFormat->setInt32(kKeyIsADTS, true);
655
656            int32_t sampleRate;
657            int32_t numChannels;
658            CHECK(mFormat->findInt32(kKeySampleRate, &sampleRate));
659            CHECK(mFormat->findInt32(kKeyChannelCount, &numChannels));
660
661            ALOGI("found AAC codec config (%d Hz, %d channels)",
662                 sampleRate, numChannels);
663        } else {
664            // profile_ObjectType, sampling_frequency_index, private_bits,
665            // channel_configuration, original_copy, home
666            bits.skipBits(12);
667        }
668
669        // adts_variable_header
670
671        // copyright_identification_bit, copyright_identification_start
672        bits.skipBits(2);
673
674        unsigned aac_frame_length = bits.getBits(13);
675
676        bits.skipBits(11);  // adts_buffer_fullness
677
678        unsigned number_of_raw_data_blocks_in_frame = bits.getBits(2);
679
680        if (number_of_raw_data_blocks_in_frame != 0) {
681            // To be implemented.
682            TRESPASS();
683        }
684
685        if (offset + aac_frame_length > mBuffer->size()) {
686            return NULL;
687        }
688
689        size_t headerSize __unused = protection_absent ? 7 : 9;
690
691        offset += aac_frame_length;
692        // TODO: move back to concatenation when codec can support arbitrary input buffers.
693        // For now only queue a single buffer
694        break;
695    }
696
697    int64_t timeUs = fetchTimestampAAC(offset);
698
699    sp<ABuffer> accessUnit = new ABuffer(offset);
700    memcpy(accessUnit->data(), mBuffer->data(), offset);
701
702    memmove(mBuffer->data(), mBuffer->data() + offset,
703            mBuffer->size() - offset);
704    mBuffer->setRange(0, mBuffer->size() - offset);
705
706    accessUnit->meta()->setInt64("timeUs", timeUs);
707
708    return accessUnit;
709}
710
711int64_t ElementaryStreamQueue::fetchTimestamp(size_t size) {
712    int64_t timeUs = -1;
713    bool first = true;
714
715    while (size > 0) {
716        CHECK(!mRangeInfos.empty());
717
718        RangeInfo *info = &*mRangeInfos.begin();
719
720        if (first) {
721            timeUs = info->mTimestampUs;
722            first = false;
723        }
724
725        if (info->mLength > size) {
726            info->mLength -= size;
727            size = 0;
728        } else {
729            size -= info->mLength;
730
731            mRangeInfos.erase(mRangeInfos.begin());
732            info = NULL;
733        }
734
735    }
736
737    if (timeUs == 0ll) {
738        ALOGV("Returning 0 timestamp");
739    }
740
741    return timeUs;
742}
743
744// TODO: avoid interpolating timestamps once codec supports arbitrary sized input buffers
745int64_t ElementaryStreamQueue::fetchTimestampAAC(size_t size) {
746    int64_t timeUs = -1;
747    bool first = true;
748
749    size_t samplesize = size;
750    while (size > 0) {
751        CHECK(!mRangeInfos.empty());
752
753        RangeInfo *info = &*mRangeInfos.begin();
754
755        if (first) {
756            timeUs = info->mTimestampUs;
757            first = false;
758        }
759
760        if (info->mLength > size) {
761            int32_t sampleRate;
762            CHECK(mFormat->findInt32(kKeySampleRate, &sampleRate));
763            info->mLength -= size;
764            size_t numSamples = 1024 * size / samplesize;
765            info->mTimestampUs += numSamples * 1000000ll / sampleRate;
766            size = 0;
767        } else {
768            size -= info->mLength;
769
770            mRangeInfos.erase(mRangeInfos.begin());
771            info = NULL;
772        }
773
774    }
775
776    if (timeUs == 0ll) {
777        ALOGV("Returning 0 timestamp");
778    }
779
780    return timeUs;
781}
782
// Location of one NAL unit inside the queue's buffer, as collected by
// dequeueAccessUnitH264() while it assembles an access unit.
struct NALPosition {
    // Offset of the first NAL byte from the start of the buffer data.
    size_t nalOffset;

    // Length of the NAL unit in bytes.
    size_t nalSize;
};
787
788sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitH264() {
789    const uint8_t *data = mBuffer->data();
790
791    size_t size = mBuffer->size();
792    Vector<NALPosition> nals;
793
794    size_t totalSize = 0;
795
796    status_t err;
797    const uint8_t *nalStart;
798    size_t nalSize;
799    bool foundSlice = false;
800    while ((err = getNextNALUnit(&data, &size, &nalStart, &nalSize)) == OK) {
801        if (nalSize == 0) continue;
802
803        unsigned nalType = nalStart[0] & 0x1f;
804        bool flush = false;
805
806        if (nalType == 1 || nalType == 5) {
807            if (foundSlice) {
808                ABitReader br(nalStart + 1, nalSize);
809                unsigned first_mb_in_slice = parseUE(&br);
810
811                if (first_mb_in_slice == 0) {
812                    // This slice starts a new frame.
813
814                    flush = true;
815                }
816            }
817
818            foundSlice = true;
819        } else if ((nalType == 9 || nalType == 7) && foundSlice) {
820            // Access unit delimiter and SPS will be associated with the
821            // next frame.
822
823            flush = true;
824        }
825
826        if (flush) {
827            // The access unit will contain all nal units up to, but excluding
828            // the current one, separated by 0x00 0x00 0x00 0x01 startcodes.
829
830            size_t auSize = 4 * nals.size() + totalSize;
831            sp<ABuffer> accessUnit = new ABuffer(auSize);
832
833#if !LOG_NDEBUG
834            AString out;
835#endif
836
837            size_t dstOffset = 0;
838            for (size_t i = 0; i < nals.size(); ++i) {
839                const NALPosition &pos = nals.itemAt(i);
840
841                unsigned nalType = mBuffer->data()[pos.nalOffset] & 0x1f;
842
843                if (nalType == 6) {
844                    sp<ABuffer> sei = new ABuffer(pos.nalSize);
845                    memcpy(sei->data(), mBuffer->data() + pos.nalOffset, pos.nalSize);
846                    accessUnit->meta()->setBuffer("sei", sei);
847                }
848
849#if !LOG_NDEBUG
850                char tmp[128];
851                sprintf(tmp, "0x%02x", nalType);
852                if (i > 0) {
853                    out.append(", ");
854                }
855                out.append(tmp);
856#endif
857
858                memcpy(accessUnit->data() + dstOffset, "\x00\x00\x00\x01", 4);
859
860                memcpy(accessUnit->data() + dstOffset + 4,
861                       mBuffer->data() + pos.nalOffset,
862                       pos.nalSize);
863
864                dstOffset += pos.nalSize + 4;
865            }
866
867#if !LOG_NDEBUG
868            ALOGV("accessUnit contains nal types %s", out.c_str());
869#endif
870
871            const NALPosition &pos = nals.itemAt(nals.size() - 1);
872            size_t nextScan = pos.nalOffset + pos.nalSize;
873
874            memmove(mBuffer->data(),
875                    mBuffer->data() + nextScan,
876                    mBuffer->size() - nextScan);
877
878            mBuffer->setRange(0, mBuffer->size() - nextScan);
879
880            int64_t timeUs = fetchTimestamp(nextScan);
881            CHECK_GE(timeUs, 0ll);
882
883            accessUnit->meta()->setInt64("timeUs", timeUs);
884
885            if (mFormat == NULL) {
886                mFormat = MakeAVCCodecSpecificData(accessUnit);
887            }
888
889            return accessUnit;
890        }
891
892        NALPosition pos;
893        pos.nalOffset = nalStart - mBuffer->data();
894        pos.nalSize = nalSize;
895
896        nals.push(pos);
897
898        totalSize += nalSize;
899    }
900    CHECK_EQ(err, (status_t)-EAGAIN);
901
902    return NULL;
903}
904
905sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEGAudio() {
906    const uint8_t *data = mBuffer->data();
907    size_t size = mBuffer->size();
908
909    if (size < 4) {
910        return NULL;
911    }
912
913    uint32_t header = U32_AT(data);
914
915    size_t frameSize;
916    int samplingRate, numChannels, bitrate, numSamples;
917    CHECK(GetMPEGAudioFrameSize(
918                header, &frameSize, &samplingRate, &numChannels,
919                &bitrate, &numSamples));
920
921    if (size < frameSize) {
922        return NULL;
923    }
924
925    unsigned layer = 4 - ((header >> 17) & 3);
926
927    sp<ABuffer> accessUnit = new ABuffer(frameSize);
928    memcpy(accessUnit->data(), data, frameSize);
929
930    memmove(mBuffer->data(),
931            mBuffer->data() + frameSize,
932            mBuffer->size() - frameSize);
933
934    mBuffer->setRange(0, mBuffer->size() - frameSize);
935
936    int64_t timeUs = fetchTimestamp(frameSize);
937    CHECK_GE(timeUs, 0ll);
938
939    accessUnit->meta()->setInt64("timeUs", timeUs);
940
941    if (mFormat == NULL) {
942        mFormat = new MetaData;
943
944        switch (layer) {
945            case 1:
946                mFormat->setCString(
947                        kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_I);
948                break;
949            case 2:
950                mFormat->setCString(
951                        kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_II);
952                break;
953            case 3:
954                mFormat->setCString(
955                        kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
956                break;
957            default:
958                TRESPASS();
959        }
960
961        mFormat->setInt32(kKeySampleRate, samplingRate);
962        mFormat->setInt32(kKeyChannelCount, numChannels);
963    }
964
965    return accessUnit;
966}
967
968static void EncodeSize14(uint8_t **_ptr, size_t size) {
969    CHECK_LE(size, 0x3fff);
970
971    uint8_t *ptr = *_ptr;
972
973    *ptr++ = 0x80 | (size >> 7);
974    *ptr++ = size & 0x7f;
975
976    *_ptr = ptr;
977}
978
979static sp<ABuffer> MakeMPEGVideoESDS(const sp<ABuffer> &csd) {
980    sp<ABuffer> esds = new ABuffer(csd->size() + 25);
981
982    uint8_t *ptr = esds->data();
983    *ptr++ = 0x03;
984    EncodeSize14(&ptr, 22 + csd->size());
985
986    *ptr++ = 0x00;  // ES_ID
987    *ptr++ = 0x00;
988
989    *ptr++ = 0x00;  // streamDependenceFlag, URL_Flag, OCRstreamFlag
990
991    *ptr++ = 0x04;
992    EncodeSize14(&ptr, 16 + csd->size());
993
994    *ptr++ = 0x40;  // Audio ISO/IEC 14496-3
995
996    for (size_t i = 0; i < 12; ++i) {
997        *ptr++ = 0x00;
998    }
999
1000    *ptr++ = 0x05;
1001    EncodeSize14(&ptr, csd->size());
1002
1003    memcpy(ptr, csd->data(), csd->size());
1004
1005    return esds;
1006}
1007
1008sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEGVideo() {
1009    const uint8_t *data = mBuffer->data();
1010    size_t size = mBuffer->size();
1011
1012    bool sawPictureStart = false;
1013    int pprevStartCode = -1;
1014    int prevStartCode = -1;
1015    int currentStartCode = -1;
1016
1017    size_t offset = 0;
1018    while (offset + 3 < size) {
1019        if (memcmp(&data[offset], "\x00\x00\x01", 3)) {
1020            ++offset;
1021            continue;
1022        }
1023
1024        pprevStartCode = prevStartCode;
1025        prevStartCode = currentStartCode;
1026        currentStartCode = data[offset + 3];
1027
1028        if (currentStartCode == 0xb3 && mFormat == NULL) {
1029            memmove(mBuffer->data(), mBuffer->data() + offset, size - offset);
1030            size -= offset;
1031            (void)fetchTimestamp(offset);
1032            offset = 0;
1033            mBuffer->setRange(0, size);
1034        }
1035
1036        if ((prevStartCode == 0xb3 && currentStartCode != 0xb5)
1037                || (pprevStartCode == 0xb3 && prevStartCode == 0xb5)) {
1038            // seqHeader without/with extension
1039
1040            if (mFormat == NULL) {
1041                CHECK_GE(size, 7u);
1042
1043                unsigned width =
1044                    (data[4] << 4) | data[5] >> 4;
1045
1046                unsigned height =
1047                    ((data[5] & 0x0f) << 8) | data[6];
1048
1049                mFormat = new MetaData;
1050                mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
1051                mFormat->setInt32(kKeyWidth, width);
1052                mFormat->setInt32(kKeyHeight, height);
1053
1054                ALOGI("found MPEG2 video codec config (%d x %d)", width, height);
1055
1056                sp<ABuffer> csd = new ABuffer(offset);
1057                memcpy(csd->data(), data, offset);
1058
1059                memmove(mBuffer->data(),
1060                        mBuffer->data() + offset,
1061                        mBuffer->size() - offset);
1062
1063                mBuffer->setRange(0, mBuffer->size() - offset);
1064                size -= offset;
1065                (void)fetchTimestamp(offset);
1066                offset = 0;
1067
1068                // hexdump(csd->data(), csd->size());
1069
1070                sp<ABuffer> esds = MakeMPEGVideoESDS(csd);
1071                mFormat->setData(
1072                        kKeyESDS, kTypeESDS, esds->data(), esds->size());
1073
1074                return NULL;
1075            }
1076        }
1077
1078        if (mFormat != NULL && currentStartCode == 0x00) {
1079            // Picture start
1080
1081            if (!sawPictureStart) {
1082                sawPictureStart = true;
1083            } else {
1084                sp<ABuffer> accessUnit = new ABuffer(offset);
1085                memcpy(accessUnit->data(), data, offset);
1086
1087                memmove(mBuffer->data(),
1088                        mBuffer->data() + offset,
1089                        mBuffer->size() - offset);
1090
1091                mBuffer->setRange(0, mBuffer->size() - offset);
1092
1093                int64_t timeUs = fetchTimestamp(offset);
1094                CHECK_GE(timeUs, 0ll);
1095
1096                offset = 0;
1097
1098                accessUnit->meta()->setInt64("timeUs", timeUs);
1099
1100                ALOGV("returning MPEG video access unit at time %" PRId64 " us",
1101                      timeUs);
1102
1103                // hexdump(accessUnit->data(), accessUnit->size());
1104
1105                return accessUnit;
1106            }
1107        }
1108
1109        ++offset;
1110    }
1111
1112    return NULL;
1113}
1114
// Returns the size in bytes of the chunk that begins at |data|, i.e. the
// distance from the leading start code prefix (00 00 01) to the next one.
//
// A chunk header is the 3-byte prefix plus a 1-byte start code value, so the
// search for the next prefix begins at index 4. Starting at index 3 would let
// the value byte double as the first byte of the following prefix
// (00 00 01 00 00 01 ...) and report a bogus 3-byte chunk.
//
// Returns -EAGAIN when
//  - fewer than 7 bytes are available (header + next prefix cannot fit),
//  - |data| does not begin with a start code prefix, or
//  - no further prefix is found within |size| bytes.
// The input is demuxed stream data, so a missing prefix is reported to the
// caller instead of aborting the process.
static ssize_t getNextChunkSize(
        const uint8_t *data, size_t size) {
    static const char kStartCode[] = "\x00\x00\x01";
    static const size_t kPrefixSize = 3;
    static const size_t kHeaderSize = kPrefixSize + 1;  // prefix + value byte

    if (size < kHeaderSize + kPrefixSize) {
        return -EAGAIN;
    }

    if (memcmp(kStartCode, data, kPrefixSize)) {
        return -EAGAIN;
    }

    for (size_t offset = kHeaderSize; offset + kPrefixSize <= size; ++offset) {
        if (!memcmp(&data[offset], kStartCode, kPrefixSize)) {
            return static_cast<ssize_t>(offset);
        }
    }

    return -EAGAIN;
}
1138
1139sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEG4Video() {
1140    uint8_t *data = mBuffer->data();
1141    size_t size = mBuffer->size();
1142
1143    enum {
1144        SKIP_TO_VISUAL_OBJECT_SEQ_START,
1145        EXPECT_VISUAL_OBJECT_START,
1146        EXPECT_VO_START,
1147        EXPECT_VOL_START,
1148        WAIT_FOR_VOP_START,
1149        SKIP_TO_VOP_START,
1150
1151    } state;
1152
1153    if (mFormat == NULL) {
1154        state = SKIP_TO_VISUAL_OBJECT_SEQ_START;
1155    } else {
1156        state = SKIP_TO_VOP_START;
1157    }
1158
1159    int32_t width = -1, height = -1;
1160
1161    size_t offset = 0;
1162    ssize_t chunkSize;
1163    while ((chunkSize = getNextChunkSize(
1164                    &data[offset], size - offset)) > 0) {
1165        bool discard = false;
1166
1167        unsigned chunkType = data[offset + 3];
1168
1169        switch (state) {
1170            case SKIP_TO_VISUAL_OBJECT_SEQ_START:
1171            {
1172                if (chunkType == 0xb0) {
1173                    // Discard anything before this marker.
1174
1175                    state = EXPECT_VISUAL_OBJECT_START;
1176                } else {
1177                    discard = true;
1178                }
1179                break;
1180            }
1181
1182            case EXPECT_VISUAL_OBJECT_START:
1183            {
1184                CHECK_EQ(chunkType, 0xb5);
1185                state = EXPECT_VO_START;
1186                break;
1187            }
1188
1189            case EXPECT_VO_START:
1190            {
1191                CHECK_LE(chunkType, 0x1f);
1192                state = EXPECT_VOL_START;
1193                break;
1194            }
1195
1196            case EXPECT_VOL_START:
1197            {
1198                CHECK((chunkType & 0xf0) == 0x20);
1199
1200                CHECK(ExtractDimensionsFromVOLHeader(
1201                            &data[offset], chunkSize,
1202                            &width, &height));
1203
1204                state = WAIT_FOR_VOP_START;
1205                break;
1206            }
1207
1208            case WAIT_FOR_VOP_START:
1209            {
1210                if (chunkType == 0xb3 || chunkType == 0xb6) {
1211                    // group of VOP or VOP start.
1212
1213                    mFormat = new MetaData;
1214                    mFormat->setCString(
1215                            kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG4);
1216
1217                    mFormat->setInt32(kKeyWidth, width);
1218                    mFormat->setInt32(kKeyHeight, height);
1219
1220                    ALOGI("found MPEG4 video codec config (%d x %d)",
1221                         width, height);
1222
1223                    sp<ABuffer> csd = new ABuffer(offset);
1224                    memcpy(csd->data(), data, offset);
1225
1226                    // hexdump(csd->data(), csd->size());
1227
1228                    sp<ABuffer> esds = MakeMPEGVideoESDS(csd);
1229                    mFormat->setData(
1230                            kKeyESDS, kTypeESDS,
1231                            esds->data(), esds->size());
1232
1233                    discard = true;
1234                    state = SKIP_TO_VOP_START;
1235                }
1236
1237                break;
1238            }
1239
1240            case SKIP_TO_VOP_START:
1241            {
1242                if (chunkType == 0xb6) {
1243                    offset += chunkSize;
1244
1245                    sp<ABuffer> accessUnit = new ABuffer(offset);
1246                    memcpy(accessUnit->data(), data, offset);
1247
1248                    memmove(data, &data[offset], size - offset);
1249                    size -= offset;
1250                    mBuffer->setRange(0, size);
1251
1252                    int64_t timeUs = fetchTimestamp(offset);
1253                    CHECK_GE(timeUs, 0ll);
1254
1255                    offset = 0;
1256
1257                    accessUnit->meta()->setInt64("timeUs", timeUs);
1258
1259                    ALOGV("returning MPEG4 video access unit at time %" PRId64 " us",
1260                         timeUs);
1261
1262                    // hexdump(accessUnit->data(), accessUnit->size());
1263
1264                    return accessUnit;
1265                } else if (chunkType != 0xb3) {
1266                    offset += chunkSize;
1267                    discard = true;
1268                }
1269
1270                break;
1271            }
1272
1273            default:
1274                TRESPASS();
1275        }
1276
1277        if (discard) {
1278            (void)fetchTimestamp(offset);
1279            memmove(data, &data[offset], size - offset);
1280            size -= offset;
1281            offset = 0;
1282            mBuffer->setRange(0, size);
1283        } else {
1284            offset += chunkSize;
1285        }
1286    }
1287
1288    return NULL;
1289}
1290
1291void ElementaryStreamQueue::signalEOS() {
1292    if (!mEOSReached) {
1293        if (mMode == MPEG_VIDEO) {
1294            const char *theEnd = "\x00\x00\x01\x00";
1295            appendData(theEnd, 4, 0);
1296        }
1297        mEOSReached = true;
1298    } else {
1299        ALOGW("EOS already signaled");
1300    }
1301}
1302
1303
1304}  // namespace android
1305