1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "ESQueue"
19#include <media/stagefright/foundation/ADebug.h>
20
21#include "ESQueue.h"
22
23#include <media/stagefright/foundation/hexdump.h>
24#include <media/stagefright/foundation/ABitReader.h>
25#include <media/stagefright/foundation/ABuffer.h>
26#include <media/stagefright/foundation/AMessage.h>
27#include <media/stagefright/MediaErrors.h>
28#include <media/stagefright/MediaDefs.h>
29#include <media/stagefright/MetaData.h>
30#include <media/stagefright/Utils.h>
31
32#include "include/avc_utils.h"
33
34#include <inttypes.h>
35#include <netinet/in.h>
36
37namespace android {
38
39ElementaryStreamQueue::ElementaryStreamQueue(Mode mode, uint32_t flags)
40    : mMode(mode),
41      mFlags(flags),
42      mEOSReached(false) {
43}
44
45sp<MetaData> ElementaryStreamQueue::getFormat() {
46    return mFormat;
47}
48
49void ElementaryStreamQueue::clear(bool clearFormat) {
50    if (mBuffer != NULL) {
51        mBuffer->setRange(0, 0);
52    }
53
54    mRangeInfos.clear();
55
56    if (clearFormat) {
57        mFormat.clear();
58    }
59
60    mEOSReached = false;
61}
62
63// Parse AC3 header assuming the current ptr is start position of syncframe,
64// update metadata only applicable, and return the payload size
65static unsigned parseAC3SyncFrame(
66        const uint8_t *ptr, size_t size, sp<MetaData> *metaData) {
67    static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
68    static const unsigned samplingRateTable[] = {48000, 44100, 32000};
69
70    static const unsigned frameSizeTable[19][3] = {
71        { 64, 69, 96 },
72        { 80, 87, 120 },
73        { 96, 104, 144 },
74        { 112, 121, 168 },
75        { 128, 139, 192 },
76        { 160, 174, 240 },
77        { 192, 208, 288 },
78        { 224, 243, 336 },
79        { 256, 278, 384 },
80        { 320, 348, 480 },
81        { 384, 417, 576 },
82        { 448, 487, 672 },
83        { 512, 557, 768 },
84        { 640, 696, 960 },
85        { 768, 835, 1152 },
86        { 896, 975, 1344 },
87        { 1024, 1114, 1536 },
88        { 1152, 1253, 1728 },
89        { 1280, 1393, 1920 },
90    };
91
92    ABitReader bits(ptr, size);
93    if (bits.numBitsLeft() < 16) {
94        return 0;
95    }
96    if (bits.getBits(16) != 0x0B77) {
97        return 0;
98    }
99
100    if (bits.numBitsLeft() < 16 + 2 + 6 + 5 + 3 + 3) {
101        ALOGV("Not enough bits left for further parsing");
102        return 0;
103    }
104    bits.skipBits(16);  // crc1
105
106    unsigned fscod = bits.getBits(2);
107    if (fscod == 3) {
108        ALOGW("Incorrect fscod in AC3 header");
109        return 0;
110    }
111
112    unsigned frmsizecod = bits.getBits(6);
113    if (frmsizecod > 37) {
114        ALOGW("Incorrect frmsizecod in AC3 header");
115        return 0;
116    }
117
118    unsigned bsid = bits.getBits(5);
119    if (bsid > 8) {
120        ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
121        return 0;
122    }
123
124    unsigned bsmod __unused = bits.getBits(3);
125    unsigned acmod = bits.getBits(3);
126    unsigned cmixlev __unused = 0;
127    unsigned surmixlev __unused = 0;
128    unsigned dsurmod __unused = 0;
129
130    if ((acmod & 1) > 0 && acmod != 1) {
131        if (bits.numBitsLeft() < 2) {
132            return 0;
133        }
134        cmixlev = bits.getBits(2);
135    }
136    if ((acmod & 4) > 0) {
137        if (bits.numBitsLeft() < 2) {
138            return 0;
139        }
140        surmixlev = bits.getBits(2);
141    }
142    if (acmod == 2) {
143        if (bits.numBitsLeft() < 2) {
144            return 0;
145        }
146        dsurmod = bits.getBits(2);
147    }
148
149    if (bits.numBitsLeft() < 1) {
150        return 0;
151    }
152    unsigned lfeon = bits.getBits(1);
153
154    unsigned samplingRate = samplingRateTable[fscod];
155    unsigned payloadSize = frameSizeTable[frmsizecod >> 1][fscod];
156    if (fscod == 1) {
157        payloadSize += frmsizecod & 1;
158    }
159    payloadSize <<= 1;  // convert from 16-bit words to bytes
160
161    unsigned channelCount = channelCountTable[acmod] + lfeon;
162
163    if (metaData != NULL) {
164        (*metaData)->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3);
165        (*metaData)->setInt32(kKeyChannelCount, channelCount);
166        (*metaData)->setInt32(kKeySampleRate, samplingRate);
167    }
168
169    return payloadSize;
170}
171
172static bool IsSeeminglyValidAC3Header(const uint8_t *ptr, size_t size) {
173    return parseAC3SyncFrame(ptr, size, NULL) > 0;
174}
175
// Heuristically checks whether |ptr| (with |size| bytes available) starts
// with a plausible ADTS header.
//
// On success, stores the frame length announced by the header (which counts
// the header itself) in |*frameLength| and returns true. On failure returns
// false and leaves |*frameLength| untouched.
//
// A header is rejected when:
//   - fewer than 7 bytes are available,
//   - the 12-bit syncword is not 0xfff or the layer field is non-zero,
//   - ID == 1 is combined with the reserved profile value 3,
//   - the announced frame length is shorter than the 7-byte header
//     (including 0), which can never describe a real frame, or
//   - the announced frame length exceeds the available data.
static bool IsSeeminglyValidADTSHeader(
        const uint8_t *ptr, size_t size, size_t *frameLength) {
    // Size of the fixed + variable ADTS header without CRC.
    static const size_t kMinADTSFrameSize = 7;

    if (size < kMinADTSFrameSize) {
        // Not enough data to verify header.
        return false;
    }

    if (ptr[0] != 0xff || (ptr[1] >> 4) != 0x0f) {
        return false;
    }

    unsigned layer = (ptr[1] >> 1) & 3;

    if (layer != 0) {
        return false;
    }

    unsigned ID = (ptr[1] >> 3) & 1;
    unsigned profile_ObjectType = ptr[2] >> 6;

    if (ID == 1 && profile_ObjectType == 3) {
        // MPEG-2 profile 3 is reserved.
        return false;
    }

    // 13-bit aac_frame_length spread over bytes 3..5.
    size_t frameLengthInHeader =
            ((ptr[3] & 3) << 11) + (ptr[4] << 3) + ((ptr[5] >> 5) & 7);

    if (frameLengthInHeader < kMinADTSFrameSize) {
        // The length includes the header, so anything shorter is bogus and
        // would make downstream frame walking stall or misparse.
        return false;
    }

    if (frameLengthInHeader > size) {
        return false;
    }

    *frameLength = frameLengthInHeader;
    return true;
}
210
// Heuristically checks whether |ptr| starts with a plausible MPEG audio frame
// header. Needs at least 3 bytes; requires the 11-bit sync pattern and
// rejects the field values the original code marks as reserved (version
// field 1, layer field 0, bitrate index 15, sampling rate index 3).
static bool IsSeeminglyValidMPEGAudioHeader(const uint8_t *ptr, size_t size) {
    if (size < 3) {
        // Too short to inspect all the fields below.
        return false;
    }

    const bool syncOk = ptr[0] == 0xff && (ptr[1] >> 5) == 0x07;
    if (!syncOk) {
        return false;
    }

    const unsigned versionField = (ptr[1] >> 3) & 3;
    const unsigned layerField = (ptr[1] >> 1) & 3;
    const unsigned bitrateField = ptr[2] >> 4;
    const unsigned sampleRateField = (ptr[2] >> 2) & 3;

    return versionField != 1
            && layerField != 0
            && bitrateField != 0x0f
            && sampleRateField != 3;
}
247
248status_t ElementaryStreamQueue::appendData(
249        const void *data, size_t size, int64_t timeUs) {
250
251    if (mEOSReached) {
252        ALOGE("appending data after EOS");
253        return ERROR_MALFORMED;
254    }
255    if (mBuffer == NULL || mBuffer->size() == 0) {
256        switch (mMode) {
257            case H264:
258            case MPEG_VIDEO:
259            {
260#if 0
261                if (size < 4 || memcmp("\x00\x00\x00\x01", data, 4)) {
262                    return ERROR_MALFORMED;
263                }
264#else
265                uint8_t *ptr = (uint8_t *)data;
266
267                ssize_t startOffset = -1;
268                for (size_t i = 0; i + 2 < size; ++i) {
269                    if (!memcmp("\x00\x00\x01", &ptr[i], 3)) {
270                        startOffset = i;
271                        break;
272                    }
273                }
274
275                if (startOffset < 0) {
276                    return ERROR_MALFORMED;
277                }
278
279                if (startOffset > 0) {
280                    ALOGI("found something resembling an H.264/MPEG syncword "
281                          "at offset %zd",
282                          startOffset);
283                }
284
285                data = &ptr[startOffset];
286                size -= startOffset;
287#endif
288                break;
289            }
290
291            case MPEG4_VIDEO:
292            {
293#if 0
294                if (size < 3 || memcmp("\x00\x00\x01", data, 3)) {
295                    return ERROR_MALFORMED;
296                }
297#else
298                uint8_t *ptr = (uint8_t *)data;
299
300                ssize_t startOffset = -1;
301                for (size_t i = 0; i + 2 < size; ++i) {
302                    if (!memcmp("\x00\x00\x01", &ptr[i], 3)) {
303                        startOffset = i;
304                        break;
305                    }
306                }
307
308                if (startOffset < 0) {
309                    return ERROR_MALFORMED;
310                }
311
312                if (startOffset > 0) {
313                    ALOGI("found something resembling an H.264/MPEG syncword "
314                          "at offset %zd",
315                          startOffset);
316                }
317
318                data = &ptr[startOffset];
319                size -= startOffset;
320#endif
321                break;
322            }
323
324            case AAC:
325            {
326                uint8_t *ptr = (uint8_t *)data;
327
328#if 0
329                if (size < 2 || ptr[0] != 0xff || (ptr[1] >> 4) != 0x0f) {
330                    return ERROR_MALFORMED;
331                }
332#else
333                ssize_t startOffset = -1;
334                size_t frameLength;
335                for (size_t i = 0; i < size; ++i) {
336                    if (IsSeeminglyValidADTSHeader(
337                            &ptr[i], size - i, &frameLength)) {
338                        startOffset = i;
339                        break;
340                    }
341                }
342
343                if (startOffset < 0) {
344                    return ERROR_MALFORMED;
345                }
346
347                if (startOffset > 0) {
348                    ALOGI("found something resembling an AAC syncword at "
349                          "offset %zd",
350                          startOffset);
351                }
352
353                if (frameLength != size - startOffset) {
354                    ALOGV("First ADTS AAC frame length is %zd bytes, "
355                          "while the buffer size is %zd bytes.",
356                          frameLength, size - startOffset);
357                }
358
359                data = &ptr[startOffset];
360                size -= startOffset;
361#endif
362                break;
363            }
364
365            case AC3:
366            {
367                uint8_t *ptr = (uint8_t *)data;
368
369                ssize_t startOffset = -1;
370                for (size_t i = 0; i < size; ++i) {
371                    if (IsSeeminglyValidAC3Header(&ptr[i], size - i)) {
372                        startOffset = i;
373                        break;
374                    }
375                }
376
377                if (startOffset < 0) {
378                    return ERROR_MALFORMED;
379                }
380
381                if (startOffset > 0) {
382                    ALOGI("found something resembling an AC3 syncword at "
383                          "offset %zd",
384                          startOffset);
385                }
386
387                data = &ptr[startOffset];
388                size -= startOffset;
389                break;
390            }
391
392            case MPEG_AUDIO:
393            {
394                uint8_t *ptr = (uint8_t *)data;
395
396                ssize_t startOffset = -1;
397                for (size_t i = 0; i < size; ++i) {
398                    if (IsSeeminglyValidMPEGAudioHeader(&ptr[i], size - i)) {
399                        startOffset = i;
400                        break;
401                    }
402                }
403
404                if (startOffset < 0) {
405                    return ERROR_MALFORMED;
406                }
407
408                if (startOffset > 0) {
409                    ALOGI("found something resembling an MPEG audio "
410                          "syncword at offset %zd",
411                          startOffset);
412                }
413
414                data = &ptr[startOffset];
415                size -= startOffset;
416                break;
417            }
418
419            case PCM_AUDIO:
420            case METADATA:
421            {
422                break;
423            }
424
425            default:
426                ALOGE("Unknown mode: %d", mMode);
427                return ERROR_MALFORMED;
428        }
429    }
430
431    size_t neededSize = (mBuffer == NULL ? 0 : mBuffer->size()) + size;
432    if (mBuffer == NULL || neededSize > mBuffer->capacity()) {
433        neededSize = (neededSize + 65535) & ~65535;
434
435        ALOGV("resizing buffer to size %zu", neededSize);
436
437        sp<ABuffer> buffer = new ABuffer(neededSize);
438        if (mBuffer != NULL) {
439            memcpy(buffer->data(), mBuffer->data(), mBuffer->size());
440            buffer->setRange(0, mBuffer->size());
441        } else {
442            buffer->setRange(0, 0);
443        }
444
445        mBuffer = buffer;
446    }
447
448    memcpy(mBuffer->data() + mBuffer->size(), data, size);
449    mBuffer->setRange(0, mBuffer->size() + size);
450
451    RangeInfo info;
452    info.mLength = size;
453    info.mTimestampUs = timeUs;
454    mRangeInfos.push_back(info);
455
456#if 0
457    if (mMode == AAC) {
458        ALOGI("size = %zu, timeUs = %.2f secs", size, timeUs / 1E6);
459        hexdump(data, size);
460    }
461#endif
462
463    return OK;
464}
465
466sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnit() {
467    if ((mFlags & kFlag_AlignedData) && mMode == H264) {
468        if (mRangeInfos.empty()) {
469            return NULL;
470        }
471
472        RangeInfo info = *mRangeInfos.begin();
473        mRangeInfos.erase(mRangeInfos.begin());
474
475        sp<ABuffer> accessUnit = new ABuffer(info.mLength);
476        memcpy(accessUnit->data(), mBuffer->data(), info.mLength);
477        accessUnit->meta()->setInt64("timeUs", info.mTimestampUs);
478
479        memmove(mBuffer->data(),
480                mBuffer->data() + info.mLength,
481                mBuffer->size() - info.mLength);
482
483        mBuffer->setRange(0, mBuffer->size() - info.mLength);
484
485        if (mFormat == NULL) {
486            mFormat = MakeAVCCodecSpecificData(accessUnit);
487        }
488
489        return accessUnit;
490    }
491
492    switch (mMode) {
493        case H264:
494            return dequeueAccessUnitH264();
495        case AAC:
496            return dequeueAccessUnitAAC();
497        case AC3:
498            return dequeueAccessUnitAC3();
499        case MPEG_VIDEO:
500            return dequeueAccessUnitMPEGVideo();
501        case MPEG4_VIDEO:
502            return dequeueAccessUnitMPEG4Video();
503        case PCM_AUDIO:
504            return dequeueAccessUnitPCMAudio();
505        case METADATA:
506            return dequeueAccessUnitMetadata();
507        default:
508            if (mMode != MPEG_AUDIO) {
509                ALOGE("Unknown mode");
510                return NULL;
511            }
512            return dequeueAccessUnitMPEGAudio();
513    }
514}
515
516sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitAC3() {
517    unsigned syncStartPos = 0;  // in bytes
518    unsigned payloadSize = 0;
519    sp<MetaData> format = new MetaData;
520    while (true) {
521        if (syncStartPos + 2 >= mBuffer->size()) {
522            return NULL;
523        }
524
525        payloadSize = parseAC3SyncFrame(
526                mBuffer->data() + syncStartPos,
527                mBuffer->size() - syncStartPos,
528                &format);
529        if (payloadSize > 0) {
530            break;
531        }
532        ++syncStartPos;
533    }
534
535    if (mBuffer->size() < syncStartPos + payloadSize) {
536        ALOGV("Not enough buffer size for AC3");
537        return NULL;
538    }
539
540    if (mFormat == NULL) {
541        mFormat = format;
542    }
543
544    sp<ABuffer> accessUnit = new ABuffer(syncStartPos + payloadSize);
545    memcpy(accessUnit->data(), mBuffer->data(), syncStartPos + payloadSize);
546
547    int64_t timeUs = fetchTimestamp(syncStartPos + payloadSize);
548    if (timeUs < 0ll) {
549        ALOGE("negative timeUs");
550        return NULL;
551    }
552    accessUnit->meta()->setInt64("timeUs", timeUs);
553    accessUnit->meta()->setInt32("isSync", 1);
554
555    memmove(
556            mBuffer->data(),
557            mBuffer->data() + syncStartPos + payloadSize,
558            mBuffer->size() - syncStartPos - payloadSize);
559
560    mBuffer->setRange(0, mBuffer->size() - syncStartPos - payloadSize);
561
562    return accessUnit;
563}
564
565sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitPCMAudio() {
566    if (mBuffer->size() < 4) {
567        return NULL;
568    }
569
570    ABitReader bits(mBuffer->data(), 4);
571    if (bits.getBits(8) != 0xa0) {
572        ALOGE("Unexpected bit values");
573        return NULL;
574    }
575    unsigned numAUs = bits.getBits(8);
576    bits.skipBits(8);
577    unsigned quantization_word_length __unused = bits.getBits(2);
578    unsigned audio_sampling_frequency = bits.getBits(3);
579    unsigned num_channels = bits.getBits(3);
580
581    if (audio_sampling_frequency != 2) {
582        ALOGE("Wrong sampling freq");
583        return NULL;
584    }
585    if (num_channels != 1u) {
586        ALOGE("Wrong channel #");
587        return NULL;
588    }
589
590    if (mFormat == NULL) {
591        mFormat = new MetaData;
592        mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
593        mFormat->setInt32(kKeyChannelCount, 2);
594        mFormat->setInt32(kKeySampleRate, 48000);
595        mFormat->setInt32(kKeyPcmEncoding, kAudioEncodingPcm16bit);
596    }
597
598    static const size_t kFramesPerAU = 80;
599    size_t frameSize = 2 /* numChannels */ * sizeof(int16_t);
600
601    size_t payloadSize = numAUs * frameSize * kFramesPerAU;
602
603    if (mBuffer->size() < 4 + payloadSize) {
604        return NULL;
605    }
606
607    sp<ABuffer> accessUnit = new ABuffer(payloadSize);
608    memcpy(accessUnit->data(), mBuffer->data() + 4, payloadSize);
609
610    int64_t timeUs = fetchTimestamp(payloadSize + 4);
611    if (timeUs < 0ll) {
612        ALOGE("Negative timeUs");
613        return NULL;
614    }
615    accessUnit->meta()->setInt64("timeUs", timeUs);
616    accessUnit->meta()->setInt32("isSync", 1);
617
618    int16_t *ptr = (int16_t *)accessUnit->data();
619    for (size_t i = 0; i < payloadSize / sizeof(int16_t); ++i) {
620        ptr[i] = ntohs(ptr[i]);
621    }
622
623    memmove(
624            mBuffer->data(),
625            mBuffer->data() + 4 + payloadSize,
626            mBuffer->size() - 4 - payloadSize);
627
628    mBuffer->setRange(0, mBuffer->size() - 4 - payloadSize);
629
630    return accessUnit;
631}
632
633sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitAAC() {
634    if (mBuffer->size() == 0) {
635        return NULL;
636    }
637
638    if (mRangeInfos.empty()) {
639        return NULL;
640    }
641
642    const RangeInfo &info = *mRangeInfos.begin();
643    if (mBuffer->size() < info.mLength) {
644        return NULL;
645    }
646
647    if (info.mTimestampUs < 0ll) {
648        ALOGE("Negative info.mTimestampUs");
649        return NULL;
650    }
651
652    // The idea here is consume all AAC frames starting at offsets before
653    // info.mLength so we can assign a meaningful timestamp without
654    // having to interpolate.
655    // The final AAC frame may well extend into the next RangeInfo but
656    // that's ok.
657    size_t offset = 0;
658    while (offset < info.mLength) {
659        if (offset + 7 > mBuffer->size()) {
660            return NULL;
661        }
662
663        ABitReader bits(mBuffer->data() + offset, mBuffer->size() - offset);
664
665        // adts_fixed_header
666
667        if (bits.getBits(12) != 0xfffu) {
668            ALOGE("Wrong atds_fixed_header");
669            return NULL;
670        }
671        bits.skipBits(3);  // ID, layer
672        bool protection_absent __unused = bits.getBits(1) != 0;
673
674        if (mFormat == NULL) {
675            unsigned profile = bits.getBits(2);
676            if (profile == 3u) {
677                ALOGE("profile should not be 3");
678                return NULL;
679            }
680            unsigned sampling_freq_index = bits.getBits(4);
681            bits.getBits(1);  // private_bit
682            unsigned channel_configuration = bits.getBits(3);
683            if (channel_configuration == 0u) {
684                ALOGE("channel_config should not be 0");
685                return NULL;
686            }
687            bits.skipBits(2);  // original_copy, home
688
689            mFormat = MakeAACCodecSpecificData(
690                    profile, sampling_freq_index, channel_configuration);
691
692            mFormat->setInt32(kKeyIsADTS, true);
693
694            int32_t sampleRate;
695            int32_t numChannels;
696            if (!mFormat->findInt32(kKeySampleRate, &sampleRate)) {
697                ALOGE("SampleRate not found");
698                return NULL;
699            }
700            if (!mFormat->findInt32(kKeyChannelCount, &numChannels)) {
701                ALOGE("ChannelCount not found");
702                return NULL;
703            }
704
705            ALOGI("found AAC codec config (%d Hz, %d channels)",
706                 sampleRate, numChannels);
707        } else {
708            // profile_ObjectType, sampling_frequency_index, private_bits,
709            // channel_configuration, original_copy, home
710            bits.skipBits(12);
711        }
712
713        // adts_variable_header
714
715        // copyright_identification_bit, copyright_identification_start
716        bits.skipBits(2);
717
718        unsigned aac_frame_length = bits.getBits(13);
719
720        bits.skipBits(11);  // adts_buffer_fullness
721
722        unsigned number_of_raw_data_blocks_in_frame = bits.getBits(2);
723
724        if (number_of_raw_data_blocks_in_frame != 0) {
725            // To be implemented.
726            ALOGE("Should not reach here.");
727            return NULL;
728        }
729
730        if (offset + aac_frame_length > mBuffer->size()) {
731            return NULL;
732        }
733
734        size_t headerSize __unused = protection_absent ? 7 : 9;
735
736        offset += aac_frame_length;
737    }
738
739    int64_t timeUs = fetchTimestamp(offset);
740
741    sp<ABuffer> accessUnit = new ABuffer(offset);
742    memcpy(accessUnit->data(), mBuffer->data(), offset);
743
744    memmove(mBuffer->data(), mBuffer->data() + offset,
745            mBuffer->size() - offset);
746    mBuffer->setRange(0, mBuffer->size() - offset);
747
748    accessUnit->meta()->setInt64("timeUs", timeUs);
749    accessUnit->meta()->setInt32("isSync", 1);
750
751    return accessUnit;
752}
753
754int64_t ElementaryStreamQueue::fetchTimestamp(size_t size) {
755    int64_t timeUs = -1;
756    bool first = true;
757
758    while (size > 0) {
759        if (mRangeInfos.empty()) {
760            return timeUs;
761        }
762
763        RangeInfo *info = &*mRangeInfos.begin();
764
765        if (first) {
766            timeUs = info->mTimestampUs;
767            first = false;
768        }
769
770        if (info->mLength > size) {
771            info->mLength -= size;
772            size = 0;
773        } else {
774            size -= info->mLength;
775
776            mRangeInfos.erase(mRangeInfos.begin());
777            info = NULL;
778        }
779
780    }
781
782    if (timeUs == 0ll) {
783        ALOGV("Returning 0 timestamp");
784    }
785
786    return timeUs;
787}
788
// Extracts the next complete H.264 access unit from the buffer.
//
// NAL units are walked with getNextNALUnit() and collected until a NAL is
// seen that belongs to the following access unit: either a slice (type 1 or
// 5) whose first_mb_in_slice is 0 after a slice has already been collected,
// or an access unit delimiter (type 9) / SPS (type 7) following a slice.
// The collected NALs are then copied into a new buffer, each preceded by a
// 00 00 00 01 start code, and removed from the queue.
//
// The returned buffer's meta carries "timeUs", "isSync" (only when an IDR
// slice was seen during the scan) and, when non-empty SEI NALs are present,
// a "sei" buffer holding one NALPosition per SEI that locates it inside the
// returned access unit.
//
// Returns NULL if no complete access unit is buffered yet, if the NAL scan
// ends with an error other than -EAGAIN, or if the timestamp is negative.
sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitH264() {
    const uint8_t *data = mBuffer->data();

    size_t size = mBuffer->size();
    // Positions (relative to mBuffer->data()) of the NALs collected so far.
    Vector<NALPosition> nals;

    // Sum of the sizes of the collected NALs, excluding start codes.
    size_t totalSize = 0;
    size_t seiCount = 0;

    status_t err;
    const uint8_t *nalStart;
    size_t nalSize;
    bool foundSlice = false;
    bool foundIDR = false;
    while ((err = getNextNALUnit(&data, &size, &nalStart, &nalSize)) == OK) {
        if (nalSize == 0) continue;

        // The NAL type is the low 5 bits of the first NAL byte.
        unsigned nalType = nalStart[0] & 0x1f;
        bool flush = false;

        if (nalType == 1 || nalType == 5) {
            if (nalType == 5) {
                foundIDR = true;
            }
            if (foundSlice) {
                // NOTE(review): the reader starts one byte into the NAL but is
                // given the full nalSize, so its range extends one byte past
                // the NAL — confirm whether nalSize - 1 was intended.
                ABitReader br(nalStart + 1, nalSize);
                unsigned first_mb_in_slice = parseUE(&br);

                if (first_mb_in_slice == 0) {
                    // This slice starts a new frame.

                    flush = true;
                }
            }

            foundSlice = true;
        } else if ((nalType == 9 || nalType == 7) && foundSlice) {
            // Access unit delimiter and SPS will be associated with the
            // next frame.

            flush = true;
        } else if (nalType == 6 && nalSize > 0) {
            // found non-zero sized SEI
            ++seiCount;
        }

        if (flush) {
            // The access unit will contain all nal units up to, but excluding
            // the current one, separated by 0x00 0x00 0x00 0x01 startcodes.

            size_t auSize = 4 * nals.size() + totalSize;
            sp<ABuffer> accessUnit = new ABuffer(auSize);
            sp<ABuffer> sei;

            if (seiCount > 0) {
                // One NALPosition slot per SEI counted during the scan.
                sei = new ABuffer(seiCount * sizeof(NALPosition));
                accessUnit->meta()->setBuffer("sei", sei);
            }

#if !LOG_NDEBUG
            AString out;
#endif

            size_t dstOffset = 0;
            size_t seiIndex = 0;
            for (size_t i = 0; i < nals.size(); ++i) {
                const NALPosition &pos = nals.itemAt(i);

                unsigned nalType = mBuffer->data()[pos.nalOffset] & 0x1f;

                if (nalType == 6 && pos.nalSize > 0) {
                    if (seiIndex >= sei->size() / sizeof(NALPosition)) {
                        ALOGE("Wrong seiIndex");
                        return NULL;
                    }
                    // Record where this SEI's payload lands in the access
                    // unit, i.e. just past the start code written below.
                    NALPosition &seiPos = ((NALPosition *)sei->data())[seiIndex++];
                    seiPos.nalOffset = dstOffset + 4;
                    seiPos.nalSize = pos.nalSize;
                }

#if !LOG_NDEBUG
                char tmp[128];
                sprintf(tmp, "0x%02x", nalType);
                if (i > 0) {
                    out.append(", ");
                }
                out.append(tmp);
#endif

                memcpy(accessUnit->data() + dstOffset, "\x00\x00\x00\x01", 4);

                memcpy(accessUnit->data() + dstOffset + 4,
                       mBuffer->data() + pos.nalOffset,
                       pos.nalSize);

                dstOffset += pos.nalSize + 4;
            }

#if !LOG_NDEBUG
            ALOGV("accessUnit contains nal types %s", out.c_str());
#endif

            // Everything up to the end of the last collected NAL is consumed;
            // the NAL that triggered the flush stays in the buffer.
            const NALPosition &pos = nals.itemAt(nals.size() - 1);
            size_t nextScan = pos.nalOffset + pos.nalSize;

            memmove(mBuffer->data(),
                    mBuffer->data() + nextScan,
                    mBuffer->size() - nextScan);

            mBuffer->setRange(0, mBuffer->size() - nextScan);

            // Note: the bytes have already been removed from the buffer at
            // this point, so a negative timestamp drops the access unit.
            int64_t timeUs = fetchTimestamp(nextScan);
            if (timeUs < 0ll) {
                ALOGE("Negative timeUs");
                return NULL;
            }

            accessUnit->meta()->setInt64("timeUs", timeUs);
            if (foundIDR) {
                accessUnit->meta()->setInt32("isSync", 1);
            }

            if (mFormat == NULL) {
                mFormat = MakeAVCCodecSpecificData(accessUnit);
            }

            return accessUnit;
        }

        // Not a boundary: remember this NAL as part of the current unit.
        NALPosition pos;
        pos.nalOffset = nalStart - mBuffer->data();
        pos.nalSize = nalSize;

        nals.push(pos);

        totalSize += nalSize;
    }
    // The scan ended without finding a boundary. Anything other than -EAGAIN
    // is logged; either way there is no access unit to return yet.
    if (err != (status_t)-EAGAIN) {
        ALOGE("Unexpeted err");
        return NULL;
    }

    return NULL;
}
933
934sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEGAudio() {
935    const uint8_t *data = mBuffer->data();
936    size_t size = mBuffer->size();
937
938    if (size < 4) {
939        return NULL;
940    }
941
942    uint32_t header = U32_AT(data);
943
944    size_t frameSize;
945    int samplingRate, numChannels, bitrate, numSamples;
946    if (!GetMPEGAudioFrameSize(
947                header, &frameSize, &samplingRate, &numChannels,
948                &bitrate, &numSamples)) {
949        ALOGE("Failed to get audio frame size");
950        return NULL;
951    }
952
953    if (size < frameSize) {
954        return NULL;
955    }
956
957    unsigned layer = 4 - ((header >> 17) & 3);
958
959    sp<ABuffer> accessUnit = new ABuffer(frameSize);
960    memcpy(accessUnit->data(), data, frameSize);
961
962    memmove(mBuffer->data(),
963            mBuffer->data() + frameSize,
964            mBuffer->size() - frameSize);
965
966    mBuffer->setRange(0, mBuffer->size() - frameSize);
967
968    int64_t timeUs = fetchTimestamp(frameSize);
969    if (timeUs < 0ll) {
970        ALOGE("Negative timeUs");
971        return NULL;
972    }
973
974    accessUnit->meta()->setInt64("timeUs", timeUs);
975    accessUnit->meta()->setInt32("isSync", 1);
976
977    if (mFormat == NULL) {
978        mFormat = new MetaData;
979
980        switch (layer) {
981            case 1:
982                mFormat->setCString(
983                        kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_I);
984                break;
985            case 2:
986                mFormat->setCString(
987                        kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_II);
988                break;
989            case 3:
990                mFormat->setCString(
991                        kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
992                break;
993            default:
994                return NULL;
995        }
996
997        mFormat->setInt32(kKeySampleRate, samplingRate);
998        mFormat->setInt32(kKeyChannelCount, numChannels);
999    }
1000
1001    return accessUnit;
1002}
1003
1004static void EncodeSize14(uint8_t **_ptr, size_t size) {
1005    if (size > 0x3fff) {
1006        ALOGE("Wrong size");
1007        return;
1008    }
1009
1010    uint8_t *ptr = *_ptr;
1011
1012    *ptr++ = 0x80 | (size >> 7);
1013    *ptr++ = size & 0x7f;
1014
1015    *_ptr = ptr;
1016}
1017
1018static sp<ABuffer> MakeMPEGVideoESDS(const sp<ABuffer> &csd) {
1019    sp<ABuffer> esds = new ABuffer(csd->size() + 25);
1020
1021    uint8_t *ptr = esds->data();
1022    *ptr++ = 0x03;
1023    EncodeSize14(&ptr, 22 + csd->size());
1024
1025    *ptr++ = 0x00;  // ES_ID
1026    *ptr++ = 0x00;
1027
1028    *ptr++ = 0x00;  // streamDependenceFlag, URL_Flag, OCRstreamFlag
1029
1030    *ptr++ = 0x04;
1031    EncodeSize14(&ptr, 16 + csd->size());
1032
1033    *ptr++ = 0x40;  // Audio ISO/IEC 14496-3
1034
1035    for (size_t i = 0; i < 12; ++i) {
1036        *ptr++ = 0x00;
1037    }
1038
1039    *ptr++ = 0x05;
1040    EncodeSize14(&ptr, csd->size());
1041
1042    memcpy(ptr, csd->data(), csd->size());
1043
1044    return esds;
1045}
1046
1047sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEGVideo() {
1048    const uint8_t *data = mBuffer->data();
1049    size_t size = mBuffer->size();
1050
1051    Vector<size_t> userDataPositions;
1052
1053    bool sawPictureStart = false;
1054    int pprevStartCode = -1;
1055    int prevStartCode = -1;
1056    int currentStartCode = -1;
1057    bool gopFound = false;
1058    bool isClosedGop = false;
1059    bool brokenLink = false;
1060
1061    size_t offset = 0;
1062    while (offset + 3 < size) {
1063        if (memcmp(&data[offset], "\x00\x00\x01", 3)) {
1064            ++offset;
1065            continue;
1066        }
1067
1068        pprevStartCode = prevStartCode;
1069        prevStartCode = currentStartCode;
1070        currentStartCode = data[offset + 3];
1071
1072        if (currentStartCode == 0xb3 && mFormat == NULL) {
1073            memmove(mBuffer->data(), mBuffer->data() + offset, size - offset);
1074            size -= offset;
1075            (void)fetchTimestamp(offset);
1076            offset = 0;
1077            mBuffer->setRange(0, size);
1078        }
1079
1080        if ((prevStartCode == 0xb3 && currentStartCode != 0xb5)
1081                || (pprevStartCode == 0xb3 && prevStartCode == 0xb5)) {
1082            // seqHeader without/with extension
1083
1084            if (mFormat == NULL) {
1085                if (size < 7u) {
1086                    ALOGE("Size too small");
1087                    return NULL;
1088                }
1089
1090                unsigned width =
1091                    (data[4] << 4) | data[5] >> 4;
1092
1093                unsigned height =
1094                    ((data[5] & 0x0f) << 8) | data[6];
1095
1096                mFormat = new MetaData;
1097                mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
1098                mFormat->setInt32(kKeyWidth, width);
1099                mFormat->setInt32(kKeyHeight, height);
1100
1101                ALOGI("found MPEG2 video codec config (%d x %d)", width, height);
1102
1103                sp<ABuffer> csd = new ABuffer(offset);
1104                memcpy(csd->data(), data, offset);
1105
1106                memmove(mBuffer->data(),
1107                        mBuffer->data() + offset,
1108                        mBuffer->size() - offset);
1109
1110                mBuffer->setRange(0, mBuffer->size() - offset);
1111                size -= offset;
1112                (void)fetchTimestamp(offset);
1113                offset = 0;
1114
1115                // hexdump(csd->data(), csd->size());
1116
1117                sp<ABuffer> esds = MakeMPEGVideoESDS(csd);
1118                mFormat->setData(
1119                        kKeyESDS, kTypeESDS, esds->data(), esds->size());
1120
1121                return NULL;
1122            }
1123        }
1124
1125        if (mFormat != NULL && currentStartCode == 0xb8) {
1126            // GOP layer
1127            if (offset + 7 >= size) {
1128                ALOGE("Size too small");
1129                return NULL;
1130            }
1131            gopFound = true;
1132            isClosedGop = (data[offset + 7] & 0x40) != 0;
1133            brokenLink = (data[offset + 7] & 0x20) != 0;
1134        }
1135
1136        if (mFormat != NULL && currentStartCode == 0xb2) {
1137            userDataPositions.add(offset);
1138        }
1139
1140        if (mFormat != NULL && currentStartCode == 0x00) {
1141            // Picture start
1142
1143            if (!sawPictureStart) {
1144                sawPictureStart = true;
1145            } else {
1146                sp<ABuffer> accessUnit = new ABuffer(offset);
1147                memcpy(accessUnit->data(), data, offset);
1148
1149                memmove(mBuffer->data(),
1150                        mBuffer->data() + offset,
1151                        mBuffer->size() - offset);
1152
1153                mBuffer->setRange(0, mBuffer->size() - offset);
1154
1155                int64_t timeUs = fetchTimestamp(offset);
1156                if (timeUs < 0ll) {
1157                    ALOGE("Negative timeUs");
1158                    return NULL;
1159                }
1160
1161                offset = 0;
1162
1163                accessUnit->meta()->setInt64("timeUs", timeUs);
1164                if (gopFound && (!brokenLink || isClosedGop)) {
1165                    accessUnit->meta()->setInt32("isSync", 1);
1166                }
1167
1168                ALOGV("returning MPEG video access unit at time %" PRId64 " us",
1169                      timeUs);
1170
1171                // hexdump(accessUnit->data(), accessUnit->size());
1172
1173                if (userDataPositions.size() > 0) {
1174                    sp<ABuffer> mpegUserData =
1175                        new ABuffer(userDataPositions.size() * sizeof(size_t));
1176                    if (mpegUserData != NULL && mpegUserData->data() != NULL) {
1177                        for (size_t i = 0; i < userDataPositions.size(); ++i) {
1178                            memcpy(
1179                                    mpegUserData->data() + i * sizeof(size_t),
1180                                    &userDataPositions[i], sizeof(size_t));
1181                        }
1182                        accessUnit->meta()->setBuffer("mpegUserData", mpegUserData);
1183                    }
1184                }
1185
1186                return accessUnit;
1187            }
1188        }
1189
1190        ++offset;
1191    }
1192
1193    return NULL;
1194}
1195
// Returns the size in bytes of the chunk that begins at |data|, i.e. the
// distance from the start code at data[0] to the next start code prefix.
//
// A chunk starts with the 3-byte prefix 00 00 01 followed by a 1-byte start
// code value. The value byte belongs to the chunk header, so the search for
// the next prefix begins after it; otherwise a value byte of 0x00 could be
// mistaken for the first byte of the next prefix (00 00 01 00 00 01 ...),
// producing a 3-byte "chunk" whose type byte callers would then read from
// the following prefix.
//
// Returns -EAGAIN if |data| does not start with a start code prefix or if no
// subsequent prefix is contained in the |size| bytes available.
static ssize_t getNextChunkSize(
        const uint8_t *data, size_t size) {
    static const char kStartCode[] = "\x00\x00\x01";
    static const size_t kPrefixSize = 3;
    static const size_t kHeaderSize = kPrefixSize + 1;  // prefix + value byte

    // At least <prefix><value><next prefix> is needed for a successful scan.
    if (size < kHeaderSize + kPrefixSize) {
        return -EAGAIN;
    }

    if (memcmp(kStartCode, data, kPrefixSize)) {
        return -EAGAIN;
    }

    for (size_t offset = kHeaderSize; offset + kPrefixSize <= size; ++offset) {
        if (!memcmp(&data[offset], kStartCode, kPrefixSize)) {
            return offset;
        }
    }

    return -EAGAIN;
}
1219
1220sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEG4Video() {
1221    uint8_t *data = mBuffer->data();
1222    size_t size = mBuffer->size();
1223
1224    enum {
1225        SKIP_TO_VISUAL_OBJECT_SEQ_START,
1226        EXPECT_VISUAL_OBJECT_START,
1227        EXPECT_VO_START,
1228        EXPECT_VOL_START,
1229        WAIT_FOR_VOP_START,
1230        SKIP_TO_VOP_START,
1231
1232    } state;
1233
1234    if (mFormat == NULL) {
1235        state = SKIP_TO_VISUAL_OBJECT_SEQ_START;
1236    } else {
1237        state = SKIP_TO_VOP_START;
1238    }
1239
1240    int32_t width = -1, height = -1;
1241
1242    size_t offset = 0;
1243    ssize_t chunkSize;
1244    while ((chunkSize = getNextChunkSize(
1245                    &data[offset], size - offset)) > 0) {
1246        bool discard = false;
1247
1248        unsigned chunkType = data[offset + 3];
1249
1250        switch (state) {
1251            case SKIP_TO_VISUAL_OBJECT_SEQ_START:
1252            {
1253                if (chunkType == 0xb0) {
1254                    // Discard anything before this marker.
1255
1256                    state = EXPECT_VISUAL_OBJECT_START;
1257                } else {
1258                    discard = true;
1259                }
1260                break;
1261            }
1262
1263            case EXPECT_VISUAL_OBJECT_START:
1264            {
1265                if (chunkType != 0xb5) {
1266                    ALOGE("Unexpected chunkType");
1267                    return NULL;
1268                }
1269                state = EXPECT_VO_START;
1270                break;
1271            }
1272
1273            case EXPECT_VO_START:
1274            {
1275                if (chunkType > 0x1f) {
1276                    ALOGE("Unexpected chunkType");
1277                    return NULL;
1278                }
1279                state = EXPECT_VOL_START;
1280                break;
1281            }
1282
1283            case EXPECT_VOL_START:
1284            {
1285                if ((chunkType & 0xf0) != 0x20) {
1286                    ALOGE("Wrong chunkType");
1287                    return NULL;
1288                }
1289
1290                if (!ExtractDimensionsFromVOLHeader(
1291                            &data[offset], chunkSize,
1292                            &width, &height)) {
1293                    ALOGE("Failed to get dimension");
1294                    return NULL;
1295                }
1296
1297                state = WAIT_FOR_VOP_START;
1298                break;
1299            }
1300
1301            case WAIT_FOR_VOP_START:
1302            {
1303                if (chunkType == 0xb3 || chunkType == 0xb6) {
1304                    // group of VOP or VOP start.
1305
1306                    mFormat = new MetaData;
1307                    mFormat->setCString(
1308                            kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG4);
1309
1310                    mFormat->setInt32(kKeyWidth, width);
1311                    mFormat->setInt32(kKeyHeight, height);
1312
1313                    ALOGI("found MPEG4 video codec config (%d x %d)",
1314                         width, height);
1315
1316                    sp<ABuffer> csd = new ABuffer(offset);
1317                    memcpy(csd->data(), data, offset);
1318
1319                    // hexdump(csd->data(), csd->size());
1320
1321                    sp<ABuffer> esds = MakeMPEGVideoESDS(csd);
1322                    mFormat->setData(
1323                            kKeyESDS, kTypeESDS,
1324                            esds->data(), esds->size());
1325
1326                    discard = true;
1327                    state = SKIP_TO_VOP_START;
1328                }
1329
1330                break;
1331            }
1332
1333            case SKIP_TO_VOP_START:
1334            {
1335                if (chunkType == 0xb6) {
1336                    int vopCodingType = (data[offset + 4] & 0xc0) >> 6;
1337
1338                    offset += chunkSize;
1339
1340                    sp<ABuffer> accessUnit = new ABuffer(offset);
1341                    memcpy(accessUnit->data(), data, offset);
1342
1343                    memmove(data, &data[offset], size - offset);
1344                    size -= offset;
1345                    mBuffer->setRange(0, size);
1346
1347                    int64_t timeUs = fetchTimestamp(offset);
1348                    if (timeUs < 0ll) {
1349                        ALOGE("Negative timeus");
1350                        return NULL;
1351                    }
1352
1353                    offset = 0;
1354
1355                    accessUnit->meta()->setInt64("timeUs", timeUs);
1356                    if (vopCodingType == 0) {  // intra-coded VOP
1357                        accessUnit->meta()->setInt32("isSync", 1);
1358                    }
1359
1360                    ALOGV("returning MPEG4 video access unit at time %" PRId64 " us",
1361                         timeUs);
1362
1363                    // hexdump(accessUnit->data(), accessUnit->size());
1364
1365                    return accessUnit;
1366                } else if (chunkType != 0xb3) {
1367                    offset += chunkSize;
1368                    discard = true;
1369                }
1370
1371                break;
1372            }
1373
1374            default:
1375                ALOGE("Unknown state: %d", state);
1376                return NULL;
1377        }
1378
1379        if (discard) {
1380            (void)fetchTimestamp(offset);
1381            memmove(data, &data[offset], size - offset);
1382            size -= offset;
1383            offset = 0;
1384            mBuffer->setRange(0, size);
1385        } else {
1386            offset += chunkSize;
1387        }
1388    }
1389
1390    return NULL;
1391}
1392
1393void ElementaryStreamQueue::signalEOS() {
1394    if (!mEOSReached) {
1395        if (mMode == MPEG_VIDEO) {
1396            const char *theEnd = "\x00\x00\x01\x00";
1397            appendData(theEnd, 4, 0);
1398        }
1399        mEOSReached = true;
1400    } else {
1401        ALOGW("EOS already signaled");
1402    }
1403}
1404
1405sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMetadata() {
1406    size_t size = mBuffer->size();
1407    if (!size) {
1408        return NULL;
1409    }
1410
1411    sp<ABuffer> accessUnit = new ABuffer(size);
1412    int64_t timeUs = fetchTimestamp(size);
1413    accessUnit->meta()->setInt64("timeUs", timeUs);
1414
1415    memcpy(accessUnit->data(), mBuffer->data(), size);
1416    mBuffer->setRange(0, 0);
1417
1418    if (mFormat == NULL) {
1419        mFormat = new MetaData;
1420        mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_DATA_TIMED_ID3);
1421    }
1422
1423    return accessUnit;
1424}
1425
1426}  // namespace android
1427