MPEG4Extractor.cpp revision bcc8e5817fa3dc624f214e58f756098053ac5682
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MPEG4Extractor"
19#include <utils/Log.h>
20
21#include "include/MPEG4Extractor.h"
22#include "include/SampleTable.h"
23#include "include/ESDS.h"
24
25#include <ctype.h>
26#include <stdint.h>
27#include <stdlib.h>
28#include <string.h>
29
30#include <media/stagefright/foundation/ABitReader.h>
31#include <media/stagefright/foundation/ABuffer.h>
32#include <media/stagefright/foundation/ADebug.h>
33#include <media/stagefright/foundation/AMessage.h>
34#include <media/stagefright/MediaBuffer.h>
35#include <media/stagefright/MediaBufferGroup.h>
36#include <media/stagefright/MediaDefs.h>
37#include <media/stagefright/MediaSource.h>
38#include <media/stagefright/MetaData.h>
39#include <utils/String8.h>
40
41namespace android {
42
43class MPEG4Source : public MediaSource {
44public:
45    // Caller retains ownership of both "dataSource" and "sampleTable".
46    MPEG4Source(const sp<MetaData> &format,
47                const sp<DataSource> &dataSource,
48                int32_t timeScale,
49                const sp<SampleTable> &sampleTable,
50                Vector<SidxEntry> &sidx,
51                off64_t firstMoofOffset);
52
53    virtual status_t start(MetaData *params = NULL);
54    virtual status_t stop();
55
56    virtual sp<MetaData> getFormat();
57
58    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
59    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);
60
61protected:
62    virtual ~MPEG4Source();
63
64private:
65    Mutex mLock;
66
67    sp<MetaData> mFormat;
68    sp<DataSource> mDataSource;
69    int32_t mTimescale;
70    sp<SampleTable> mSampleTable;
71    uint32_t mCurrentSampleIndex;
72    uint32_t mCurrentFragmentIndex;
73    Vector<SidxEntry> &mSegments;
74    off64_t mFirstMoofOffset;
75    off64_t mCurrentMoofOffset;
76    off64_t mNextMoofOffset;
77    uint32_t mCurrentTime;
78    int32_t mLastParsedTrackId;
79    int32_t mTrackId;
80
81    int32_t mCryptoMode;    // passed in from extractor
82    int32_t mDefaultIVSize; // passed in from extractor
83    uint8_t mCryptoKey[16]; // passed in from extractor
84    uint32_t mCurrentAuxInfoType;
85    uint32_t mCurrentAuxInfoTypeParameter;
86    int32_t mCurrentDefaultSampleInfoSize;
87    uint32_t mCurrentSampleInfoCount;
88    uint32_t mCurrentSampleInfoAllocSize;
89    uint8_t* mCurrentSampleInfoSizes;
90    uint32_t mCurrentSampleInfoOffsetCount;
91    uint32_t mCurrentSampleInfoOffsetsAllocSize;
92    uint64_t* mCurrentSampleInfoOffsets;
93
94    bool mIsAVC;
95    size_t mNALLengthSize;
96
97    bool mStarted;
98
99    MediaBufferGroup *mGroup;
100
101    MediaBuffer *mBuffer;
102
103    bool mWantsNALFragments;
104
105    uint8_t *mSrcBuffer;
106
107    size_t parseNALSize(const uint8_t *data) const;
108    status_t parseChunk(off64_t *offset);
109    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
110    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
111    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
112    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
113
114    struct TrackFragmentHeaderInfo {
115        enum Flags {
116            kBaseDataOffsetPresent         = 0x01,
117            kSampleDescriptionIndexPresent = 0x02,
118            kDefaultSampleDurationPresent  = 0x08,
119            kDefaultSampleSizePresent      = 0x10,
120            kDefaultSampleFlagsPresent     = 0x20,
121            kDurationIsEmpty               = 0x10000,
122        };
123
124        uint32_t mTrackID;
125        uint32_t mFlags;
126        uint64_t mBaseDataOffset;
127        uint32_t mSampleDescriptionIndex;
128        uint32_t mDefaultSampleDuration;
129        uint32_t mDefaultSampleSize;
130        uint32_t mDefaultSampleFlags;
131
132        uint64_t mDataOffset;
133    };
134    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;
135
136    struct Sample {
137        off64_t offset;
138        size_t size;
139        uint32_t duration;
140        uint8_t iv[16];
141        Vector<size_t> clearsizes;
142        Vector<size_t> encryptedsizes;
143    };
144    Vector<Sample> mCurrentSamples;
145
146    MPEG4Source(const MPEG4Source &);
147    MPEG4Source &operator=(const MPEG4Source &);
148};
149
// This custom data source wraps an existing one. Requests that fall entirely
// within the cached range are served from the cache; all other requests are
// forwarded to the wrapped data source.
// It is used to cache the full sample table metadata of a single track. To
// cover several tracks the sources are wrapped repeatedly, i.e. each
// MPEG4DataSource caches the sample table metadata of exactly one track.
156
157struct MPEG4DataSource : public DataSource {
158    MPEG4DataSource(const sp<DataSource> &source);
159
160    virtual status_t initCheck() const;
161    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
162    virtual status_t getSize(off64_t *size);
163    virtual uint32_t flags();
164
165    status_t setCachedRange(off64_t offset, size_t size);
166
167protected:
168    virtual ~MPEG4DataSource();
169
170private:
171    Mutex mLock;
172
173    sp<DataSource> mSource;
174    off64_t mCachedOffset;
175    size_t mCachedSize;
176    uint8_t *mCache;
177
178    void clearCache();
179
180    MPEG4DataSource(const MPEG4DataSource &);
181    MPEG4DataSource &operator=(const MPEG4DataSource &);
182};
183
184MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
185    : mSource(source),
186      mCachedOffset(0),
187      mCachedSize(0),
188      mCache(NULL) {
189}
190
191MPEG4DataSource::~MPEG4DataSource() {
192    clearCache();
193}
194
195void MPEG4DataSource::clearCache() {
196    if (mCache) {
197        free(mCache);
198        mCache = NULL;
199    }
200
201    mCachedOffset = 0;
202    mCachedSize = 0;
203}
204
205status_t MPEG4DataSource::initCheck() const {
206    return mSource->initCheck();
207}
208
209ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
210    Mutex::Autolock autoLock(mLock);
211
212    if (offset >= mCachedOffset
213            && offset + size <= mCachedOffset + mCachedSize) {
214        memcpy(data, &mCache[offset - mCachedOffset], size);
215        return size;
216    }
217
218    return mSource->readAt(offset, data, size);
219}
220
221status_t MPEG4DataSource::getSize(off64_t *size) {
222    return mSource->getSize(size);
223}
224
225uint32_t MPEG4DataSource::flags() {
226    return mSource->flags();
227}
228
229status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
230    Mutex::Autolock autoLock(mLock);
231
232    clearCache();
233
234    mCache = (uint8_t *)malloc(size);
235
236    if (mCache == NULL) {
237        return -ENOMEM;
238    }
239
240    mCachedOffset = offset;
241    mCachedSize = size;
242
243    ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
244
245    if (err < (ssize_t)size) {
246        clearCache();
247
248        return ERROR_IO;
249    }
250
251    return OK;
252}
253
254////////////////////////////////////////////////////////////////////////////////
255
// Debug helper: prints "size" bytes starting at "_data" to stdout, 16 bytes
// per row. Each row shows the offset of its first byte, the bytes in hex
// (with an extra space after the eighth) and the same bytes as characters,
// using '.' for anything that is not printable.
static void hexdump(const void *_data, size_t size) {
    const uint8_t *data = (const uint8_t *)_data;
    size_t offset = 0;
    while (offset < size) {
        // "offset" is a size_t, so it needs the 'z' length modifier; plain
        // "%x" expects an unsigned int.
        printf("0x%04zx  ", offset);

        // Number of bytes actually present on this row.
        size_t n = size - offset;
        if (n > 16) {
            n = 16;
        }

        // Hex columns; missing bytes on the last row are padded with blanks
        // so that the character column stays aligned.
        for (size_t i = 0; i < 16; ++i) {
            if (i == 8) {
                printf(" ");
            }

            if (offset + i < size) {
                printf("%02x ", data[offset + i]);
            } else {
                printf("   ");
            }
        }

        printf(" ");

        // Character column.
        for (size_t i = 0; i < n; ++i) {
            if (isprint(data[offset + i])) {
                printf("%c", data[offset + i]);
            } else {
                printf(".");
            }
        }

        printf("\n");

        offset += 16;
    }
}
294
295static const char *FourCC2MIME(uint32_t fourcc) {
296    switch (fourcc) {
297        case FOURCC('m', 'p', '4', 'a'):
298            return MEDIA_MIMETYPE_AUDIO_AAC;
299
300        case FOURCC('s', 'a', 'm', 'r'):
301            return MEDIA_MIMETYPE_AUDIO_AMR_NB;
302
303        case FOURCC('s', 'a', 'w', 'b'):
304            return MEDIA_MIMETYPE_AUDIO_AMR_WB;
305
306        case FOURCC('m', 'p', '4', 'v'):
307            return MEDIA_MIMETYPE_VIDEO_MPEG4;
308
309        case FOURCC('s', '2', '6', '3'):
310        case FOURCC('h', '2', '6', '3'):
311        case FOURCC('H', '2', '6', '3'):
312            return MEDIA_MIMETYPE_VIDEO_H263;
313
314        case FOURCC('a', 'v', 'c', '1'):
315            return MEDIA_MIMETYPE_VIDEO_AVC;
316
317        default:
318            CHECK(!"should not be here.");
319            return NULL;
320    }
321}
322
323static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
324    if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
325        // AMR NB audio is always mono, 8kHz
326        *channels = 1;
327        *rate = 8000;
328        return true;
329    } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
330        // AMR WB audio is always mono, 16kHz
331        *channels = 1;
332        *rate = 16000;
333        return true;
334    }
335    return false;
336}
337
338MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
339    : mSidxDuration(0),
340      mMoofOffset(0),
341      mDataSource(source),
342      mInitCheck(NO_INIT),
343      mHasVideo(false),
344      mFirstTrack(NULL),
345      mLastTrack(NULL),
346      mFileMetaData(new MetaData),
347      mFirstSINF(NULL),
348      mIsDrm(false) {
349}
350
351MPEG4Extractor::~MPEG4Extractor() {
352    Track *track = mFirstTrack;
353    while (track) {
354        Track *next = track->next;
355
356        delete track;
357        track = next;
358    }
359    mFirstTrack = mLastTrack = NULL;
360
361    SINF *sinf = mFirstSINF;
362    while (sinf) {
363        SINF *next = sinf->next;
364        delete sinf->IPMPData;
365        delete sinf;
366        sinf = next;
367    }
368    mFirstSINF = NULL;
369
370    for (size_t i = 0; i < mPssh.size(); i++) {
371        delete [] mPssh[i].data;
372    }
373}
374
375uint32_t MPEG4Extractor::flags() const {
376    return CAN_PAUSE |
377            ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
378                    (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
379}
380
381sp<MetaData> MPEG4Extractor::getMetaData() {
382    status_t err;
383    if ((err = readMetaData()) != OK) {
384        return new MetaData;
385    }
386
387    return mFileMetaData;
388}
389
390size_t MPEG4Extractor::countTracks() {
391    status_t err;
392    if ((err = readMetaData()) != OK) {
393        ALOGV("MPEG4Extractor::countTracks: no tracks");
394        return 0;
395    }
396
397    size_t n = 0;
398    Track *track = mFirstTrack;
399    while (track) {
400        ++n;
401        track = track->next;
402    }
403
404    ALOGV("MPEG4Extractor::countTracks: %d tracks", n);
405    return n;
406}
407
408sp<MetaData> MPEG4Extractor::getTrackMetaData(
409        size_t index, uint32_t flags) {
410    status_t err;
411    if ((err = readMetaData()) != OK) {
412        return NULL;
413    }
414
415    Track *track = mFirstTrack;
416    while (index > 0) {
417        if (track == NULL) {
418            return NULL;
419        }
420
421        track = track->next;
422        --index;
423    }
424
425    if (track == NULL) {
426        return NULL;
427    }
428
429    if ((flags & kIncludeExtensiveMetaData)
430            && !track->includes_expensive_metadata) {
431        track->includes_expensive_metadata = true;
432
433        const char *mime;
434        CHECK(track->meta->findCString(kKeyMIMEType, &mime));
435        if (!strncasecmp("video/", mime, 6)) {
436            if (mMoofOffset > 0) {
437                int64_t duration;
438                if (track->meta->findInt64(kKeyDuration, &duration)) {
439                    // nothing fancy, just pick a frame near 1/4th of the duration
440                    track->meta->setInt64(
441                            kKeyThumbnailTime, duration / 4);
442                }
443            } else {
444                uint32_t sampleIndex;
445                uint32_t sampleTime;
446                if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK
447                        && track->sampleTable->getMetaDataForSample(
448                            sampleIndex, NULL /* offset */, NULL /* size */,
449                            &sampleTime) == OK) {
450                    track->meta->setInt64(
451                            kKeyThumbnailTime,
452                            ((int64_t)sampleTime * 1000000) / track->timescale);
453                }
454            }
455        }
456    }
457
458    return track->meta;
459}
460
// Writes the four bytes of "x", most significant first, into "s" and
// terminates the string. "s" must have room for at least 5 chars.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int i = 0; i < 4; ++i) {
        const int shift = 8 * (3 - i);
        s[i] = (x >> shift) & 0xff;
    }
    s[4] = '\0';
}
468
469status_t MPEG4Extractor::readMetaData() {
470    if (mInitCheck != NO_INIT) {
471        return mInitCheck;
472    }
473
474    off64_t offset = 0;
475    status_t err;
476    while (true) {
477        err = parseChunk(&offset, 0);
478        if (err == OK) {
479            continue;
480        }
481
482        uint32_t hdr[2];
483        if (mDataSource->readAt(offset, hdr, 8) < 8) {
484            break;
485        }
486        uint32_t chunk_type = ntohl(hdr[1]);
487        if (chunk_type == FOURCC('s', 'i', 'd', 'x')) {
488            // parse the sidx box too
489            continue;
490        } else if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
491            // store the offset of the first segment
492            mMoofOffset = offset;
493        }
494        break;
495    }
496
497    if (mInitCheck == OK) {
498        if (mHasVideo) {
499            mFileMetaData->setCString(
500                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
501        } else {
502            mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
503        }
504
505        mInitCheck = OK;
506    } else {
507        mInitCheck = err;
508    }
509
510    CHECK_NE(err, (status_t)NO_INIT);
511
512    // copy pssh data into file metadata
513    int psshsize = 0;
514    for (size_t i = 0; i < mPssh.size(); i++) {
515        psshsize += 20 + mPssh[i].datalen;
516    }
517    if (psshsize) {
518        char *buf = (char*)malloc(psshsize);
519        char *ptr = buf;
520        for (size_t i = 0; i < mPssh.size(); i++) {
521            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
522            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
523            ptr += (20 + mPssh[i].datalen);
524        }
525        mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize);
526        free(buf);
527    }
528    return mInitCheck;
529}
530
531char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
532    if (mFirstSINF == NULL) {
533        return NULL;
534    }
535
536    SINF *sinf = mFirstSINF;
537    while (sinf && (trackID != sinf->trackID)) {
538        sinf = sinf->next;
539    }
540
541    if (sinf == NULL) {
542        return NULL;
543    }
544
545    *len = sinf->len;
546    return sinf->IPMPData;
547}
548
549// Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
550static int32_t readSize(off64_t offset,
551        const sp<DataSource> DataSource, uint8_t *numOfBytes) {
552    uint32_t size = 0;
553    uint8_t data;
554    bool moreData = true;
555    *numOfBytes = 0;
556
557    while (moreData) {
558        if (DataSource->readAt(offset, &data, 1) < 1) {
559            return -1;
560        }
561        offset ++;
562        moreData = (data >= 128) ? true : false;
563        size = (size << 7) | (data & 0x7f); // Take last 7 bits
564        (*numOfBytes) ++;
565    }
566
567    return size;
568}
569
570status_t MPEG4Extractor::parseDrmSINF(off64_t *offset, off64_t data_offset) {
571    uint8_t updateIdTag;
572    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
573        return ERROR_IO;
574    }
575    data_offset ++;
576
577    if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
578        return ERROR_MALFORMED;
579    }
580
581    uint8_t numOfBytes;
582    int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
583    if (size < 0) {
584        return ERROR_IO;
585    }
586    int32_t classSize = size;
587    data_offset += numOfBytes;
588
589    while(size >= 11 ) {
590        uint8_t descriptorTag;
591        if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
592            return ERROR_IO;
593        }
594        data_offset ++;
595
596        if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
597            return ERROR_MALFORMED;
598        }
599
600        uint8_t buffer[8];
601        //ObjectDescriptorID and ObjectDescriptor url flag
602        if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
603            return ERROR_IO;
604        }
605        data_offset += 2;
606
607        if ((buffer[1] >> 5) & 0x0001) { //url flag is set
608            return ERROR_MALFORMED;
609        }
610
611        if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
612            return ERROR_IO;
613        }
614        data_offset += 8;
615
616        if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
617                || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
618            return ERROR_MALFORMED;
619        }
620
621        SINF *sinf = new SINF;
622        sinf->trackID = U16_AT(&buffer[3]);
623        sinf->IPMPDescriptorID = buffer[7];
624        sinf->next = mFirstSINF;
625        mFirstSINF = sinf;
626
627        size -= (8 + 2 + 1);
628    }
629
630    if (size != 0) {
631        return ERROR_MALFORMED;
632    }
633
634    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
635        return ERROR_IO;
636    }
637    data_offset ++;
638
639    if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
640        return ERROR_MALFORMED;
641    }
642
643    size = readSize(data_offset, mDataSource, &numOfBytes);
644    if (size < 0) {
645        return ERROR_IO;
646    }
647    classSize = size;
648    data_offset += numOfBytes;
649
650    while (size > 0) {
651        uint8_t tag;
652        int32_t dataLen;
653        if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
654            return ERROR_IO;
655        }
656        data_offset ++;
657
658        if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
659            uint8_t id;
660            dataLen = readSize(data_offset, mDataSource, &numOfBytes);
661            if (dataLen < 0) {
662                return ERROR_IO;
663            } else if (dataLen < 4) {
664                return ERROR_MALFORMED;
665            }
666            data_offset += numOfBytes;
667
668            if (mDataSource->readAt(data_offset, &id, 1) < 1) {
669                return ERROR_IO;
670            }
671            data_offset ++;
672
673            SINF *sinf = mFirstSINF;
674            while (sinf && (sinf->IPMPDescriptorID != id)) {
675                sinf = sinf->next;
676            }
677            if (sinf == NULL) {
678                return ERROR_MALFORMED;
679            }
680            sinf->len = dataLen - 3;
681            sinf->IPMPData = new char[sinf->len];
682
683            if (mDataSource->readAt(data_offset + 2, sinf->IPMPData, sinf->len) < sinf->len) {
684                return ERROR_IO;
685            }
686            data_offset += sinf->len;
687
688            size -= (dataLen + numOfBytes + 1);
689        }
690    }
691
692    if (size != 0) {
693        return ERROR_MALFORMED;
694    }
695
696    return UNKNOWN_ERROR;  // Return a dummy error.
697}
698
699struct PathAdder {
700    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
701        : mPath(path) {
702        mPath->push(chunkType);
703    }
704
705    ~PathAdder() {
706        mPath->pop();
707    }
708
709private:
710    Vector<uint32_t> *mPath;
711
712    PathAdder(const PathAdder &);
713    PathAdder &operator=(const PathAdder &);
714};
715
716static bool underMetaDataPath(const Vector<uint32_t> &path) {
717    return path.size() >= 5
718        && path[0] == FOURCC('m', 'o', 'o', 'v')
719        && path[1] == FOURCC('u', 'd', 't', 'a')
720        && path[2] == FOURCC('m', 'e', 't', 'a')
721        && path[3] == FOURCC('i', 'l', 's', 't');
722}
723
724// Given a time in seconds since Jan 1 1904, produce a human-readable string.
725static void convertTimeToDate(int64_t time_1904, String8 *s) {
726    time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600);
727
728    char tmp[32];
729    strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970));
730
731    s->setTo(tmp);
732}
733
734status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
735    ALOGV("entering parseChunk %lld/%d", *offset, depth);
736    uint32_t hdr[2];
737    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
738        return ERROR_IO;
739    }
740    uint64_t chunk_size = ntohl(hdr[0]);
741    uint32_t chunk_type = ntohl(hdr[1]);
742    off64_t data_offset = *offset + 8;
743
744    if (chunk_size == 1) {
745        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
746            return ERROR_IO;
747        }
748        chunk_size = ntoh64(chunk_size);
749        data_offset += 8;
750
751        if (chunk_size < 16) {
752            // The smallest valid chunk is 16 bytes long in this case.
753            return ERROR_MALFORMED;
754        }
755    } else if (chunk_size < 8) {
756        // The smallest valid chunk is 8 bytes long.
757        return ERROR_MALFORMED;
758    }
759
760    char chunk[5];
761    MakeFourCCString(chunk_type, chunk);
762    ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth);
763
764#if 0
765    static const char kWhitespace[] = "                                        ";
766    const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
767    printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size);
768
769    char buffer[256];
770    size_t n = chunk_size;
771    if (n > sizeof(buffer)) {
772        n = sizeof(buffer);
773    }
774    if (mDataSource->readAt(*offset, buffer, n)
775            < (ssize_t)n) {
776        return ERROR_IO;
777    }
778
779    hexdump(buffer, n);
780#endif
781
782    PathAdder autoAdder(&mPath, chunk_type);
783
784    off64_t chunk_data_size = *offset + chunk_size - data_offset;
785
786    if (chunk_type != FOURCC('c', 'p', 'r', 't')
787            && chunk_type != FOURCC('c', 'o', 'v', 'r')
788            && mPath.size() == 5 && underMetaDataPath(mPath)) {
789        off64_t stop_offset = *offset + chunk_size;
790        *offset = data_offset;
791        while (*offset < stop_offset) {
792            status_t err = parseChunk(offset, depth + 1);
793            if (err != OK) {
794                return err;
795            }
796        }
797
798        if (*offset != stop_offset) {
799            return ERROR_MALFORMED;
800        }
801
802        return OK;
803    }
804
805    switch(chunk_type) {
806        case FOURCC('m', 'o', 'o', 'v'):
807        case FOURCC('t', 'r', 'a', 'k'):
808        case FOURCC('m', 'd', 'i', 'a'):
809        case FOURCC('m', 'i', 'n', 'f'):
810        case FOURCC('d', 'i', 'n', 'f'):
811        case FOURCC('s', 't', 'b', 'l'):
812        case FOURCC('m', 'v', 'e', 'x'):
813        case FOURCC('m', 'o', 'o', 'f'):
814        case FOURCC('t', 'r', 'a', 'f'):
815        case FOURCC('m', 'f', 'r', 'a'):
816        case FOURCC('u', 'd', 't', 'a'):
817        case FOURCC('i', 'l', 's', 't'):
818        case FOURCC('s', 'i', 'n', 'f'):
819        case FOURCC('s', 'c', 'h', 'i'):
820        {
821            if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
822                ALOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size);
823
824                if (mDataSource->flags()
825                        & (DataSource::kWantsPrefetching
826                            | DataSource::kIsCachingDataSource)) {
827                    sp<MPEG4DataSource> cachedSource =
828                        new MPEG4DataSource(mDataSource);
829
830                    if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
831                        mDataSource = cachedSource;
832                    }
833                }
834
835                mLastTrack->sampleTable = new SampleTable(mDataSource);
836            }
837
838            bool isTrack = false;
839            if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
840                isTrack = true;
841
842                Track *track = new Track;
843                track->next = NULL;
844                if (mLastTrack) {
845                    mLastTrack->next = track;
846                } else {
847                    mFirstTrack = track;
848                }
849                mLastTrack = track;
850
851                track->meta = new MetaData;
852                track->includes_expensive_metadata = false;
853                track->skipTrack = false;
854                track->timescale = 0;
855                track->meta->setCString(kKeyMIMEType, "application/octet-stream");
856            }
857
858            off64_t stop_offset = *offset + chunk_size;
859            *offset = data_offset;
860            while (*offset < stop_offset) {
861                status_t err = parseChunk(offset, depth + 1);
862                if (err != OK) {
863                    return err;
864                }
865            }
866
867            if (*offset != stop_offset) {
868                return ERROR_MALFORMED;
869            }
870
871            if (isTrack) {
872                if (mLastTrack->skipTrack) {
873                    Track *cur = mFirstTrack;
874
875                    if (cur == mLastTrack) {
876                        delete cur;
877                        mFirstTrack = mLastTrack = NULL;
878                    } else {
879                        while (cur && cur->next != mLastTrack) {
880                            cur = cur->next;
881                        }
882                        cur->next = NULL;
883                        delete mLastTrack;
884                        mLastTrack = cur;
885                    }
886
887                    return OK;
888                }
889
890                status_t err = verifyTrack(mLastTrack);
891
892                if (err != OK) {
893                    return err;
894                }
895            } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
896                mInitCheck = OK;
897
898                if (!mIsDrm) {
899                    return UNKNOWN_ERROR;  // Return a dummy error.
900                } else {
901                    return OK;
902                }
903            }
904            break;
905        }
906
907        case FOURCC('f', 'r', 'm', 'a'):
908        {
909            uint32_t original_fourcc;
910            if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
911                return ERROR_IO;
912            }
913            original_fourcc = ntohl(original_fourcc);
914            ALOGV("read original format: %d", original_fourcc);
915            mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
916            uint32_t num_channels = 0;
917            uint32_t sample_rate = 0;
918            if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
919                mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
920                mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
921            }
922            *offset += chunk_size;
923            break;
924        }
925
926        case FOURCC('t', 'e', 'n', 'c'):
927        {
928            if (chunk_size < 32) {
929                return ERROR_MALFORMED;
930            }
931
932            // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
933            // default IV size, 16 bytes default KeyID
934            // (ISO 23001-7)
935            char buf[4];
936            memset(buf, 0, 4);
937            if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
938                return ERROR_IO;
939            }
940            uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
941            if (defaultAlgorithmId > 1) {
942                // only 0 (clear) and 1 (AES-128) are valid
943                return ERROR_MALFORMED;
944            }
945
946            memset(buf, 0, 4);
947            if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
948                return ERROR_IO;
949            }
950            uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
951
952            if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
953                    (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
954                // only unencrypted data must have 0 IV size
955                return ERROR_MALFORMED;
956            } else if (defaultIVSize != 0 &&
957                    defaultIVSize != 8 &&
958                    defaultIVSize != 16) {
959                // only supported sizes are 0, 8 and 16
960                return ERROR_MALFORMED;
961            }
962
963            uint8_t defaultKeyId[16];
964
965            if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
966                return ERROR_IO;
967            }
968
969            mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
970            mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
971            mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
972            *offset += chunk_size;
973            break;
974        }
975
976        case FOURCC('t', 'k', 'h', 'd'):
977        {
978            status_t err;
979            if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
980                return err;
981            }
982
983            *offset += chunk_size;
984            break;
985        }
986
987        case FOURCC('p', 's', 's', 'h'):
988        {
989            PsshInfo pssh;
990
991            if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
992                return ERROR_IO;
993            }
994
995            uint32_t psshdatalen = 0;
996            if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
997                return ERROR_IO;
998            }
999            pssh.datalen = ntohl(psshdatalen);
1000            ALOGV("pssh data size: %d", pssh.datalen);
1001            if (pssh.datalen + 20 > chunk_size) {
1002                // pssh data length exceeds size of containing box
1003                return ERROR_MALFORMED;
1004            }
1005
1006            pssh.data = new uint8_t[pssh.datalen];
1007            ALOGV("allocated pssh @ %p", pssh.data);
1008            ssize_t requested = (ssize_t) pssh.datalen;
1009            if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1010                return ERROR_IO;
1011            }
1012            mPssh.push_back(pssh);
1013
1014            *offset += chunk_size;
1015            break;
1016        }
1017
1018        case FOURCC('m', 'd', 'h', 'd'):
1019        {
1020            if (chunk_data_size < 4) {
1021                return ERROR_MALFORMED;
1022            }
1023
1024            uint8_t version;
1025            if (mDataSource->readAt(
1026                        data_offset, &version, sizeof(version))
1027                    < (ssize_t)sizeof(version)) {
1028                return ERROR_IO;
1029            }
1030
1031            off64_t timescale_offset;
1032
1033            if (version == 1) {
1034                timescale_offset = data_offset + 4 + 16;
1035            } else if (version == 0) {
1036                timescale_offset = data_offset + 4 + 8;
1037            } else {
1038                return ERROR_IO;
1039            }
1040
1041            uint32_t timescale;
1042            if (mDataSource->readAt(
1043                        timescale_offset, &timescale, sizeof(timescale))
1044                    < (ssize_t)sizeof(timescale)) {
1045                return ERROR_IO;
1046            }
1047
1048            mLastTrack->timescale = ntohl(timescale);
1049
1050            int64_t duration = 0;
1051            if (version == 1) {
1052                if (mDataSource->readAt(
1053                            timescale_offset + 4, &duration, sizeof(duration))
1054                        < (ssize_t)sizeof(duration)) {
1055                    return ERROR_IO;
1056                }
1057                duration = ntoh64(duration);
1058            } else {
1059                uint32_t duration32;
1060                if (mDataSource->readAt(
1061                            timescale_offset + 4, &duration32, sizeof(duration32))
1062                        < (ssize_t)sizeof(duration32)) {
1063                    return ERROR_IO;
1064                }
1065                // ffmpeg sets duration to -1, which is incorrect.
1066                if (duration32 != 0xffffffff) {
1067                    duration = ntohl(duration32);
1068                }
1069            }
1070            mLastTrack->meta->setInt64(
1071                    kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1072
1073            uint8_t lang[2];
1074            off64_t lang_offset;
1075            if (version == 1) {
1076                lang_offset = timescale_offset + 4 + 8;
1077            } else if (version == 0) {
1078                lang_offset = timescale_offset + 4 + 4;
1079            } else {
1080                return ERROR_IO;
1081            }
1082
1083            if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1084                    < (ssize_t)sizeof(lang)) {
1085                return ERROR_IO;
1086            }
1087
1088            // To get the ISO-639-2/T three character language code
1089            // 1 bit pad followed by 3 5-bits characters. Each character
1090            // is packed as the difference between its ASCII value and 0x60.
1091            char lang_code[4];
1092            lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1093            lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1094            lang_code[2] = (lang[1] & 0x1f) + 0x60;
1095            lang_code[3] = '\0';
1096
1097            mLastTrack->meta->setCString(
1098                    kKeyMediaLanguage, lang_code);
1099
1100            *offset += chunk_size;
1101            break;
1102        }
1103
1104        case FOURCC('s', 't', 's', 'd'):
1105        {
1106            if (chunk_data_size < 8) {
1107                return ERROR_MALFORMED;
1108            }
1109
1110            uint8_t buffer[8];
1111            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1112                return ERROR_MALFORMED;
1113            }
1114
1115            if (mDataSource->readAt(
1116                        data_offset, buffer, 8) < 8) {
1117                return ERROR_IO;
1118            }
1119
1120            if (U32_AT(buffer) != 0) {
1121                // Should be version 0, flags 0.
1122                return ERROR_MALFORMED;
1123            }
1124
1125            uint32_t entry_count = U32_AT(&buffer[4]);
1126
1127            if (entry_count > 1) {
1128                // For 3GPP timed text, there could be multiple tx3g boxes contain
1129                // multiple text display formats. These formats will be used to
1130                // display the timed text.
1131                // For encrypted files, there may also be more than one entry.
1132                const char *mime;
1133                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1134                if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1135                        strcasecmp(mime, "application/octet-stream")) {
1136                    // For now we only support a single type of media per track.
1137                    mLastTrack->skipTrack = true;
1138                    *offset += chunk_size;
1139                    break;
1140                }
1141            }
1142            off64_t stop_offset = *offset + chunk_size;
1143            *offset = data_offset + 8;
1144            for (uint32_t i = 0; i < entry_count; ++i) {
1145                status_t err = parseChunk(offset, depth + 1);
1146                if (err != OK) {
1147                    return err;
1148                }
1149            }
1150
1151            if (*offset != stop_offset) {
1152                return ERROR_MALFORMED;
1153            }
1154            break;
1155        }
1156
1157        case FOURCC('m', 'p', '4', 'a'):
1158        case FOURCC('e', 'n', 'c', 'a'):
1159        case FOURCC('s', 'a', 'm', 'r'):
1160        case FOURCC('s', 'a', 'w', 'b'):
1161        {
1162            uint8_t buffer[8 + 20];
1163            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1164                // Basic AudioSampleEntry size.
1165                return ERROR_MALFORMED;
1166            }
1167
1168            if (mDataSource->readAt(
1169                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1170                return ERROR_IO;
1171            }
1172
1173            uint16_t data_ref_index = U16_AT(&buffer[6]);
1174            uint32_t num_channels = U16_AT(&buffer[16]);
1175
1176            uint16_t sample_size = U16_AT(&buffer[18]);
1177            uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1178
1179            if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1180                // if the chunk type is enca, we'll get the type from the sinf/frma box later
1181                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1182                AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1183            }
1184            ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1185                   chunk, num_channels, sample_size, sample_rate);
1186            mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1187            mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1188
1189            off64_t stop_offset = *offset + chunk_size;
1190            *offset = data_offset + sizeof(buffer);
1191            while (*offset < stop_offset) {
1192                status_t err = parseChunk(offset, depth + 1);
1193                if (err != OK) {
1194                    return err;
1195                }
1196            }
1197
1198            if (*offset != stop_offset) {
1199                return ERROR_MALFORMED;
1200            }
1201            break;
1202        }
1203
1204        case FOURCC('m', 'p', '4', 'v'):
1205        case FOURCC('e', 'n', 'c', 'v'):
1206        case FOURCC('s', '2', '6', '3'):
1207        case FOURCC('H', '2', '6', '3'):
1208        case FOURCC('h', '2', '6', '3'):
1209        case FOURCC('a', 'v', 'c', '1'):
1210        {
1211            mHasVideo = true;
1212
1213            uint8_t buffer[78];
1214            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1215                // Basic VideoSampleEntry size.
1216                return ERROR_MALFORMED;
1217            }
1218
1219            if (mDataSource->readAt(
1220                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1221                return ERROR_IO;
1222            }
1223
1224            uint16_t data_ref_index = U16_AT(&buffer[6]);
1225            uint16_t width = U16_AT(&buffer[6 + 18]);
1226            uint16_t height = U16_AT(&buffer[6 + 20]);
1227
1228            // The video sample is not standard-compliant if it has invalid dimension.
1229            // Use some default width and height value, and
1230            // let the decoder figure out the actual width and height (and thus
1231            // be prepared for INFO_FOMRAT_CHANGED event).
1232            if (width == 0)  width  = 352;
1233            if (height == 0) height = 288;
1234
1235            // printf("*** coding='%s' width=%d height=%d\n",
1236            //        chunk, width, height);
1237
1238            if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1239                // if the chunk type is encv, we'll get the type from the sinf/frma box later
1240                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1241            }
1242            mLastTrack->meta->setInt32(kKeyWidth, width);
1243            mLastTrack->meta->setInt32(kKeyHeight, height);
1244
1245            off64_t stop_offset = *offset + chunk_size;
1246            *offset = data_offset + sizeof(buffer);
1247            while (*offset < stop_offset) {
1248                status_t err = parseChunk(offset, depth + 1);
1249                if (err != OK) {
1250                    return err;
1251                }
1252            }
1253
1254            if (*offset != stop_offset) {
1255                return ERROR_MALFORMED;
1256            }
1257            break;
1258        }
1259
1260        case FOURCC('s', 't', 'c', 'o'):
1261        case FOURCC('c', 'o', '6', '4'):
1262        {
1263            status_t err =
1264                mLastTrack->sampleTable->setChunkOffsetParams(
1265                        chunk_type, data_offset, chunk_data_size);
1266
1267            if (err != OK) {
1268                return err;
1269            }
1270
1271            *offset += chunk_size;
1272            break;
1273        }
1274
1275        case FOURCC('s', 't', 's', 'c'):
1276        {
1277            status_t err =
1278                mLastTrack->sampleTable->setSampleToChunkParams(
1279                        data_offset, chunk_data_size);
1280
1281            if (err != OK) {
1282                return err;
1283            }
1284
1285            *offset += chunk_size;
1286            break;
1287        }
1288
1289        case FOURCC('s', 't', 's', 'z'):
1290        case FOURCC('s', 't', 'z', '2'):
1291        {
1292            status_t err =
1293                mLastTrack->sampleTable->setSampleSizeParams(
1294                        chunk_type, data_offset, chunk_data_size);
1295
1296            if (err != OK) {
1297                return err;
1298            }
1299
1300            size_t max_size;
1301            err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1302
1303            if (err != OK) {
1304                return err;
1305            }
1306
1307            if (max_size != 0) {
1308                // Assume that a given buffer only contains at most 10 chunks,
1309                // each chunk originally prefixed with a 2 byte length will
1310                // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1311                // and thus will grow by 2 bytes per chunk.
1312                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1313            } else {
1314                // No size was specified. Pick a conservatively large size.
1315                int32_t width, height;
1316                if (mLastTrack->meta->findInt32(kKeyWidth, &width) &&
1317                        mLastTrack->meta->findInt32(kKeyHeight, &height)) {
1318                    mLastTrack->meta->setInt32(kKeyMaxInputSize, width * height * 3 / 2);
1319                } else {
1320                    ALOGE("No width or height, assuming worst case 1080p");
1321                    mLastTrack->meta->setInt32(kKeyMaxInputSize, 3110400);
1322                }
1323            }
1324            *offset += chunk_size;
1325
1326            // Calculate average frame rate.
1327            const char *mime;
1328            CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1329            if (!strncasecmp("video/", mime, 6)) {
1330                size_t nSamples = mLastTrack->sampleTable->countSamples();
1331                int64_t durationUs;
1332                if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1333                    if (durationUs > 0) {
1334                        int32_t frameRate = (nSamples * 1000000LL +
1335                                    (durationUs >> 1)) / durationUs;
1336                        mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1337                    }
1338                }
1339            }
1340
1341            break;
1342        }
1343
1344        case FOURCC('s', 't', 't', 's'):
1345        {
1346            status_t err =
1347                mLastTrack->sampleTable->setTimeToSampleParams(
1348                        data_offset, chunk_data_size);
1349
1350            if (err != OK) {
1351                return err;
1352            }
1353
1354            *offset += chunk_size;
1355            break;
1356        }
1357
1358        case FOURCC('c', 't', 't', 's'):
1359        {
1360            status_t err =
1361                mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1362                        data_offset, chunk_data_size);
1363
1364            if (err != OK) {
1365                return err;
1366            }
1367
1368            *offset += chunk_size;
1369            break;
1370        }
1371
1372        case FOURCC('s', 't', 's', 's'):
1373        {
1374            status_t err =
1375                mLastTrack->sampleTable->setSyncSampleParams(
1376                        data_offset, chunk_data_size);
1377
1378            if (err != OK) {
1379                return err;
1380            }
1381
1382            *offset += chunk_size;
1383            break;
1384        }
1385
1386        // @xyz
1387        case FOURCC('\xA9', 'x', 'y', 'z'):
1388        {
1389            // Best case the total data length inside "@xyz" box
1390            // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/",
1391            // where "\x00\x04" is the text string length with value = 4,
1392            // "\0x15\xc7" is the language code = en, and "0+0" is a
1393            // location (string) value with longitude = 0 and latitude = 0.
1394            if (chunk_data_size < 8) {
1395                return ERROR_MALFORMED;
1396            }
1397
1398            // Worst case the location string length would be 18,
1399            // for instance +90.0000-180.0000, without the trailing "/" and
1400            // the string length + language code.
1401            char buffer[18];
1402
1403            // Substracting 5 from the data size is because the text string length +
1404            // language code takes 4 bytes, and the trailing slash "/" takes 1 byte.
1405            off64_t location_length = chunk_data_size - 5;
1406            if (location_length >= (off64_t) sizeof(buffer)) {
1407                return ERROR_MALFORMED;
1408            }
1409
1410            if (mDataSource->readAt(
1411                        data_offset + 4, buffer, location_length) < location_length) {
1412                return ERROR_IO;
1413            }
1414
1415            buffer[location_length] = '\0';
1416            mFileMetaData->setCString(kKeyLocation, buffer);
1417            *offset += chunk_size;
1418            break;
1419        }
1420
1421        case FOURCC('e', 's', 'd', 's'):
1422        {
1423            if (chunk_data_size < 4) {
1424                return ERROR_MALFORMED;
1425            }
1426
1427            uint8_t buffer[256];
1428            if (chunk_data_size > (off64_t)sizeof(buffer)) {
1429                return ERROR_BUFFER_TOO_SMALL;
1430            }
1431
1432            if (mDataSource->readAt(
1433                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
1434                return ERROR_IO;
1435            }
1436
1437            if (U32_AT(buffer) != 0) {
1438                // Should be version 0, flags 0.
1439                return ERROR_MALFORMED;
1440            }
1441
1442            mLastTrack->meta->setData(
1443                    kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1444
1445            if (mPath.size() >= 2
1446                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1447                // Information from the ESDS must be relied on for proper
1448                // setup of sample rate and channel count for MPEG4 Audio.
1449                // The generic header appears to only contain generic
1450                // information...
1451
1452                status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1453                        &buffer[4], chunk_data_size - 4);
1454
1455                if (err != OK) {
1456                    return err;
1457                }
1458            }
1459
1460            *offset += chunk_size;
1461            break;
1462        }
1463
1464        case FOURCC('a', 'v', 'c', 'C'):
1465        {
1466            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1467
1468            if (mDataSource->readAt(
1469                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1470                return ERROR_IO;
1471            }
1472
1473            mLastTrack->meta->setData(
1474                    kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
1475
1476            *offset += chunk_size;
1477            break;
1478        }
1479
1480        case FOURCC('d', '2', '6', '3'):
1481        {
1482            /*
1483             * d263 contains a fixed 7 bytes part:
1484             *   vendor - 4 bytes
1485             *   version - 1 byte
1486             *   level - 1 byte
1487             *   profile - 1 byte
1488             * optionally, "d263" box itself may contain a 16-byte
1489             * bit rate box (bitr)
1490             *   average bit rate - 4 bytes
1491             *   max bit rate - 4 bytes
1492             */
1493            char buffer[23];
1494            if (chunk_data_size != 7 &&
1495                chunk_data_size != 23) {
1496                ALOGE("Incorrect D263 box size %lld", chunk_data_size);
1497                return ERROR_MALFORMED;
1498            }
1499
1500            if (mDataSource->readAt(
1501                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1502                return ERROR_IO;
1503            }
1504
1505            mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1506
1507            *offset += chunk_size;
1508            break;
1509        }
1510
1511        case FOURCC('m', 'e', 't', 'a'):
1512        {
1513            uint8_t buffer[4];
1514            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1515                return ERROR_MALFORMED;
1516            }
1517
1518            if (mDataSource->readAt(
1519                        data_offset, buffer, 4) < 4) {
1520                return ERROR_IO;
1521            }
1522
1523            if (U32_AT(buffer) != 0) {
1524                // Should be version 0, flags 0.
1525
1526                // If it's not, let's assume this is one of those
1527                // apparently malformed chunks that don't have flags
1528                // and completely different semantics than what's
1529                // in the MPEG4 specs and skip it.
1530                *offset += chunk_size;
1531                return OK;
1532            }
1533
1534            off64_t stop_offset = *offset + chunk_size;
1535            *offset = data_offset + sizeof(buffer);
1536            while (*offset < stop_offset) {
1537                status_t err = parseChunk(offset, depth + 1);
1538                if (err != OK) {
1539                    return err;
1540                }
1541            }
1542
1543            if (*offset != stop_offset) {
1544                return ERROR_MALFORMED;
1545            }
1546            break;
1547        }
1548
1549        case FOURCC('m', 'e', 'a', 'n'):
1550        case FOURCC('n', 'a', 'm', 'e'):
1551        case FOURCC('d', 'a', 't', 'a'):
1552        {
1553            if (mPath.size() == 6 && underMetaDataPath(mPath)) {
1554                status_t err = parseMetaData(data_offset, chunk_data_size);
1555
1556                if (err != OK) {
1557                    return err;
1558                }
1559            }
1560
1561            *offset += chunk_size;
1562            break;
1563        }
1564
1565        case FOURCC('m', 'v', 'h', 'd'):
1566        {
1567            if (chunk_data_size < 12) {
1568                return ERROR_MALFORMED;
1569            }
1570
1571            uint8_t header[12];
1572            if (mDataSource->readAt(
1573                        data_offset, header, sizeof(header))
1574                    < (ssize_t)sizeof(header)) {
1575                return ERROR_IO;
1576            }
1577
1578            int64_t creationTime;
1579            if (header[0] == 1) {
1580                creationTime = U64_AT(&header[4]);
1581            } else if (header[0] != 0) {
1582                return ERROR_MALFORMED;
1583            } else {
1584                creationTime = U32_AT(&header[4]);
1585            }
1586
1587            String8 s;
1588            convertTimeToDate(creationTime, &s);
1589
1590            mFileMetaData->setCString(kKeyDate, s.string());
1591
1592            *offset += chunk_size;
1593            break;
1594        }
1595
1596        case FOURCC('m', 'd', 'a', 't'):
1597        {
1598            ALOGV("mdat chunk, drm: %d", mIsDrm);
1599            if (!mIsDrm) {
1600                *offset += chunk_size;
1601                break;
1602            }
1603
1604            if (chunk_size < 8) {
1605                return ERROR_MALFORMED;
1606            }
1607
1608            return parseDrmSINF(offset, data_offset);
1609        }
1610
1611        case FOURCC('h', 'd', 'l', 'r'):
1612        {
1613            uint32_t buffer;
1614            if (mDataSource->readAt(
1615                        data_offset + 8, &buffer, 4) < 4) {
1616                return ERROR_IO;
1617            }
1618
1619            uint32_t type = ntohl(buffer);
1620            // For the 3GPP file format, the handler-type within the 'hdlr' box
1621            // shall be 'text'. We also want to support 'sbtl' handler type
1622            // for a practical reason as various MPEG4 containers use it.
1623            if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
1624                mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
1625            }
1626
1627            *offset += chunk_size;
1628            break;
1629        }
1630
1631        case FOURCC('t', 'x', '3', 'g'):
1632        {
1633            uint32_t type;
1634            const void *data;
1635            size_t size = 0;
1636            if (!mLastTrack->meta->findData(
1637                    kKeyTextFormatData, &type, &data, &size)) {
1638                size = 0;
1639            }
1640
1641            if (SIZE_MAX - chunk_size <= size) {
1642                return ERROR_MALFORMED;
1643            }
1644
1645            uint8_t *buffer = new uint8_t[size + chunk_size];
1646            if (buffer == NULL) {
1647                return ERROR_MALFORMED;
1648            }
1649
1650            if (size > 0) {
1651                memcpy(buffer, data, size);
1652            }
1653
1654            if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
1655                    < chunk_size) {
1656                delete[] buffer;
1657                buffer = NULL;
1658
1659                return ERROR_IO;
1660            }
1661
1662            mLastTrack->meta->setData(
1663                    kKeyTextFormatData, 0, buffer, size + chunk_size);
1664
1665            delete[] buffer;
1666
1667            *offset += chunk_size;
1668            break;
1669        }
1670
1671        case FOURCC('c', 'o', 'v', 'r'):
1672        {
1673            if (mFileMetaData != NULL) {
1674                ALOGV("chunk_data_size = %lld and data_offset = %lld",
1675                        chunk_data_size, data_offset);
1676                if (chunk_data_size >= SIZE_MAX - 1) {
1677                    return ERROR_MALFORMED;
1678                }
1679                sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1);
1680                if (mDataSource->readAt(
1681                    data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) {
1682                    return ERROR_IO;
1683                }
1684                const int kSkipBytesOfDataBox = 16;
1685                mFileMetaData->setData(
1686                    kKeyAlbumArt, MetaData::TYPE_NONE,
1687                    buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
1688            }
1689
1690            *offset += chunk_size;
1691            break;
1692        }
1693
1694        case FOURCC('-', '-', '-', '-'):
1695        {
1696            mLastCommentMean.clear();
1697            mLastCommentName.clear();
1698            mLastCommentData.clear();
1699            *offset += chunk_size;
1700            break;
1701        }
1702
1703        case FOURCC('s', 'i', 'd', 'x'):
1704        {
1705            parseSegmentIndex(data_offset, chunk_data_size);
1706            *offset += chunk_size;
1707            return UNKNOWN_ERROR; // stop parsing after sidx
1708        }
1709
1710        default:
1711        {
1712            *offset += chunk_size;
1713            break;
1714        }
1715    }
1716
1717    return OK;
1718}
1719
1720status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
1721  ALOGV("MPEG4Extractor::parseSegmentIndex");
1722
1723    if (size < 12) {
1724      return -EINVAL;
1725    }
1726
1727    uint32_t flags;
1728    if (!mDataSource->getUInt32(offset, &flags)) {
1729        return ERROR_MALFORMED;
1730    }
1731
1732    uint32_t version = flags >> 24;
1733    flags &= 0xffffff;
1734
1735    ALOGV("sidx version %d", version);
1736
1737    uint32_t referenceId;
1738    if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
1739        return ERROR_MALFORMED;
1740    }
1741
1742    uint32_t timeScale;
1743    if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
1744        return ERROR_MALFORMED;
1745    }
1746    ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
1747
1748    uint64_t earliestPresentationTime;
1749    uint64_t firstOffset;
1750
1751    offset += 12;
1752    size -= 12;
1753
1754    if (version == 0) {
1755        if (size < 8) {
1756            return -EINVAL;
1757        }
1758        uint32_t tmp;
1759        if (!mDataSource->getUInt32(offset, &tmp)) {
1760            return ERROR_MALFORMED;
1761        }
1762        earliestPresentationTime = tmp;
1763        if (!mDataSource->getUInt32(offset + 4, &tmp)) {
1764            return ERROR_MALFORMED;
1765        }
1766        firstOffset = tmp;
1767        offset += 8;
1768        size -= 8;
1769    } else {
1770        if (size < 16) {
1771            return -EINVAL;
1772        }
1773        if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
1774            return ERROR_MALFORMED;
1775        }
1776        if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
1777            return ERROR_MALFORMED;
1778        }
1779        offset += 16;
1780        size -= 16;
1781    }
1782    ALOGV("sidx pres/off: %Ld/%Ld", earliestPresentationTime, firstOffset);
1783
1784    if (size < 4) {
1785        return -EINVAL;
1786    }
1787
1788    uint16_t referenceCount;
1789    if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
1790        return ERROR_MALFORMED;
1791    }
1792    offset += 4;
1793    size -= 4;
1794    ALOGV("refcount: %d", referenceCount);
1795
1796    if (size < referenceCount * 12) {
1797        return -EINVAL;
1798    }
1799
1800    uint64_t total_duration = 0;
1801    for (unsigned int i = 0; i < referenceCount; i++) {
1802        uint32_t d1, d2, d3;
1803
1804        if (!mDataSource->getUInt32(offset, &d1) ||     // size
1805            !mDataSource->getUInt32(offset + 4, &d2) || // duration
1806            !mDataSource->getUInt32(offset + 8, &d3)) { // flags
1807            return ERROR_MALFORMED;
1808        }
1809
1810        if (d1 & 0x80000000) {
1811            ALOGW("sub-sidx boxes not supported yet");
1812        }
1813        bool sap = d3 & 0x80000000;
1814        bool saptype = d3 >> 28;
1815        if (!sap || saptype > 2) {
1816            ALOGW("not a stream access point, or unsupported type");
1817        }
1818        total_duration += d2;
1819        offset += 12;
1820        ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
1821        SidxEntry se;
1822        se.mSize = d1 & 0x7fffffff;
1823        se.mDurationUs = 1000000LL * d2 / timeScale;
1824        mSidxEntries.add(se);
1825    }
1826
1827    mSidxDuration = total_duration * 1000000 / timeScale;
1828    ALOGV("duration: %lld", mSidxDuration);
1829
1830    int64_t metaDuration;
1831    if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
1832        mLastTrack->meta->setInt64(kKeyDuration, mSidxDuration);
1833    }
1834    return OK;
1835}
1836
1837
1838
1839status_t MPEG4Extractor::parseTrackHeader(
1840        off64_t data_offset, off64_t data_size) {
1841    if (data_size < 4) {
1842        return ERROR_MALFORMED;
1843    }
1844
1845    uint8_t version;
1846    if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1847        return ERROR_IO;
1848    }
1849
1850    size_t dynSize = (version == 1) ? 36 : 24;
1851
1852    uint8_t buffer[36 + 60];
1853
1854    if (data_size != (off64_t)dynSize + 60) {
1855        return ERROR_MALFORMED;
1856    }
1857
1858    if (mDataSource->readAt(
1859                data_offset, buffer, data_size) < (ssize_t)data_size) {
1860        return ERROR_IO;
1861    }
1862
1863    uint64_t ctime, mtime, duration;
1864    int32_t id;
1865
1866    if (version == 1) {
1867        ctime = U64_AT(&buffer[4]);
1868        mtime = U64_AT(&buffer[12]);
1869        id = U32_AT(&buffer[20]);
1870        duration = U64_AT(&buffer[28]);
1871    } else {
1872        CHECK_EQ((unsigned)version, 0u);
1873
1874        ctime = U32_AT(&buffer[4]);
1875        mtime = U32_AT(&buffer[8]);
1876        id = U32_AT(&buffer[12]);
1877        duration = U32_AT(&buffer[20]);
1878    }
1879
1880    mLastTrack->meta->setInt32(kKeyTrackID, id);
1881
1882    size_t matrixOffset = dynSize + 16;
1883    int32_t a00 = U32_AT(&buffer[matrixOffset]);
1884    int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
1885    int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
1886    int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
1887    int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
1888    int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
1889
1890#if 0
1891    ALOGI("x' = %.2f * x + %.2f * y + %.2f",
1892         a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
1893    ALOGI("y' = %.2f * x + %.2f * y + %.2f",
1894         a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
1895#endif
1896
1897    uint32_t rotationDegrees;
1898
1899    static const int32_t kFixedOne = 0x10000;
1900    if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
1901        // Identity, no rotation
1902        rotationDegrees = 0;
1903    } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
1904        rotationDegrees = 90;
1905    } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
1906        rotationDegrees = 270;
1907    } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
1908        rotationDegrees = 180;
1909    } else {
1910        ALOGW("We only support 0,90,180,270 degree rotation matrices");
1911        rotationDegrees = 0;
1912    }
1913
1914    if (rotationDegrees != 0) {
1915        mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
1916    }
1917
1918    // Handle presentation display size, which could be different
1919    // from the image size indicated by kKeyWidth and kKeyHeight.
1920    uint32_t width = U32_AT(&buffer[dynSize + 52]);
1921    uint32_t height = U32_AT(&buffer[dynSize + 56]);
1922    mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
1923    mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
1924
1925    return OK;
1926}
1927
1928status_t MPEG4Extractor::parseMetaData(off64_t offset, size_t size) {
1929    if (size < 4) {
1930        return ERROR_MALFORMED;
1931    }
1932
1933    uint8_t *buffer = new uint8_t[size + 1];
1934    if (mDataSource->readAt(
1935                offset, buffer, size) != (ssize_t)size) {
1936        delete[] buffer;
1937        buffer = NULL;
1938
1939        return ERROR_IO;
1940    }
1941
1942    uint32_t flags = U32_AT(buffer);
1943
1944    uint32_t metadataKey = 0;
1945    char chunk[5];
1946    MakeFourCCString(mPath[4], chunk);
1947    ALOGV("meta: %s @ %lld", chunk, offset);
1948    switch (mPath[4]) {
1949        case FOURCC(0xa9, 'a', 'l', 'b'):
1950        {
1951            metadataKey = kKeyAlbum;
1952            break;
1953        }
1954        case FOURCC(0xa9, 'A', 'R', 'T'):
1955        {
1956            metadataKey = kKeyArtist;
1957            break;
1958        }
1959        case FOURCC('a', 'A', 'R', 'T'):
1960        {
1961            metadataKey = kKeyAlbumArtist;
1962            break;
1963        }
1964        case FOURCC(0xa9, 'd', 'a', 'y'):
1965        {
1966            metadataKey = kKeyYear;
1967            break;
1968        }
1969        case FOURCC(0xa9, 'n', 'a', 'm'):
1970        {
1971            metadataKey = kKeyTitle;
1972            break;
1973        }
1974        case FOURCC(0xa9, 'w', 'r', 't'):
1975        {
1976            metadataKey = kKeyWriter;
1977            break;
1978        }
1979        case FOURCC('c', 'o', 'v', 'r'):
1980        {
1981            metadataKey = kKeyAlbumArt;
1982            break;
1983        }
1984        case FOURCC('g', 'n', 'r', 'e'):
1985        {
1986            metadataKey = kKeyGenre;
1987            break;
1988        }
1989        case FOURCC(0xa9, 'g', 'e', 'n'):
1990        {
1991            metadataKey = kKeyGenre;
1992            break;
1993        }
1994        case FOURCC('c', 'p', 'i', 'l'):
1995        {
1996            if (size == 9 && flags == 21) {
1997                char tmp[16];
1998                sprintf(tmp, "%d",
1999                        (int)buffer[size - 1]);
2000
2001                mFileMetaData->setCString(kKeyCompilation, tmp);
2002            }
2003            break;
2004        }
2005        case FOURCC('t', 'r', 'k', 'n'):
2006        {
2007            if (size == 16 && flags == 0) {
2008                char tmp[16];
2009                uint16_t* pTrack = (uint16_t*)&buffer[10];
2010                uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2011                sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2012
2013                mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2014            }
2015            break;
2016        }
2017        case FOURCC('d', 'i', 's', 'k'):
2018        {
2019            if ((size == 14 || size == 16) && flags == 0) {
2020                char tmp[16];
2021                uint16_t* pDisc = (uint16_t*)&buffer[10];
2022                uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2023                sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2024
2025                mFileMetaData->setCString(kKeyDiscNumber, tmp);
2026            }
2027            break;
2028        }
2029        case FOURCC('-', '-', '-', '-'):
2030        {
2031            buffer[size] = '\0';
2032            switch (mPath[5]) {
2033                case FOURCC('m', 'e', 'a', 'n'):
2034                    mLastCommentMean.setTo((const char *)buffer + 4);
2035                    break;
2036                case FOURCC('n', 'a', 'm', 'e'):
2037                    mLastCommentName.setTo((const char *)buffer + 4);
2038                    break;
2039                case FOURCC('d', 'a', 't', 'a'):
2040                    mLastCommentData.setTo((const char *)buffer + 8);
2041                    break;
2042            }
2043
2044            // Once we have a set of mean/name/data info, go ahead and process
2045            // it to see if its something we are interested in.  Whether or not
2046            // were are interested in the specific tag, make sure to clear out
2047            // the set so we can be ready to process another tuple should one
2048            // show up later in the file.
2049            if ((mLastCommentMean.length() != 0) &&
2050                (mLastCommentName.length() != 0) &&
2051                (mLastCommentData.length() != 0)) {
2052
2053                if (mLastCommentMean == "com.apple.iTunes"
2054                        && mLastCommentName == "iTunSMPB") {
2055                    int32_t delay, padding;
2056                    if (sscanf(mLastCommentData,
2057                               " %*x %x %x %*x", &delay, &padding) == 2) {
2058                        mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2059                        mLastTrack->meta->setInt32(kKeyEncoderPadding, padding);
2060                    }
2061                }
2062
2063                mLastCommentMean.clear();
2064                mLastCommentName.clear();
2065                mLastCommentData.clear();
2066            }
2067            break;
2068        }
2069
2070        default:
2071            break;
2072    }
2073
2074    if (size >= 8 && metadataKey) {
2075        if (metadataKey == kKeyAlbumArt) {
2076            mFileMetaData->setData(
2077                    kKeyAlbumArt, MetaData::TYPE_NONE,
2078                    buffer + 8, size - 8);
2079        } else if (metadataKey == kKeyGenre) {
2080            if (flags == 0) {
2081                // uint8_t genre code, iTunes genre codes are
2082                // the standard id3 codes, except they start
2083                // at 1 instead of 0 (e.g. Pop is 14, not 13)
2084                // We use standard id3 numbering, so subtract 1.
2085                int genrecode = (int)buffer[size - 1];
2086                genrecode--;
2087                if (genrecode < 0) {
2088                    genrecode = 255; // reserved for 'unknown genre'
2089                }
2090                char genre[10];
2091                sprintf(genre, "%d", genrecode);
2092
2093                mFileMetaData->setCString(metadataKey, genre);
2094            } else if (flags == 1) {
2095                // custom genre string
2096                buffer[size] = '\0';
2097
2098                mFileMetaData->setCString(
2099                        metadataKey, (const char *)buffer + 8);
2100            }
2101        } else {
2102            buffer[size] = '\0';
2103
2104            mFileMetaData->setCString(
2105                    metadataKey, (const char *)buffer + 8);
2106        }
2107    }
2108
2109    delete[] buffer;
2110    buffer = NULL;
2111
2112    return OK;
2113}
2114
2115sp<MediaSource> MPEG4Extractor::getTrack(size_t index) {
2116    status_t err;
2117    if ((err = readMetaData()) != OK) {
2118        return NULL;
2119    }
2120
2121    Track *track = mFirstTrack;
2122    while (index > 0) {
2123        if (track == NULL) {
2124            return NULL;
2125        }
2126
2127        track = track->next;
2128        --index;
2129    }
2130
2131    if (track == NULL) {
2132        return NULL;
2133    }
2134
2135    ALOGV("getTrack called, pssh: %d", mPssh.size());
2136
2137    return new MPEG4Source(
2138            track->meta, mDataSource, track->timescale, track->sampleTable,
2139            mSidxEntries, mMoofOffset);
2140}
2141
2142// static
2143status_t MPEG4Extractor::verifyTrack(Track *track) {
2144    const char *mime;
2145    CHECK(track->meta->findCString(kKeyMIMEType, &mime));
2146
2147    uint32_t type;
2148    const void *data;
2149    size_t size;
2150    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
2151        if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
2152                || type != kTypeAVCC) {
2153            return ERROR_MALFORMED;
2154        }
2155    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
2156            || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
2157        if (!track->meta->findData(kKeyESDS, &type, &data, &size)
2158                || type != kTypeESDS) {
2159            return ERROR_MALFORMED;
2160        }
2161    }
2162
2163    if (!track->sampleTable->isValid()) {
2164        // Make sure we have all the metadata we need.
2165        return ERROR_MALFORMED;
2166    }
2167
2168    return OK;
2169}
2170
2171status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
2172        const void *esds_data, size_t esds_size) {
2173    ESDS esds(esds_data, esds_size);
2174
2175    uint8_t objectTypeIndication;
2176    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
2177        return ERROR_MALFORMED;
2178    }
2179
2180    if (objectTypeIndication == 0xe1) {
2181        // This isn't MPEG4 audio at all, it's QCELP 14k...
2182        mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
2183        return OK;
2184    }
2185
2186    if (objectTypeIndication  == 0x6b) {
2187        // The media subtype is MP3 audio
2188        // Our software MP3 audio decoder may not be able to handle
2189        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
2190        ALOGE("MP3 track in MP4/3GPP file is not supported");
2191        return ERROR_UNSUPPORTED;
2192    }
2193
2194    const uint8_t *csd;
2195    size_t csd_size;
2196    if (esds.getCodecSpecificInfo(
2197                (const void **)&csd, &csd_size) != OK) {
2198        return ERROR_MALFORMED;
2199    }
2200
2201#if 0
2202    printf("ESD of size %d\n", csd_size);
2203    hexdump(csd, csd_size);
2204#endif
2205
2206    if (csd_size == 0) {
2207        // There's no further information, i.e. no codec specific data
2208        // Let's assume that the information provided in the mpeg4 headers
2209        // is accurate and hope for the best.
2210
2211        return OK;
2212    }
2213
2214    if (csd_size < 2) {
2215        return ERROR_MALFORMED;
2216    }
2217
2218    ABitReader br(csd, csd_size);
2219    uint32_t objectType = br.getBits(5);
2220
2221    if (objectType == 31) {  // AAC-ELD => additional 6 bits
2222        objectType = 32 + br.getBits(6);
2223    }
2224
2225    uint32_t freqIndex = br.getBits(4);
2226
2227    int32_t sampleRate = 0;
2228    int32_t numChannels = 0;
2229    if (freqIndex == 15) {
2230        if (csd_size < 5) {
2231            return ERROR_MALFORMED;
2232        }
2233        sampleRate = br.getBits(24);
2234        numChannels = br.getBits(4);
2235    } else {
2236        numChannels = br.getBits(4);
2237        if (objectType == 5) {
2238            // SBR specific config per 14496-3 table 1.13
2239            freqIndex = br.getBits(4);
2240            if (freqIndex == 15) {
2241                if (csd_size < 8) {
2242                    return ERROR_MALFORMED;
2243                }
2244                sampleRate = br.getBits(24);
2245            }
2246        }
2247
2248        if (sampleRate == 0) {
2249            static uint32_t kSamplingRate[] = {
2250                96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
2251                16000, 12000, 11025, 8000, 7350
2252            };
2253
2254            if (freqIndex == 13 || freqIndex == 14) {
2255                return ERROR_MALFORMED;
2256            }
2257
2258            sampleRate = kSamplingRate[freqIndex];
2259        }
2260    }
2261
2262    if (numChannels == 0) {
2263        return ERROR_UNSUPPORTED;
2264    }
2265
2266    int32_t prevSampleRate;
2267    CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));
2268
2269    if (prevSampleRate != sampleRate) {
2270        ALOGV("mpeg4 audio sample rate different from previous setting. "
2271             "was: %d, now: %d", prevSampleRate, sampleRate);
2272    }
2273
2274    mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);
2275
2276    int32_t prevChannelCount;
2277    CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));
2278
2279    if (prevChannelCount != numChannels) {
2280        ALOGV("mpeg4 audio channel count different from previous setting. "
2281             "was: %d, now: %d", prevChannelCount, numChannels);
2282    }
2283
2284    mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);
2285
2286    return OK;
2287}
2288
2289////////////////////////////////////////////////////////////////////////////////
2290
2291MPEG4Source::MPEG4Source(
2292        const sp<MetaData> &format,
2293        const sp<DataSource> &dataSource,
2294        int32_t timeScale,
2295        const sp<SampleTable> &sampleTable,
2296        Vector<SidxEntry> &sidx,
2297        off64_t firstMoofOffset)
2298    : mFormat(format),
2299      mDataSource(dataSource),
2300      mTimescale(timeScale),
2301      mSampleTable(sampleTable),
2302      mCurrentSampleIndex(0),
2303      mCurrentFragmentIndex(0),
2304      mSegments(sidx),
2305      mFirstMoofOffset(firstMoofOffset),
2306      mCurrentMoofOffset(firstMoofOffset),
2307      mCurrentTime(0),
2308      mCurrentSampleInfoAllocSize(0),
2309      mCurrentSampleInfoSizes(NULL),
2310      mCurrentSampleInfoOffsetsAllocSize(0),
2311      mCurrentSampleInfoOffsets(NULL),
2312      mIsAVC(false),
2313      mNALLengthSize(0),
2314      mStarted(false),
2315      mGroup(NULL),
2316      mBuffer(NULL),
2317      mWantsNALFragments(false),
2318      mSrcBuffer(NULL) {
2319
2320    mFormat->findInt32(kKeyCryptoMode, &mCryptoMode);
2321    mDefaultIVSize = 0;
2322    mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
2323    uint32_t keytype;
2324    const void *key;
2325    size_t keysize;
2326    if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
2327        CHECK(keysize <= 16);
2328        memset(mCryptoKey, 0, 16);
2329        memcpy(mCryptoKey, key, keysize);
2330    }
2331
2332    const char *mime;
2333    bool success = mFormat->findCString(kKeyMIMEType, &mime);
2334    CHECK(success);
2335
2336    mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
2337
2338    if (mIsAVC) {
2339        uint32_t type;
2340        const void *data;
2341        size_t size;
2342        CHECK(format->findData(kKeyAVCC, &type, &data, &size));
2343
2344        const uint8_t *ptr = (const uint8_t *)data;
2345
2346        CHECK(size >= 7);
2347        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
2348
2349        // The number of bytes used to encode the length of a NAL unit.
2350        mNALLengthSize = 1 + (ptr[4] & 3);
2351    }
2352
2353    CHECK(format->findInt32(kKeyTrackID, &mTrackId));
2354
2355    if (mFirstMoofOffset != 0) {
2356        off64_t offset = mFirstMoofOffset;
2357        parseChunk(&offset);
2358    }
2359}
2360
2361MPEG4Source::~MPEG4Source() {
2362    if (mStarted) {
2363        stop();
2364    }
2365    free(mCurrentSampleInfoSizes);
2366    free(mCurrentSampleInfoOffsets);
2367}
2368
2369status_t MPEG4Source::start(MetaData *params) {
2370    Mutex::Autolock autoLock(mLock);
2371
2372    CHECK(!mStarted);
2373
2374    int32_t val;
2375    if (params && params->findInt32(kKeyWantsNALFragments, &val)
2376        && val != 0) {
2377        mWantsNALFragments = true;
2378    } else {
2379        mWantsNALFragments = false;
2380    }
2381
2382    mGroup = new MediaBufferGroup;
2383
2384    int32_t max_size;
2385    CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size));
2386
2387    mGroup->add_buffer(new MediaBuffer(max_size));
2388
2389    mSrcBuffer = new uint8_t[max_size];
2390
2391    mStarted = true;
2392
2393    return OK;
2394}
2395
2396status_t MPEG4Source::stop() {
2397    Mutex::Autolock autoLock(mLock);
2398
2399    CHECK(mStarted);
2400
2401    if (mBuffer != NULL) {
2402        mBuffer->release();
2403        mBuffer = NULL;
2404    }
2405
2406    delete[] mSrcBuffer;
2407    mSrcBuffer = NULL;
2408
2409    delete mGroup;
2410    mGroup = NULL;
2411
2412    mStarted = false;
2413    mCurrentSampleIndex = 0;
2414
2415    return OK;
2416}
2417
2418status_t MPEG4Source::parseChunk(off64_t *offset) {
2419    uint32_t hdr[2];
2420    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
2421        return ERROR_IO;
2422    }
2423    uint64_t chunk_size = ntohl(hdr[0]);
2424    uint32_t chunk_type = ntohl(hdr[1]);
2425    off64_t data_offset = *offset + 8;
2426
2427    if (chunk_size == 1) {
2428        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
2429            return ERROR_IO;
2430        }
2431        chunk_size = ntoh64(chunk_size);
2432        data_offset += 8;
2433
2434        if (chunk_size < 16) {
2435            // The smallest valid chunk is 16 bytes long in this case.
2436            return ERROR_MALFORMED;
2437        }
2438    } else if (chunk_size < 8) {
2439        // The smallest valid chunk is 8 bytes long.
2440        return ERROR_MALFORMED;
2441    }
2442
2443    char chunk[5];
2444    MakeFourCCString(chunk_type, chunk);
2445    ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset);
2446
2447    off64_t chunk_data_size = *offset + chunk_size - data_offset;
2448
2449    switch(chunk_type) {
2450
2451        case FOURCC('t', 'r', 'a', 'f'):
2452        case FOURCC('m', 'o', 'o', 'f'): {
2453            off64_t stop_offset = *offset + chunk_size;
2454            *offset = data_offset;
2455            while (*offset < stop_offset) {
2456                status_t err = parseChunk(offset);
2457                if (err != OK) {
2458                    return err;
2459                }
2460            }
2461            if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
2462                // *offset points to the mdat box following this moof
2463                parseChunk(offset); // doesn't actually parse it, just updates offset
2464                mNextMoofOffset = *offset;
2465            }
2466            break;
2467        }
2468
2469        case FOURCC('t', 'f', 'h', 'd'): {
2470                status_t err;
2471                if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
2472                    return err;
2473                }
2474                *offset += chunk_size;
2475                break;
2476        }
2477
2478        case FOURCC('t', 'r', 'u', 'n'): {
2479                status_t err;
2480                if (mLastParsedTrackId == mTrackId) {
2481                    if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
2482                        return err;
2483                    }
2484                }
2485
2486                *offset += chunk_size;
2487                break;
2488        }
2489
2490        case FOURCC('s', 'a', 'i', 'z'): {
2491            status_t err;
2492            if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
2493                return err;
2494            }
2495            *offset += chunk_size;
2496            break;
2497        }
2498        case FOURCC('s', 'a', 'i', 'o'): {
2499            status_t err;
2500            if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
2501                return err;
2502            }
2503            *offset += chunk_size;
2504            break;
2505        }
2506
2507        case FOURCC('m', 'd', 'a', 't'): {
2508            // parse DRM info if present
2509            ALOGV("MPEG4Source::parseChunk mdat");
2510            // if saiz/saoi was previously observed, do something with the sampleinfos
2511            *offset += chunk_size;
2512            break;
2513        }
2514
2515        default: {
2516            *offset += chunk_size;
2517            break;
2518        }
2519    }
2520    return OK;
2521}
2522
2523status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size) {
2524    ALOGV("parseSampleAuxiliaryInformationSizes");
2525    // 14496-12 8.7.12
2526    uint8_t version;
2527    if (mDataSource->readAt(
2528            offset, &version, sizeof(version))
2529            < (ssize_t)sizeof(version)) {
2530        return ERROR_IO;
2531    }
2532
2533    if (version != 0) {
2534        return ERROR_UNSUPPORTED;
2535    }
2536    offset++;
2537
2538    uint32_t flags;
2539    if (!mDataSource->getUInt24(offset, &flags)) {
2540        return ERROR_IO;
2541    }
2542    offset += 3;
2543
2544    if (flags & 1) {
2545        uint32_t tmp;
2546        if (!mDataSource->getUInt32(offset, &tmp)) {
2547            return ERROR_MALFORMED;
2548        }
2549        mCurrentAuxInfoType = tmp;
2550        offset += 4;
2551        if (!mDataSource->getUInt32(offset, &tmp)) {
2552            return ERROR_MALFORMED;
2553        }
2554        mCurrentAuxInfoTypeParameter = tmp;
2555        offset += 4;
2556    }
2557
2558    uint8_t defsize;
2559    if (mDataSource->readAt(offset, &defsize, 1) != 1) {
2560        return ERROR_MALFORMED;
2561    }
2562    mCurrentDefaultSampleInfoSize = defsize;
2563    offset++;
2564
2565    uint32_t smplcnt;
2566    if (!mDataSource->getUInt32(offset, &smplcnt)) {
2567        return ERROR_MALFORMED;
2568    }
2569    mCurrentSampleInfoCount = smplcnt;
2570    offset += 4;
2571
2572    if (mCurrentDefaultSampleInfoSize != 0) {
2573        ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
2574        return OK;
2575    }
2576    if (smplcnt > mCurrentSampleInfoAllocSize) {
2577        mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
2578        mCurrentSampleInfoAllocSize = smplcnt;
2579    }
2580
2581    mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
2582    return OK;
2583}
2584
2585status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size) {
2586    ALOGV("parseSampleAuxiliaryInformationOffsets");
2587    // 14496-12 8.7.13
2588    uint8_t version;
2589    if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
2590        return ERROR_IO;
2591    }
2592    offset++;
2593
2594    uint32_t flags;
2595    if (!mDataSource->getUInt24(offset, &flags)) {
2596        return ERROR_IO;
2597    }
2598    offset += 3;
2599
2600    uint32_t entrycount;
2601    if (!mDataSource->getUInt32(offset, &entrycount)) {
2602        return ERROR_IO;
2603    }
2604    offset += 4;
2605
2606    if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
2607        mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8);
2608        mCurrentSampleInfoOffsetsAllocSize = entrycount;
2609    }
2610    mCurrentSampleInfoOffsetCount = entrycount;
2611
2612    for (size_t i = 0; i < entrycount; i++) {
2613        if (version == 0) {
2614            uint32_t tmp;
2615            if (!mDataSource->getUInt32(offset, &tmp)) {
2616                return ERROR_IO;
2617            }
2618            mCurrentSampleInfoOffsets[i] = tmp;
2619            offset += 4;
2620        } else {
2621            uint64_t tmp;
2622            if (!mDataSource->getUInt64(offset, &tmp)) {
2623                return ERROR_IO;
2624            }
2625            mCurrentSampleInfoOffsets[i] = tmp;
2626            offset += 8;
2627        }
2628    }
2629
2630    // parse clear/encrypted data
2631
2632    off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
2633
2634    drmoffset += mCurrentMoofOffset;
2635    int ivlength;
2636    CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength));
2637
2638    // read CencSampleAuxiliaryDataFormats
2639    for (size_t i = 0; i < mCurrentSampleInfoCount; i++) {
2640        Sample *smpl = &mCurrentSamples.editItemAt(i);
2641
2642        memset(smpl->iv, 0, 16);
2643        if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) {
2644            return ERROR_IO;
2645        }
2646
2647        drmoffset += ivlength;
2648
2649        int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
2650        if (smplinfosize == 0) {
2651            smplinfosize = mCurrentSampleInfoSizes[i];
2652        }
2653        if (smplinfosize > ivlength) {
2654            uint16_t numsubsamples;
2655            if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) {
2656                return ERROR_IO;
2657            }
2658            drmoffset += 2;
2659            for (size_t j = 0; j < numsubsamples; j++) {
2660                uint16_t numclear;
2661                uint32_t numencrypted;
2662                if (!mDataSource->getUInt16(drmoffset, &numclear)) {
2663                    return ERROR_IO;
2664                }
2665                drmoffset += 2;
2666                if (!mDataSource->getUInt32(drmoffset, &numencrypted)) {
2667                    return ERROR_IO;
2668                }
2669                drmoffset += 4;
2670                smpl->clearsizes.add(numclear);
2671                smpl->encryptedsizes.add(numencrypted);
2672            }
2673        } else {
2674            smpl->clearsizes.add(0);
2675            smpl->encryptedsizes.add(smpl->size);
2676        }
2677    }
2678
2679
2680    return OK;
2681}
2682
2683status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
2684
2685    if (size < 8) {
2686        return -EINVAL;
2687    }
2688
2689    uint32_t flags;
2690    if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
2691        return ERROR_MALFORMED;
2692    }
2693
2694    if (flags & 0xff000000) {
2695        return -EINVAL;
2696    }
2697
2698    if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
2699        return ERROR_MALFORMED;
2700    }
2701
2702    if (mLastParsedTrackId != mTrackId) {
2703        // this is not the right track, skip it
2704        return OK;
2705    }
2706
2707    mTrackFragmentHeaderInfo.mFlags = flags;
2708    mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
2709    offset += 8;
2710    size -= 8;
2711
2712    ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
2713
2714    if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
2715        if (size < 8) {
2716            return -EINVAL;
2717        }
2718
2719        if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
2720            return ERROR_MALFORMED;
2721        }
2722        offset += 8;
2723        size -= 8;
2724    }
2725
2726    if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
2727        if (size < 4) {
2728            return -EINVAL;
2729        }
2730
2731        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
2732            return ERROR_MALFORMED;
2733        }
2734        offset += 4;
2735        size -= 4;
2736    }
2737
2738    if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
2739        if (size < 4) {
2740            return -EINVAL;
2741        }
2742
2743        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
2744            return ERROR_MALFORMED;
2745        }
2746        offset += 4;
2747        size -= 4;
2748    }
2749
2750    if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
2751        if (size < 4) {
2752            return -EINVAL;
2753        }
2754
2755        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
2756            return ERROR_MALFORMED;
2757        }
2758        offset += 4;
2759        size -= 4;
2760    }
2761
2762    if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
2763        if (size < 4) {
2764            return -EINVAL;
2765        }
2766
2767        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
2768            return ERROR_MALFORMED;
2769        }
2770        offset += 4;
2771        size -= 4;
2772    }
2773
2774    if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
2775        mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
2776    }
2777
2778    mTrackFragmentHeaderInfo.mDataOffset = 0;
2779    return OK;
2780}
2781
2782status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
2783
2784    ALOGV("MPEG4Extractor::parseTrackFragmentRun");
2785    if (size < 8) {
2786        return -EINVAL;
2787    }
2788
2789    enum {
2790        kDataOffsetPresent                  = 0x01,
2791        kFirstSampleFlagsPresent            = 0x04,
2792        kSampleDurationPresent              = 0x100,
2793        kSampleSizePresent                  = 0x200,
2794        kSampleFlagsPresent                 = 0x400,
2795        kSampleCompositionTimeOffsetPresent = 0x800,
2796    };
2797
2798    uint32_t flags;
2799    if (!mDataSource->getUInt32(offset, &flags)) {
2800        return ERROR_MALFORMED;
2801    }
2802    ALOGV("fragment run flags: %08x", flags);
2803
2804    if (flags & 0xff000000) {
2805        return -EINVAL;
2806    }
2807
2808    if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
2809        // These two shall not be used together.
2810        return -EINVAL;
2811    }
2812
2813    uint32_t sampleCount;
2814    if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
2815        return ERROR_MALFORMED;
2816    }
2817    offset += 8;
2818    size -= 8;
2819
2820    uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
2821
2822    uint32_t firstSampleFlags = 0;
2823
2824    if (flags & kDataOffsetPresent) {
2825        if (size < 4) {
2826            return -EINVAL;
2827        }
2828
2829        int32_t dataOffsetDelta;
2830        if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
2831            return ERROR_MALFORMED;
2832        }
2833
2834        dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
2835
2836        offset += 4;
2837        size -= 4;
2838    }
2839
2840    if (flags & kFirstSampleFlagsPresent) {
2841        if (size < 4) {
2842            return -EINVAL;
2843        }
2844
2845        if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
2846            return ERROR_MALFORMED;
2847        }
2848        offset += 4;
2849        size -= 4;
2850    }
2851
2852    uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
2853             sampleCtsOffset = 0;
2854
2855    size_t bytesPerSample = 0;
2856    if (flags & kSampleDurationPresent) {
2857        bytesPerSample += 4;
2858    } else if (mTrackFragmentHeaderInfo.mFlags
2859            & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
2860        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
2861    } else {
2862        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
2863    }
2864
2865    if (flags & kSampleSizePresent) {
2866        bytesPerSample += 4;
2867    } else if (mTrackFragmentHeaderInfo.mFlags
2868            & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
2869        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
2870    } else {
2871        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
2872    }
2873
2874    if (flags & kSampleFlagsPresent) {
2875        bytesPerSample += 4;
2876    } else if (mTrackFragmentHeaderInfo.mFlags
2877            & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
2878        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
2879    } else {
2880        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
2881    }
2882
2883    if (flags & kSampleCompositionTimeOffsetPresent) {
2884        bytesPerSample += 4;
2885    } else {
2886        sampleCtsOffset = 0;
2887    }
2888
2889    if (size < sampleCount * bytesPerSample) {
2890        return -EINVAL;
2891    }
2892
2893    Sample tmp;
2894    for (uint32_t i = 0; i < sampleCount; ++i) {
2895        if (flags & kSampleDurationPresent) {
2896            if (!mDataSource->getUInt32(offset, &sampleDuration)) {
2897                return ERROR_MALFORMED;
2898            }
2899            offset += 4;
2900        }
2901
2902        if (flags & kSampleSizePresent) {
2903            if (!mDataSource->getUInt32(offset, &sampleSize)) {
2904                return ERROR_MALFORMED;
2905            }
2906            offset += 4;
2907        }
2908
2909        if (flags & kSampleFlagsPresent) {
2910            if (!mDataSource->getUInt32(offset, &sampleFlags)) {
2911                return ERROR_MALFORMED;
2912            }
2913            offset += 4;
2914        }
2915
2916        if (flags & kSampleCompositionTimeOffsetPresent) {
2917            if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
2918                return ERROR_MALFORMED;
2919            }
2920            offset += 4;
2921        }
2922
2923        ALOGV("adding sample %d at offset 0x%08llx, size %u, duration %u, "
2924              " flags 0x%08x", i + 1,
2925                dataOffset, sampleSize, sampleDuration,
2926                (flags & kFirstSampleFlagsPresent) && i == 0
2927                    ? firstSampleFlags : sampleFlags);
2928        tmp.offset = dataOffset;
2929        tmp.size = sampleSize;
2930        tmp.duration = sampleDuration;
2931        mCurrentSamples.add(tmp);
2932
2933        dataOffset += sampleSize;
2934    }
2935
2936    mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
2937
2938    return OK;
2939}
2940
2941sp<MetaData> MPEG4Source::getFormat() {
2942    Mutex::Autolock autoLock(mLock);
2943
2944    return mFormat;
2945}
2946
2947size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
2948    switch (mNALLengthSize) {
2949        case 1:
2950            return *data;
2951        case 2:
2952            return U16_AT(data);
2953        case 3:
2954            return ((size_t)data[0] << 16) | U16_AT(&data[1]);
2955        case 4:
2956            return U32_AT(data);
2957    }
2958
2959    // This cannot happen, mNALLengthSize springs to life by adding 1 to
2960    // a 2-bit integer.
2961    CHECK(!"Should not be here.");
2962
2963    return 0;
2964}
2965
2966status_t MPEG4Source::read(
2967        MediaBuffer **out, const ReadOptions *options) {
2968    Mutex::Autolock autoLock(mLock);
2969
2970    CHECK(mStarted);
2971
2972    if (mFirstMoofOffset > 0) {
2973        return fragmentedRead(out, options);
2974    }
2975
2976    *out = NULL;
2977
2978    int64_t targetSampleTimeUs = -1;
2979
2980    int64_t seekTimeUs;
2981    ReadOptions::SeekMode mode;
2982    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
2983        uint32_t findFlags = 0;
2984        switch (mode) {
2985            case ReadOptions::SEEK_PREVIOUS_SYNC:
2986                findFlags = SampleTable::kFlagBefore;
2987                break;
2988            case ReadOptions::SEEK_NEXT_SYNC:
2989                findFlags = SampleTable::kFlagAfter;
2990                break;
2991            case ReadOptions::SEEK_CLOSEST_SYNC:
2992            case ReadOptions::SEEK_CLOSEST:
2993                findFlags = SampleTable::kFlagClosest;
2994                break;
2995            default:
2996                CHECK(!"Should not be here.");
2997                break;
2998        }
2999
3000        uint32_t sampleIndex;
3001        status_t err = mSampleTable->findSampleAtTime(
3002                seekTimeUs * mTimescale / 1000000,
3003                &sampleIndex, findFlags);
3004
3005        if (mode == ReadOptions::SEEK_CLOSEST) {
3006            // We found the closest sample already, now we want the sync
3007            // sample preceding it (or the sample itself of course), even
3008            // if the subsequent sync sample is closer.
3009            findFlags = SampleTable::kFlagBefore;
3010        }
3011
3012        uint32_t syncSampleIndex;
3013        if (err == OK) {
3014            err = mSampleTable->findSyncSampleNear(
3015                    sampleIndex, &syncSampleIndex, findFlags);
3016        }
3017
3018        uint32_t sampleTime;
3019        if (err == OK) {
3020            err = mSampleTable->getMetaDataForSample(
3021                    sampleIndex, NULL, NULL, &sampleTime);
3022        }
3023
3024        if (err != OK) {
3025            if (err == ERROR_OUT_OF_RANGE) {
3026                // An attempt to seek past the end of the stream would
3027                // normally cause this ERROR_OUT_OF_RANGE error. Propagating
3028                // this all the way to the MediaPlayer would cause abnormal
3029                // termination. Legacy behaviour appears to be to behave as if
3030                // we had seeked to the end of stream, ending normally.
3031                err = ERROR_END_OF_STREAM;
3032            }
3033            ALOGV("end of stream");
3034            return err;
3035        }
3036
3037        if (mode == ReadOptions::SEEK_CLOSEST) {
3038            targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
3039        }
3040
3041#if 0
3042        uint32_t syncSampleTime;
3043        CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
3044                    syncSampleIndex, NULL, NULL, &syncSampleTime));
3045
3046        ALOGI("seek to time %lld us => sample at time %lld us, "
3047             "sync sample at time %lld us",
3048             seekTimeUs,
3049             sampleTime * 1000000ll / mTimescale,
3050             syncSampleTime * 1000000ll / mTimescale);
3051#endif
3052
3053        mCurrentSampleIndex = syncSampleIndex;
3054        if (mBuffer != NULL) {
3055            mBuffer->release();
3056            mBuffer = NULL;
3057        }
3058
3059        // fall through
3060    }
3061
3062    off64_t offset;
3063    size_t size;
3064    uint32_t cts;
3065    bool isSyncSample;
3066    bool newBuffer = false;
3067    if (mBuffer == NULL) {
3068        newBuffer = true;
3069
3070        status_t err =
3071            mSampleTable->getMetaDataForSample(
3072                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample);
3073
3074        if (err != OK) {
3075            return err;
3076        }
3077
3078        err = mGroup->acquire_buffer(&mBuffer);
3079
3080        if (err != OK) {
3081            CHECK(mBuffer == NULL);
3082            return err;
3083        }
3084    }
3085
3086    if (!mIsAVC || mWantsNALFragments) {
3087        if (newBuffer) {
3088            ssize_t num_bytes_read =
3089                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3090
3091            if (num_bytes_read < (ssize_t)size) {
3092                mBuffer->release();
3093                mBuffer = NULL;
3094
3095                return ERROR_IO;
3096            }
3097
3098            CHECK(mBuffer != NULL);
3099            mBuffer->set_range(0, size);
3100            mBuffer->meta_data()->clear();
3101            mBuffer->meta_data()->setInt64(
3102                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3103
3104            if (targetSampleTimeUs >= 0) {
3105                mBuffer->meta_data()->setInt64(
3106                        kKeyTargetTime, targetSampleTimeUs);
3107            }
3108
3109            if (isSyncSample) {
3110                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3111            }
3112
3113            ++mCurrentSampleIndex;
3114        }
3115
3116        if (!mIsAVC) {
3117            *out = mBuffer;
3118            mBuffer = NULL;
3119
3120            return OK;
3121        }
3122
3123        // Each NAL unit is split up into its constituent fragments and
3124        // each one of them returned in its own buffer.
3125
3126        CHECK(mBuffer->range_length() >= mNALLengthSize);
3127
3128        const uint8_t *src =
3129            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3130
3131        size_t nal_size = parseNALSize(src);
3132        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3133            ALOGE("incomplete NAL unit.");
3134
3135            mBuffer->release();
3136            mBuffer = NULL;
3137
3138            return ERROR_MALFORMED;
3139        }
3140
3141        MediaBuffer *clone = mBuffer->clone();
3142        CHECK(clone != NULL);
3143        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3144
3145        CHECK(mBuffer != NULL);
3146        mBuffer->set_range(
3147                mBuffer->range_offset() + mNALLengthSize + nal_size,
3148                mBuffer->range_length() - mNALLengthSize - nal_size);
3149
3150        if (mBuffer->range_length() == 0) {
3151            mBuffer->release();
3152            mBuffer = NULL;
3153        }
3154
3155        *out = clone;
3156
3157        return OK;
3158    } else {
3159        // Whole NAL units are returned but each fragment is prefixed by
3160        // the start code (0x00 00 00 01).
3161        ssize_t num_bytes_read = 0;
3162        int32_t drm = 0;
3163        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3164        if (usesDRM) {
3165            num_bytes_read =
3166                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3167        } else {
3168            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3169        }
3170
3171        if (num_bytes_read < (ssize_t)size) {
3172            mBuffer->release();
3173            mBuffer = NULL;
3174
3175            return ERROR_IO;
3176        }
3177
3178        if (usesDRM) {
3179            CHECK(mBuffer != NULL);
3180            mBuffer->set_range(0, size);
3181
3182        } else {
3183            uint8_t *dstData = (uint8_t *)mBuffer->data();
3184            size_t srcOffset = 0;
3185            size_t dstOffset = 0;
3186
3187            while (srcOffset < size) {
3188                bool isMalFormed = (srcOffset + mNALLengthSize > size);
3189                size_t nalLength = 0;
3190                if (!isMalFormed) {
3191                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
3192                    srcOffset += mNALLengthSize;
3193                    isMalFormed = srcOffset + nalLength > size;
3194                }
3195
3196                if (isMalFormed) {
3197                    ALOGE("Video is malformed");
3198                    mBuffer->release();
3199                    mBuffer = NULL;
3200                    return ERROR_MALFORMED;
3201                }
3202
3203                if (nalLength == 0) {
3204                    continue;
3205                }
3206
3207                CHECK(dstOffset + 4 <= mBuffer->size());
3208
3209                dstData[dstOffset++] = 0;
3210                dstData[dstOffset++] = 0;
3211                dstData[dstOffset++] = 0;
3212                dstData[dstOffset++] = 1;
3213                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
3214                srcOffset += nalLength;
3215                dstOffset += nalLength;
3216            }
3217            CHECK_EQ(srcOffset, size);
3218            CHECK(mBuffer != NULL);
3219            mBuffer->set_range(0, dstOffset);
3220        }
3221
3222        mBuffer->meta_data()->clear();
3223        mBuffer->meta_data()->setInt64(
3224                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3225
3226        if (targetSampleTimeUs >= 0) {
3227            mBuffer->meta_data()->setInt64(
3228                    kKeyTargetTime, targetSampleTimeUs);
3229        }
3230
3231        if (isSyncSample) {
3232            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3233        }
3234
3235        ++mCurrentSampleIndex;
3236
3237        *out = mBuffer;
3238        mBuffer = NULL;
3239
3240        return OK;
3241    }
3242}
3243
3244status_t MPEG4Source::fragmentedRead(
3245        MediaBuffer **out, const ReadOptions *options) {
3246
3247    ALOGV("MPEG4Source::fragmentedRead");
3248
3249    CHECK(mStarted);
3250
3251    *out = NULL;
3252
3253    int64_t targetSampleTimeUs = -1;
3254
3255    int64_t seekTimeUs;
3256    ReadOptions::SeekMode mode;
3257    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3258
3259        int numSidxEntries = mSegments.size();
3260        if (numSidxEntries != 0) {
3261            int64_t totalTime = 0;
3262            off64_t totalOffset = mFirstMoofOffset;
3263            for (int i = 0; i < numSidxEntries; i++) {
3264                const SidxEntry *se = &mSegments[i];
3265                if (totalTime + se->mDurationUs > seekTimeUs) {
3266                    // The requested time is somewhere in this segment
3267                    if ((mode == ReadOptions::SEEK_NEXT_SYNC) ||
3268                        (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
3269                        (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
3270                        // requested next sync, or closest sync and it was closer to the end of
3271                        // this segment
3272                        totalTime += se->mDurationUs;
3273                        totalOffset += se->mSize;
3274                    }
3275                    break;
3276                }
3277                totalTime += se->mDurationUs;
3278                totalOffset += se->mSize;
3279            }
3280        mCurrentMoofOffset = totalOffset;
3281        mCurrentSamples.clear();
3282        mCurrentSampleIndex = 0;
3283        parseChunk(&totalOffset);
3284        mCurrentTime = totalTime * mTimescale / 1000000ll;
3285        }
3286
3287        if (mBuffer != NULL) {
3288            mBuffer->release();
3289            mBuffer = NULL;
3290        }
3291
3292        // fall through
3293    }
3294
3295    off64_t offset = 0;
3296    size_t size;
3297    uint32_t cts = 0;
3298    bool isSyncSample = false;
3299    bool newBuffer = false;
3300    if (mBuffer == NULL) {
3301        newBuffer = true;
3302
3303        if (mCurrentSampleIndex >= mCurrentSamples.size()) {
3304            // move to next fragment
3305            Sample lastSample = mCurrentSamples[mCurrentSamples.size() - 1];
3306            off64_t nextMoof = mNextMoofOffset; // lastSample.offset + lastSample.size;
3307            mCurrentMoofOffset = nextMoof;
3308            mCurrentSamples.clear();
3309            mCurrentSampleIndex = 0;
3310            parseChunk(&nextMoof);
3311                if (mCurrentSampleIndex >= mCurrentSamples.size()) {
3312                    return ERROR_END_OF_STREAM;
3313                }
3314        }
3315
3316        const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
3317        offset = smpl->offset;
3318        size = smpl->size;
3319        cts = mCurrentTime;
3320        mCurrentTime += smpl->duration;
3321        isSyncSample = (mCurrentSampleIndex == 0); // XXX
3322
3323        status_t err = mGroup->acquire_buffer(&mBuffer);
3324
3325        if (err != OK) {
3326            CHECK(mBuffer == NULL);
3327            ALOGV("acquire_buffer returned %d", err);
3328            return err;
3329        }
3330    }
3331
3332    const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
3333    const sp<MetaData> bufmeta = mBuffer->meta_data();
3334    bufmeta->clear();
3335    if (smpl->encryptedsizes.size()) {
3336        // store clear/encrypted lengths in metadata
3337        bufmeta->setData(kKeyPlainSizes, 0,
3338                smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
3339        bufmeta->setData(kKeyEncryptedSizes, 0,
3340                smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
3341        bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size?
3342        bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
3343        bufmeta->setInt32(kKeyCryptoMode, mCryptoMode);
3344        bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16);
3345    }
3346
3347    if (!mIsAVC || mWantsNALFragments) {
3348        if (newBuffer) {
3349            ssize_t num_bytes_read =
3350                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3351
3352            if (num_bytes_read < (ssize_t)size) {
3353                mBuffer->release();
3354                mBuffer = NULL;
3355
3356                ALOGV("i/o error");
3357                return ERROR_IO;
3358            }
3359
3360            CHECK(mBuffer != NULL);
3361            mBuffer->set_range(0, size);
3362            mBuffer->meta_data()->setInt64(
3363                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3364
3365            if (targetSampleTimeUs >= 0) {
3366                mBuffer->meta_data()->setInt64(
3367                        kKeyTargetTime, targetSampleTimeUs);
3368            }
3369
3370            if (isSyncSample) {
3371                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3372            }
3373
3374            ++mCurrentSampleIndex;
3375        }
3376
3377        if (!mIsAVC) {
3378            *out = mBuffer;
3379            mBuffer = NULL;
3380
3381            return OK;
3382        }
3383
3384        // Each NAL unit is split up into its constituent fragments and
3385        // each one of them returned in its own buffer.
3386
3387        CHECK(mBuffer->range_length() >= mNALLengthSize);
3388
3389        const uint8_t *src =
3390            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3391
3392        size_t nal_size = parseNALSize(src);
3393        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3394            ALOGE("incomplete NAL unit.");
3395
3396            mBuffer->release();
3397            mBuffer = NULL;
3398
3399            return ERROR_MALFORMED;
3400        }
3401
3402        MediaBuffer *clone = mBuffer->clone();
3403        CHECK(clone != NULL);
3404        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3405
3406        CHECK(mBuffer != NULL);
3407        mBuffer->set_range(
3408                mBuffer->range_offset() + mNALLengthSize + nal_size,
3409                mBuffer->range_length() - mNALLengthSize - nal_size);
3410
3411        if (mBuffer->range_length() == 0) {
3412            mBuffer->release();
3413            mBuffer = NULL;
3414        }
3415
3416        *out = clone;
3417
3418        return OK;
3419    } else {
3420        ALOGV("whole NAL");
3421        // Whole NAL units are returned but each fragment is prefixed by
3422        // the start code (0x00 00 00 01).
3423        ssize_t num_bytes_read = 0;
3424        int32_t drm = 0;
3425        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3426        if (usesDRM) {
3427            num_bytes_read =
3428                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3429        } else {
3430            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3431        }
3432
3433        if (num_bytes_read < (ssize_t)size) {
3434            mBuffer->release();
3435            mBuffer = NULL;
3436
3437            ALOGV("i/o error");
3438            return ERROR_IO;
3439        }
3440
3441        if (usesDRM) {
3442            CHECK(mBuffer != NULL);
3443            mBuffer->set_range(0, size);
3444
3445        } else {
3446            uint8_t *dstData = (uint8_t *)mBuffer->data();
3447            size_t srcOffset = 0;
3448            size_t dstOffset = 0;
3449
3450            while (srcOffset < size) {
3451                bool isMalFormed = (srcOffset + mNALLengthSize > size);
3452                size_t nalLength = 0;
3453                if (!isMalFormed) {
3454                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
3455                    srcOffset += mNALLengthSize;
3456                    isMalFormed = srcOffset + nalLength > size;
3457                }
3458
3459                if (isMalFormed) {
3460                    ALOGE("Video is malformed");
3461                    mBuffer->release();
3462                    mBuffer = NULL;
3463                    return ERROR_MALFORMED;
3464                }
3465
3466                if (nalLength == 0) {
3467                    continue;
3468                }
3469
3470                CHECK(dstOffset + 4 <= mBuffer->size());
3471
3472                dstData[dstOffset++] = 0;
3473                dstData[dstOffset++] = 0;
3474                dstData[dstOffset++] = 0;
3475                dstData[dstOffset++] = 1;
3476                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
3477                srcOffset += nalLength;
3478                dstOffset += nalLength;
3479            }
3480            CHECK_EQ(srcOffset, size);
3481            CHECK(mBuffer != NULL);
3482            mBuffer->set_range(0, dstOffset);
3483        }
3484
3485        mBuffer->meta_data()->setInt64(
3486                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3487
3488        if (targetSampleTimeUs >= 0) {
3489            mBuffer->meta_data()->setInt64(
3490                    kKeyTargetTime, targetSampleTimeUs);
3491        }
3492
3493        if (isSyncSample) {
3494            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3495        }
3496
3497        ++mCurrentSampleIndex;
3498
3499        *out = mBuffer;
3500        mBuffer = NULL;
3501
3502        return OK;
3503    }
3504}
3505
3506MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
3507        const char *mimePrefix) {
3508    for (Track *track = mFirstTrack; track != NULL; track = track->next) {
3509        const char *mime;
3510        if (track->meta != NULL
3511                && track->meta->findCString(kKeyMIMEType, &mime)
3512                && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
3513            return track;
3514        }
3515    }
3516
3517    return NULL;
3518}
3519
3520static bool LegacySniffMPEG4(
3521        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
3522    uint8_t header[8];
3523
3524    ssize_t n = source->readAt(4, header, sizeof(header));
3525    if (n < (ssize_t)sizeof(header)) {
3526        return false;
3527    }
3528
3529    if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
3530        || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
3531        || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
3532        || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
3533        || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
3534        || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) {
3535        *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
3536        *confidence = 0.4;
3537
3538        return true;
3539    }
3540
3541    return false;
3542}
3543
3544static bool isCompatibleBrand(uint32_t fourcc) {
3545    static const uint32_t kCompatibleBrands[] = {
3546        FOURCC('i', 's', 'o', 'm'),
3547        FOURCC('i', 's', 'o', '2'),
3548        FOURCC('a', 'v', 'c', '1'),
3549        FOURCC('3', 'g', 'p', '4'),
3550        FOURCC('m', 'p', '4', '1'),
3551        FOURCC('m', 'p', '4', '2'),
3552
3553        // Won't promise that the following file types can be played.
3554        // Just give these file types a chance.
3555        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
3556        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
3557
3558        FOURCC('3', 'g', '2', 'a'),  // 3GPP2
3559        FOURCC('3', 'g', '2', 'b'),
3560    };
3561
3562    for (size_t i = 0;
3563         i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
3564         ++i) {
3565        if (kCompatibleBrands[i] == fourcc) {
3566            return true;
3567        }
3568    }
3569
3570    return false;
3571}
3572
3573// Attempt to actually parse the 'ftyp' atom and determine if a suitable
3574// compatible brand is present.
3575// Also try to identify where this file's metadata ends
3576// (end of the 'moov' atom) and report it to the caller as part of
3577// the metadata.
3578static bool BetterSniffMPEG4(
3579        const sp<DataSource> &source, String8 *mimeType, float *confidence,
3580        sp<AMessage> *meta) {
3581    // We scan up to 128 bytes to identify this file as an MP4.
3582    static const off64_t kMaxScanOffset = 128ll;
3583
3584    off64_t offset = 0ll;
3585    bool foundGoodFileType = false;
3586    off64_t moovAtomEndOffset = -1ll;
3587    bool done = false;
3588
3589    while (!done && offset < kMaxScanOffset) {
3590        uint32_t hdr[2];
3591        if (source->readAt(offset, hdr, 8) < 8) {
3592            return false;
3593        }
3594
3595        uint64_t chunkSize = ntohl(hdr[0]);
3596        uint32_t chunkType = ntohl(hdr[1]);
3597        off64_t chunkDataOffset = offset + 8;
3598
3599        if (chunkSize == 1) {
3600            if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
3601                return false;
3602            }
3603
3604            chunkSize = ntoh64(chunkSize);
3605            chunkDataOffset += 8;
3606
3607            if (chunkSize < 16) {
3608                // The smallest valid chunk is 16 bytes long in this case.
3609                return false;
3610            }
3611        } else if (chunkSize < 8) {
3612            // The smallest valid chunk is 8 bytes long.
3613            return false;
3614        }
3615
3616        off64_t chunkDataSize = offset + chunkSize - chunkDataOffset;
3617
3618        char chunkstring[5];
3619        MakeFourCCString(chunkType, chunkstring);
3620        ALOGV("saw chunk type %s, size %lld @ %lld", chunkstring, chunkSize, offset);
3621        switch (chunkType) {
3622            case FOURCC('f', 't', 'y', 'p'):
3623            {
3624                if (chunkDataSize < 8) {
3625                    return false;
3626                }
3627
3628                uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
3629                for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
3630                    if (i == 1) {
3631                        // Skip this index, it refers to the minorVersion,
3632                        // not a brand.
3633                        continue;
3634                    }
3635
3636                    uint32_t brand;
3637                    if (source->readAt(
3638                                chunkDataOffset + 4 * i, &brand, 4) < 4) {
3639                        return false;
3640                    }
3641
3642                    brand = ntohl(brand);
3643
3644                    if (isCompatibleBrand(brand)) {
3645                        foundGoodFileType = true;
3646                        break;
3647                    }
3648                }
3649
3650                if (!foundGoodFileType) {
3651                    return false;
3652                }
3653
3654                break;
3655            }
3656
3657            case FOURCC('m', 'o', 'o', 'v'):
3658            {
3659                moovAtomEndOffset = offset + chunkSize;
3660
3661                done = true;
3662                break;
3663            }
3664
3665            default:
3666                break;
3667        }
3668
3669        offset += chunkSize;
3670    }
3671
3672    if (!foundGoodFileType) {
3673        return false;
3674    }
3675
3676    *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
3677    *confidence = 0.4f;
3678
3679    if (moovAtomEndOffset >= 0) {
3680        *meta = new AMessage;
3681        (*meta)->setInt64("meta-data-size", moovAtomEndOffset);
3682
3683        ALOGV("found metadata size: %lld", moovAtomEndOffset);
3684    }
3685
3686    return true;
3687}
3688
3689bool SniffMPEG4(
3690        const sp<DataSource> &source, String8 *mimeType, float *confidence,
3691        sp<AMessage> *meta) {
3692    if (BetterSniffMPEG4(source, mimeType, confidence, meta)) {
3693        return true;
3694    }
3695
3696    if (LegacySniffMPEG4(source, mimeType, confidence)) {
3697        ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
3698        return true;
3699    }
3700
3701    return false;
3702}
3703
3704}  // namespace android
3705