1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MPEG4Extractor"
19#include <utils/Log.h>
20
21#include "include/MPEG4Extractor.h"
22#include "include/SampleTable.h"
23#include "include/ESDS.h"
24
25#include <ctype.h>
26#include <stdint.h>
27#include <stdlib.h>
28#include <string.h>
29
30#include <media/stagefright/foundation/ABitReader.h>
31#include <media/stagefright/foundation/ABuffer.h>
32#include <media/stagefright/foundation/ADebug.h>
33#include <media/stagefright/foundation/AMessage.h>
34#include <media/stagefright/MediaBuffer.h>
35#include <media/stagefright/MediaBufferGroup.h>
36#include <media/stagefright/MediaDefs.h>
37#include <media/stagefright/MediaSource.h>
38#include <media/stagefright/MetaData.h>
39#include <utils/String8.h>
40
41namespace android {
42
43class MPEG4Source : public MediaSource {
44public:
45    // Caller retains ownership of both "dataSource" and "sampleTable".
46    MPEG4Source(const sp<MetaData> &format,
47                const sp<DataSource> &dataSource,
48                int32_t timeScale,
49                const sp<SampleTable> &sampleTable,
50                Vector<SidxEntry> &sidx,
51                off64_t firstMoofOffset);
52
53    virtual status_t start(MetaData *params = NULL);
54    virtual status_t stop();
55
56    virtual sp<MetaData> getFormat();
57
58    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
59    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);
60
61protected:
62    virtual ~MPEG4Source();
63
64private:
65    Mutex mLock;
66
67    sp<MetaData> mFormat;
68    sp<DataSource> mDataSource;
69    int32_t mTimescale;
70    sp<SampleTable> mSampleTable;
71    uint32_t mCurrentSampleIndex;
72    uint32_t mCurrentFragmentIndex;
73    Vector<SidxEntry> &mSegments;
74    off64_t mFirstMoofOffset;
75    off64_t mCurrentMoofOffset;
76    off64_t mNextMoofOffset;
77    uint32_t mCurrentTime;
78    int32_t mLastParsedTrackId;
79    int32_t mTrackId;
80
81    int32_t mCryptoMode;    // passed in from extractor
82    int32_t mDefaultIVSize; // passed in from extractor
83    uint8_t mCryptoKey[16]; // passed in from extractor
84    uint32_t mCurrentAuxInfoType;
85    uint32_t mCurrentAuxInfoTypeParameter;
86    int32_t mCurrentDefaultSampleInfoSize;
87    uint32_t mCurrentSampleInfoCount;
88    uint32_t mCurrentSampleInfoAllocSize;
89    uint8_t* mCurrentSampleInfoSizes;
90    uint32_t mCurrentSampleInfoOffsetCount;
91    uint32_t mCurrentSampleInfoOffsetsAllocSize;
92    uint64_t* mCurrentSampleInfoOffsets;
93
94    bool mIsAVC;
95    size_t mNALLengthSize;
96
97    bool mStarted;
98
99    MediaBufferGroup *mGroup;
100
101    MediaBuffer *mBuffer;
102
103    bool mWantsNALFragments;
104
105    uint8_t *mSrcBuffer;
106
107    size_t parseNALSize(const uint8_t *data) const;
108    status_t parseChunk(off64_t *offset);
109    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
110    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
111    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
112    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
113
114    struct TrackFragmentHeaderInfo {
115        enum Flags {
116            kBaseDataOffsetPresent         = 0x01,
117            kSampleDescriptionIndexPresent = 0x02,
118            kDefaultSampleDurationPresent  = 0x08,
119            kDefaultSampleSizePresent      = 0x10,
120            kDefaultSampleFlagsPresent     = 0x20,
121            kDurationIsEmpty               = 0x10000,
122        };
123
124        uint32_t mTrackID;
125        uint32_t mFlags;
126        uint64_t mBaseDataOffset;
127        uint32_t mSampleDescriptionIndex;
128        uint32_t mDefaultSampleDuration;
129        uint32_t mDefaultSampleSize;
130        uint32_t mDefaultSampleFlags;
131
132        uint64_t mDataOffset;
133    };
134    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;
135
136    struct Sample {
137        off64_t offset;
138        size_t size;
139        uint32_t duration;
140        uint8_t iv[16];
141        Vector<size_t> clearsizes;
142        Vector<size_t> encryptedsizes;
143    };
144    Vector<Sample> mCurrentSamples;
145
146    MPEG4Source(const MPEG4Source &);
147    MPEG4Source &operator=(const MPEG4Source &);
148};
149
150// This custom data source wraps an existing one and satisfies requests
151// falling entirely within a cached range from the cache while forwarding
152// all remaining requests to the wrapped datasource.
153// This is used to cache the full sampletable metadata for a single track,
154// possibly wrapping multiple times to cover all tracks, i.e.
155// Each MPEG4DataSource caches the sampletable metadata for a single track.
156
157struct MPEG4DataSource : public DataSource {
158    MPEG4DataSource(const sp<DataSource> &source);
159
160    virtual status_t initCheck() const;
161    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
162    virtual status_t getSize(off64_t *size);
163    virtual uint32_t flags();
164
165    status_t setCachedRange(off64_t offset, size_t size);
166
167protected:
168    virtual ~MPEG4DataSource();
169
170private:
171    Mutex mLock;
172
173    sp<DataSource> mSource;
174    off64_t mCachedOffset;
175    size_t mCachedSize;
176    uint8_t *mCache;
177
178    void clearCache();
179
180    MPEG4DataSource(const MPEG4DataSource &);
181    MPEG4DataSource &operator=(const MPEG4DataSource &);
182};
183
184MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
185    : mSource(source),
186      mCachedOffset(0),
187      mCachedSize(0),
188      mCache(NULL) {
189}
190
191MPEG4DataSource::~MPEG4DataSource() {
192    clearCache();
193}
194
195void MPEG4DataSource::clearCache() {
196    if (mCache) {
197        free(mCache);
198        mCache = NULL;
199    }
200
201    mCachedOffset = 0;
202    mCachedSize = 0;
203}
204
205status_t MPEG4DataSource::initCheck() const {
206    return mSource->initCheck();
207}
208
209ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
210    Mutex::Autolock autoLock(mLock);
211
212    if (offset >= mCachedOffset
213            && offset + size <= mCachedOffset + mCachedSize) {
214        memcpy(data, &mCache[offset - mCachedOffset], size);
215        return size;
216    }
217
218    return mSource->readAt(offset, data, size);
219}
220
221status_t MPEG4DataSource::getSize(off64_t *size) {
222    return mSource->getSize(size);
223}
224
225uint32_t MPEG4DataSource::flags() {
226    return mSource->flags();
227}
228
229status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
230    Mutex::Autolock autoLock(mLock);
231
232    clearCache();
233
234    mCache = (uint8_t *)malloc(size);
235
236    if (mCache == NULL) {
237        return -ENOMEM;
238    }
239
240    mCachedOffset = offset;
241    mCachedSize = size;
242
243    ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
244
245    if (err < (ssize_t)size) {
246        clearCache();
247
248        return ERROR_IO;
249    }
250
251    return OK;
252}
253
254////////////////////////////////////////////////////////////////////////////////
255
// Debug helper: prints "size" bytes starting at "_data" to stdout, 16 bytes
// per row. Each row shows the row offset in hex, the bytes in hex (with an
// extra space after the eighth byte; missing bytes of a short last row are
// padded with blanks) and finally the printable characters, with '.' standing
// in for non-printable bytes.
static void hexdump(const void *_data, size_t size) {
    const uint8_t *data = (const uint8_t *)_data;

    for (size_t offset = 0; offset < size; offset += 16) {
        // "offset" is a size_t, so it needs the 'z' length modifier; plain
        // "%04x" expects an unsigned int and mismatches on LP64 targets.
        printf("0x%04zx  ", offset);

        // Number of real bytes in this row (the last row may be short).
        size_t n = size - offset;
        if (n > 16) {
            n = 16;
        }

        // Hex column: always 16 slots wide so the text column lines up.
        for (size_t i = 0; i < 16; ++i) {
            if (i == 8) {
                printf(" ");
            }

            if (i < n) {
                printf("%02x ", data[offset + i]);
            } else {
                printf("   ");
            }
        }

        printf(" ");

        // Text column.
        for (size_t i = 0; i < n; ++i) {
            const uint8_t c = data[offset + i];
            if (isprint(c)) {
                printf("%c", c);
            } else {
                printf(".");
            }
        }

        printf("\n");
    }
}
294
295static const char *FourCC2MIME(uint32_t fourcc) {
296    switch (fourcc) {
297        case FOURCC('m', 'p', '4', 'a'):
298            return MEDIA_MIMETYPE_AUDIO_AAC;
299
300        case FOURCC('s', 'a', 'm', 'r'):
301            return MEDIA_MIMETYPE_AUDIO_AMR_NB;
302
303        case FOURCC('s', 'a', 'w', 'b'):
304            return MEDIA_MIMETYPE_AUDIO_AMR_WB;
305
306        case FOURCC('m', 'p', '4', 'v'):
307            return MEDIA_MIMETYPE_VIDEO_MPEG4;
308
309        case FOURCC('s', '2', '6', '3'):
310        case FOURCC('h', '2', '6', '3'):
311        case FOURCC('H', '2', '6', '3'):
312            return MEDIA_MIMETYPE_VIDEO_H263;
313
314        case FOURCC('a', 'v', 'c', '1'):
315            return MEDIA_MIMETYPE_VIDEO_AVC;
316
317        default:
318            CHECK(!"should not be here.");
319            return NULL;
320    }
321}
322
323static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
324    if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
325        // AMR NB audio is always mono, 8kHz
326        *channels = 1;
327        *rate = 8000;
328        return true;
329    } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
330        // AMR WB audio is always mono, 16kHz
331        *channels = 1;
332        *rate = 16000;
333        return true;
334    }
335    return false;
336}
337
338MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
339    : mSidxDuration(0),
340      mMoofOffset(0),
341      mDataSource(source),
342      mInitCheck(NO_INIT),
343      mHasVideo(false),
344      mFirstTrack(NULL),
345      mLastTrack(NULL),
346      mFileMetaData(new MetaData),
347      mFirstSINF(NULL),
348      mIsDrm(false) {
349}
350
351MPEG4Extractor::~MPEG4Extractor() {
352    Track *track = mFirstTrack;
353    while (track) {
354        Track *next = track->next;
355
356        delete track;
357        track = next;
358    }
359    mFirstTrack = mLastTrack = NULL;
360
361    SINF *sinf = mFirstSINF;
362    while (sinf) {
363        SINF *next = sinf->next;
364        delete sinf->IPMPData;
365        delete sinf;
366        sinf = next;
367    }
368    mFirstSINF = NULL;
369
370    for (size_t i = 0; i < mPssh.size(); i++) {
371        delete [] mPssh[i].data;
372    }
373}
374
375uint32_t MPEG4Extractor::flags() const {
376    return CAN_PAUSE |
377            ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
378                    (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
379}
380
381sp<MetaData> MPEG4Extractor::getMetaData() {
382    status_t err;
383    if ((err = readMetaData()) != OK) {
384        return new MetaData;
385    }
386
387    return mFileMetaData;
388}
389
390size_t MPEG4Extractor::countTracks() {
391    status_t err;
392    if ((err = readMetaData()) != OK) {
393        ALOGV("MPEG4Extractor::countTracks: no tracks");
394        return 0;
395    }
396
397    size_t n = 0;
398    Track *track = mFirstTrack;
399    while (track) {
400        ++n;
401        track = track->next;
402    }
403
404    ALOGV("MPEG4Extractor::countTracks: %d tracks", n);
405    return n;
406}
407
408sp<MetaData> MPEG4Extractor::getTrackMetaData(
409        size_t index, uint32_t flags) {
410    status_t err;
411    if ((err = readMetaData()) != OK) {
412        return NULL;
413    }
414
415    Track *track = mFirstTrack;
416    while (index > 0) {
417        if (track == NULL) {
418            return NULL;
419        }
420
421        track = track->next;
422        --index;
423    }
424
425    if (track == NULL) {
426        return NULL;
427    }
428
429    if ((flags & kIncludeExtensiveMetaData)
430            && !track->includes_expensive_metadata) {
431        track->includes_expensive_metadata = true;
432
433        const char *mime;
434        CHECK(track->meta->findCString(kKeyMIMEType, &mime));
435        if (!strncasecmp("video/", mime, 6)) {
436            if (mMoofOffset > 0) {
437                int64_t duration;
438                if (track->meta->findInt64(kKeyDuration, &duration)) {
439                    // nothing fancy, just pick a frame near 1/4th of the duration
440                    track->meta->setInt64(
441                            kKeyThumbnailTime, duration / 4);
442                }
443            } else {
444                uint32_t sampleIndex;
445                uint32_t sampleTime;
446                if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK
447                        && track->sampleTable->getMetaDataForSample(
448                            sampleIndex, NULL /* offset */, NULL /* size */,
449                            &sampleTime) == OK) {
450                    track->meta->setInt64(
451                            kKeyThumbnailTime,
452                            ((int64_t)sampleTime * 1000000) / track->timescale);
453                }
454            }
455        }
456    }
457
458    return track->meta;
459}
460
// Writes the four bytes of "x", most significant byte first, into "s" and
// terminates the result with '\0'. "s" must have room for 5 chars.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int i = 0; i < 4; ++i) {
        const int shift = 8 * (3 - i);
        s[i] = (x >> shift) & 0xff;
    }
    s[4] = '\0';
}
468
469status_t MPEG4Extractor::readMetaData() {
470    if (mInitCheck != NO_INIT) {
471        return mInitCheck;
472    }
473
474    off64_t offset = 0;
475    status_t err;
476    while (true) {
477        err = parseChunk(&offset, 0);
478        if (err == OK) {
479            continue;
480        }
481
482        uint32_t hdr[2];
483        if (mDataSource->readAt(offset, hdr, 8) < 8) {
484            break;
485        }
486        uint32_t chunk_type = ntohl(hdr[1]);
487        if (chunk_type == FOURCC('s', 'i', 'd', 'x')) {
488            // parse the sidx box too
489            continue;
490        } else if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
491            // store the offset of the first segment
492            mMoofOffset = offset;
493        }
494        break;
495    }
496
497    if (mInitCheck == OK) {
498        if (mHasVideo) {
499            mFileMetaData->setCString(
500                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
501        } else {
502            mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
503        }
504
505        mInitCheck = OK;
506    } else {
507        mInitCheck = err;
508    }
509
510    CHECK_NE(err, (status_t)NO_INIT);
511
512    // copy pssh data into file metadata
513    int psshsize = 0;
514    for (size_t i = 0; i < mPssh.size(); i++) {
515        psshsize += 20 + mPssh[i].datalen;
516    }
517    if (psshsize) {
518        char *buf = (char*)malloc(psshsize);
519        char *ptr = buf;
520        for (size_t i = 0; i < mPssh.size(); i++) {
521            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
522            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
523            ptr += (20 + mPssh[i].datalen);
524        }
525        mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize);
526        free(buf);
527    }
528    return mInitCheck;
529}
530
531char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
532    if (mFirstSINF == NULL) {
533        return NULL;
534    }
535
536    SINF *sinf = mFirstSINF;
537    while (sinf && (trackID != sinf->trackID)) {
538        sinf = sinf->next;
539    }
540
541    if (sinf == NULL) {
542        return NULL;
543    }
544
545    *len = sinf->len;
546    return sinf->IPMPData;
547}
548
549// Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
550static int32_t readSize(off64_t offset,
551        const sp<DataSource> DataSource, uint8_t *numOfBytes) {
552    uint32_t size = 0;
553    uint8_t data;
554    bool moreData = true;
555    *numOfBytes = 0;
556
557    while (moreData) {
558        if (DataSource->readAt(offset, &data, 1) < 1) {
559            return -1;
560        }
561        offset ++;
562        moreData = (data >= 128) ? true : false;
563        size = (size << 7) | (data & 0x7f); // Take last 7 bits
564        (*numOfBytes) ++;
565    }
566
567    return size;
568}
569
570status_t MPEG4Extractor::parseDrmSINF(off64_t *offset, off64_t data_offset) {
571    uint8_t updateIdTag;
572    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
573        return ERROR_IO;
574    }
575    data_offset ++;
576
577    if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
578        return ERROR_MALFORMED;
579    }
580
581    uint8_t numOfBytes;
582    int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
583    if (size < 0) {
584        return ERROR_IO;
585    }
586    int32_t classSize = size;
587    data_offset += numOfBytes;
588
589    while(size >= 11 ) {
590        uint8_t descriptorTag;
591        if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
592            return ERROR_IO;
593        }
594        data_offset ++;
595
596        if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
597            return ERROR_MALFORMED;
598        }
599
600        uint8_t buffer[8];
601        //ObjectDescriptorID and ObjectDescriptor url flag
602        if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
603            return ERROR_IO;
604        }
605        data_offset += 2;
606
607        if ((buffer[1] >> 5) & 0x0001) { //url flag is set
608            return ERROR_MALFORMED;
609        }
610
611        if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
612            return ERROR_IO;
613        }
614        data_offset += 8;
615
616        if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
617                || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
618            return ERROR_MALFORMED;
619        }
620
621        SINF *sinf = new SINF;
622        sinf->trackID = U16_AT(&buffer[3]);
623        sinf->IPMPDescriptorID = buffer[7];
624        sinf->next = mFirstSINF;
625        mFirstSINF = sinf;
626
627        size -= (8 + 2 + 1);
628    }
629
630    if (size != 0) {
631        return ERROR_MALFORMED;
632    }
633
634    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
635        return ERROR_IO;
636    }
637    data_offset ++;
638
639    if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
640        return ERROR_MALFORMED;
641    }
642
643    size = readSize(data_offset, mDataSource, &numOfBytes);
644    if (size < 0) {
645        return ERROR_IO;
646    }
647    classSize = size;
648    data_offset += numOfBytes;
649
650    while (size > 0) {
651        uint8_t tag;
652        int32_t dataLen;
653        if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
654            return ERROR_IO;
655        }
656        data_offset ++;
657
658        if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
659            uint8_t id;
660            dataLen = readSize(data_offset, mDataSource, &numOfBytes);
661            if (dataLen < 0) {
662                return ERROR_IO;
663            } else if (dataLen < 4) {
664                return ERROR_MALFORMED;
665            }
666            data_offset += numOfBytes;
667
668            if (mDataSource->readAt(data_offset, &id, 1) < 1) {
669                return ERROR_IO;
670            }
671            data_offset ++;
672
673            SINF *sinf = mFirstSINF;
674            while (sinf && (sinf->IPMPDescriptorID != id)) {
675                sinf = sinf->next;
676            }
677            if (sinf == NULL) {
678                return ERROR_MALFORMED;
679            }
680            sinf->len = dataLen - 3;
681            sinf->IPMPData = new char[sinf->len];
682
683            if (mDataSource->readAt(data_offset + 2, sinf->IPMPData, sinf->len) < sinf->len) {
684                return ERROR_IO;
685            }
686            data_offset += sinf->len;
687
688            size -= (dataLen + numOfBytes + 1);
689        }
690    }
691
692    if (size != 0) {
693        return ERROR_MALFORMED;
694    }
695
696    return UNKNOWN_ERROR;  // Return a dummy error.
697}
698
699struct PathAdder {
700    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
701        : mPath(path) {
702        mPath->push(chunkType);
703    }
704
705    ~PathAdder() {
706        mPath->pop();
707    }
708
709private:
710    Vector<uint32_t> *mPath;
711
712    PathAdder(const PathAdder &);
713    PathAdder &operator=(const PathAdder &);
714};
715
716static bool underMetaDataPath(const Vector<uint32_t> &path) {
717    return path.size() >= 5
718        && path[0] == FOURCC('m', 'o', 'o', 'v')
719        && path[1] == FOURCC('u', 'd', 't', 'a')
720        && path[2] == FOURCC('m', 'e', 't', 'a')
721        && path[3] == FOURCC('i', 'l', 's', 't');
722}
723
724// Given a time in seconds since Jan 1 1904, produce a human-readable string.
725static void convertTimeToDate(int64_t time_1904, String8 *s) {
726    time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600);
727
728    char tmp[32];
729    strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970));
730
731    s->setTo(tmp);
732}
733
734status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
735    ALOGV("entering parseChunk %lld/%d", *offset, depth);
736    uint32_t hdr[2];
737    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
738        return ERROR_IO;
739    }
740    uint64_t chunk_size = ntohl(hdr[0]);
741    uint32_t chunk_type = ntohl(hdr[1]);
742    off64_t data_offset = *offset + 8;
743
744    if (chunk_size == 1) {
745        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
746            return ERROR_IO;
747        }
748        chunk_size = ntoh64(chunk_size);
749        data_offset += 8;
750
751        if (chunk_size < 16) {
752            // The smallest valid chunk is 16 bytes long in this case.
753            return ERROR_MALFORMED;
754        }
755    } else if (chunk_size < 8) {
756        // The smallest valid chunk is 8 bytes long.
757        return ERROR_MALFORMED;
758    }
759
760    char chunk[5];
761    MakeFourCCString(chunk_type, chunk);
762    ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth);
763
764#if 0
765    static const char kWhitespace[] = "                                        ";
766    const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
767    printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size);
768
769    char buffer[256];
770    size_t n = chunk_size;
771    if (n > sizeof(buffer)) {
772        n = sizeof(buffer);
773    }
774    if (mDataSource->readAt(*offset, buffer, n)
775            < (ssize_t)n) {
776        return ERROR_IO;
777    }
778
779    hexdump(buffer, n);
780#endif
781
782    PathAdder autoAdder(&mPath, chunk_type);
783
784    off64_t chunk_data_size = *offset + chunk_size - data_offset;
785
786    if (chunk_type != FOURCC('c', 'p', 'r', 't')
787            && chunk_type != FOURCC('c', 'o', 'v', 'r')
788            && mPath.size() == 5 && underMetaDataPath(mPath)) {
789        off64_t stop_offset = *offset + chunk_size;
790        *offset = data_offset;
791        while (*offset < stop_offset) {
792            status_t err = parseChunk(offset, depth + 1);
793            if (err != OK) {
794                return err;
795            }
796        }
797
798        if (*offset != stop_offset) {
799            return ERROR_MALFORMED;
800        }
801
802        return OK;
803    }
804
805    switch(chunk_type) {
806        case FOURCC('m', 'o', 'o', 'v'):
807        case FOURCC('t', 'r', 'a', 'k'):
808        case FOURCC('m', 'd', 'i', 'a'):
809        case FOURCC('m', 'i', 'n', 'f'):
810        case FOURCC('d', 'i', 'n', 'f'):
811        case FOURCC('s', 't', 'b', 'l'):
812        case FOURCC('m', 'v', 'e', 'x'):
813        case FOURCC('m', 'o', 'o', 'f'):
814        case FOURCC('t', 'r', 'a', 'f'):
815        case FOURCC('m', 'f', 'r', 'a'):
816        case FOURCC('u', 'd', 't', 'a'):
817        case FOURCC('i', 'l', 's', 't'):
818        case FOURCC('s', 'i', 'n', 'f'):
819        case FOURCC('s', 'c', 'h', 'i'):
820        {
821            if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
822                ALOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size);
823
824                if (mDataSource->flags()
825                        & (DataSource::kWantsPrefetching
826                            | DataSource::kIsCachingDataSource)) {
827                    sp<MPEG4DataSource> cachedSource =
828                        new MPEG4DataSource(mDataSource);
829
830                    if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
831                        mDataSource = cachedSource;
832                    }
833                }
834
835                mLastTrack->sampleTable = new SampleTable(mDataSource);
836            }
837
838            bool isTrack = false;
839            if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
840                isTrack = true;
841
842                Track *track = new Track;
843                track->next = NULL;
844                if (mLastTrack) {
845                    mLastTrack->next = track;
846                } else {
847                    mFirstTrack = track;
848                }
849                mLastTrack = track;
850
851                track->meta = new MetaData;
852                track->includes_expensive_metadata = false;
853                track->skipTrack = false;
854                track->timescale = 0;
855                track->meta->setCString(kKeyMIMEType, "application/octet-stream");
856            }
857
858            off64_t stop_offset = *offset + chunk_size;
859            *offset = data_offset;
860            while (*offset < stop_offset) {
861                status_t err = parseChunk(offset, depth + 1);
862                if (err != OK) {
863                    return err;
864                }
865            }
866
867            if (*offset != stop_offset) {
868                return ERROR_MALFORMED;
869            }
870
871            if (isTrack) {
872                if (mLastTrack->skipTrack) {
873                    Track *cur = mFirstTrack;
874
875                    if (cur == mLastTrack) {
876                        delete cur;
877                        mFirstTrack = mLastTrack = NULL;
878                    } else {
879                        while (cur && cur->next != mLastTrack) {
880                            cur = cur->next;
881                        }
882                        cur->next = NULL;
883                        delete mLastTrack;
884                        mLastTrack = cur;
885                    }
886
887                    return OK;
888                }
889
890                status_t err = verifyTrack(mLastTrack);
891
892                if (err != OK) {
893                    return err;
894                }
895            } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
896                mInitCheck = OK;
897
898                if (!mIsDrm) {
899                    return UNKNOWN_ERROR;  // Return a dummy error.
900                } else {
901                    return OK;
902                }
903            }
904            break;
905        }
906
907        case FOURCC('f', 'r', 'm', 'a'):
908        {
909            uint32_t original_fourcc;
910            if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
911                return ERROR_IO;
912            }
913            original_fourcc = ntohl(original_fourcc);
914            ALOGV("read original format: %d", original_fourcc);
915            mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
916            uint32_t num_channels = 0;
917            uint32_t sample_rate = 0;
918            if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
919                mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
920                mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
921            }
922            *offset += chunk_size;
923            break;
924        }
925
926        case FOURCC('t', 'e', 'n', 'c'):
927        {
928            if (chunk_size < 32) {
929                return ERROR_MALFORMED;
930            }
931
932            // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
933            // default IV size, 16 bytes default KeyID
934            // (ISO 23001-7)
935            char buf[4];
936            memset(buf, 0, 4);
937            if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
938                return ERROR_IO;
939            }
940            uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
941            if (defaultAlgorithmId > 1) {
942                // only 0 (clear) and 1 (AES-128) are valid
943                return ERROR_MALFORMED;
944            }
945
946            memset(buf, 0, 4);
947            if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
948                return ERROR_IO;
949            }
950            uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
951
952            if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
953                    (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
954                // only unencrypted data must have 0 IV size
955                return ERROR_MALFORMED;
956            } else if (defaultIVSize != 0 &&
957                    defaultIVSize != 8 &&
958                    defaultIVSize != 16) {
959                // only supported sizes are 0, 8 and 16
960                return ERROR_MALFORMED;
961            }
962
963            uint8_t defaultKeyId[16];
964
965            if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
966                return ERROR_IO;
967            }
968
969            mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
970            mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
971            mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
972            *offset += chunk_size;
973            break;
974        }
975
976        case FOURCC('t', 'k', 'h', 'd'):
977        {
978            status_t err;
979            if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
980                return err;
981            }
982
983            *offset += chunk_size;
984            break;
985        }
986
987        case FOURCC('p', 's', 's', 'h'):
988        {
989            PsshInfo pssh;
990
991            if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
992                return ERROR_IO;
993            }
994
995            uint32_t psshdatalen = 0;
996            if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
997                return ERROR_IO;
998            }
999            pssh.datalen = ntohl(psshdatalen);
1000            ALOGV("pssh data size: %d", pssh.datalen);
1001            if (pssh.datalen + 20 > chunk_size) {
1002                // pssh data length exceeds size of containing box
1003                return ERROR_MALFORMED;
1004            }
1005
1006            pssh.data = new uint8_t[pssh.datalen];
1007            ALOGV("allocated pssh @ %p", pssh.data);
1008            ssize_t requested = (ssize_t) pssh.datalen;
1009            if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1010                return ERROR_IO;
1011            }
1012            mPssh.push_back(pssh);
1013
1014            *offset += chunk_size;
1015            break;
1016        }
1017
1018        case FOURCC('m', 'd', 'h', 'd'):
1019        {
1020            if (chunk_data_size < 4) {
1021                return ERROR_MALFORMED;
1022            }
1023
1024            uint8_t version;
1025            if (mDataSource->readAt(
1026                        data_offset, &version, sizeof(version))
1027                    < (ssize_t)sizeof(version)) {
1028                return ERROR_IO;
1029            }
1030
1031            off64_t timescale_offset;
1032
1033            if (version == 1) {
1034                timescale_offset = data_offset + 4 + 16;
1035            } else if (version == 0) {
1036                timescale_offset = data_offset + 4 + 8;
1037            } else {
1038                return ERROR_IO;
1039            }
1040
1041            uint32_t timescale;
1042            if (mDataSource->readAt(
1043                        timescale_offset, &timescale, sizeof(timescale))
1044                    < (ssize_t)sizeof(timescale)) {
1045                return ERROR_IO;
1046            }
1047
1048            mLastTrack->timescale = ntohl(timescale);
1049
1050            int64_t duration = 0;
1051            if (version == 1) {
1052                if (mDataSource->readAt(
1053                            timescale_offset + 4, &duration, sizeof(duration))
1054                        < (ssize_t)sizeof(duration)) {
1055                    return ERROR_IO;
1056                }
1057                duration = ntoh64(duration);
1058            } else {
1059                uint32_t duration32;
1060                if (mDataSource->readAt(
1061                            timescale_offset + 4, &duration32, sizeof(duration32))
1062                        < (ssize_t)sizeof(duration32)) {
1063                    return ERROR_IO;
1064                }
1065                // ffmpeg sets duration to -1, which is incorrect.
1066                if (duration32 != 0xffffffff) {
1067                    duration = ntohl(duration32);
1068                }
1069            }
1070            mLastTrack->meta->setInt64(
1071                    kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1072
1073            uint8_t lang[2];
1074            off64_t lang_offset;
1075            if (version == 1) {
1076                lang_offset = timescale_offset + 4 + 8;
1077            } else if (version == 0) {
1078                lang_offset = timescale_offset + 4 + 4;
1079            } else {
1080                return ERROR_IO;
1081            }
1082
1083            if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1084                    < (ssize_t)sizeof(lang)) {
1085                return ERROR_IO;
1086            }
1087
1088            // To get the ISO-639-2/T three character language code
1089            // 1 bit pad followed by 3 5-bits characters. Each character
1090            // is packed as the difference between its ASCII value and 0x60.
1091            char lang_code[4];
1092            lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1093            lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1094            lang_code[2] = (lang[1] & 0x1f) + 0x60;
1095            lang_code[3] = '\0';
1096
1097            mLastTrack->meta->setCString(
1098                    kKeyMediaLanguage, lang_code);
1099
1100            *offset += chunk_size;
1101            break;
1102        }
1103
1104        case FOURCC('s', 't', 's', 'd'):
1105        {
1106            if (chunk_data_size < 8) {
1107                return ERROR_MALFORMED;
1108            }
1109
1110            uint8_t buffer[8];
1111            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1112                return ERROR_MALFORMED;
1113            }
1114
1115            if (mDataSource->readAt(
1116                        data_offset, buffer, 8) < 8) {
1117                return ERROR_IO;
1118            }
1119
1120            if (U32_AT(buffer) != 0) {
1121                // Should be version 0, flags 0.
1122                return ERROR_MALFORMED;
1123            }
1124
1125            uint32_t entry_count = U32_AT(&buffer[4]);
1126
1127            if (entry_count > 1) {
1128                // For 3GPP timed text, there could be multiple tx3g boxes contain
1129                // multiple text display formats. These formats will be used to
1130                // display the timed text.
1131                // For encrypted files, there may also be more than one entry.
1132                const char *mime;
1133                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1134                if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1135                        strcasecmp(mime, "application/octet-stream")) {
1136                    // For now we only support a single type of media per track.
1137                    mLastTrack->skipTrack = true;
1138                    *offset += chunk_size;
1139                    break;
1140                }
1141            }
1142            off64_t stop_offset = *offset + chunk_size;
1143            *offset = data_offset + 8;
1144            for (uint32_t i = 0; i < entry_count; ++i) {
1145                status_t err = parseChunk(offset, depth + 1);
1146                if (err != OK) {
1147                    return err;
1148                }
1149            }
1150
1151            if (*offset != stop_offset) {
1152                return ERROR_MALFORMED;
1153            }
1154            break;
1155        }
1156
1157        case FOURCC('m', 'p', '4', 'a'):
1158        case FOURCC('e', 'n', 'c', 'a'):
1159        case FOURCC('s', 'a', 'm', 'r'):
1160        case FOURCC('s', 'a', 'w', 'b'):
1161        {
1162            uint8_t buffer[8 + 20];
1163            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1164                // Basic AudioSampleEntry size.
1165                return ERROR_MALFORMED;
1166            }
1167
1168            if (mDataSource->readAt(
1169                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1170                return ERROR_IO;
1171            }
1172
1173            uint16_t data_ref_index = U16_AT(&buffer[6]);
1174            uint32_t num_channels = U16_AT(&buffer[16]);
1175
1176            uint16_t sample_size = U16_AT(&buffer[18]);
1177            uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1178
1179            if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1180                // if the chunk type is enca, we'll get the type from the sinf/frma box later
1181                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1182                AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1183            }
1184            ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1185                   chunk, num_channels, sample_size, sample_rate);
1186            mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1187            mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1188
1189            off64_t stop_offset = *offset + chunk_size;
1190            *offset = data_offset + sizeof(buffer);
1191            while (*offset < stop_offset) {
1192                status_t err = parseChunk(offset, depth + 1);
1193                if (err != OK) {
1194                    return err;
1195                }
1196            }
1197
1198            if (*offset != stop_offset) {
1199                return ERROR_MALFORMED;
1200            }
1201            break;
1202        }
1203
1204        case FOURCC('m', 'p', '4', 'v'):
1205        case FOURCC('e', 'n', 'c', 'v'):
1206        case FOURCC('s', '2', '6', '3'):
1207        case FOURCC('H', '2', '6', '3'):
1208        case FOURCC('h', '2', '6', '3'):
1209        case FOURCC('a', 'v', 'c', '1'):
1210        {
1211            mHasVideo = true;
1212
1213            uint8_t buffer[78];
1214            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1215                // Basic VideoSampleEntry size.
1216                return ERROR_MALFORMED;
1217            }
1218
1219            if (mDataSource->readAt(
1220                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1221                return ERROR_IO;
1222            }
1223
1224            uint16_t data_ref_index = U16_AT(&buffer[6]);
1225            uint16_t width = U16_AT(&buffer[6 + 18]);
1226            uint16_t height = U16_AT(&buffer[6 + 20]);
1227
1228            // The video sample is not standard-compliant if it has invalid dimension.
1229            // Use some default width and height value, and
1230            // let the decoder figure out the actual width and height (and thus
            // be prepared for INFO_FORMAT_CHANGED event).
1232            if (width == 0)  width  = 352;
1233            if (height == 0) height = 288;
1234
1235            // printf("*** coding='%s' width=%d height=%d\n",
1236            //        chunk, width, height);
1237
1238            if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1239                // if the chunk type is encv, we'll get the type from the sinf/frma box later
1240                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1241            }
1242            mLastTrack->meta->setInt32(kKeyWidth, width);
1243            mLastTrack->meta->setInt32(kKeyHeight, height);
1244
1245            off64_t stop_offset = *offset + chunk_size;
1246            *offset = data_offset + sizeof(buffer);
1247            while (*offset < stop_offset) {
1248                status_t err = parseChunk(offset, depth + 1);
1249                if (err != OK) {
1250                    return err;
1251                }
1252            }
1253
1254            if (*offset != stop_offset) {
1255                return ERROR_MALFORMED;
1256            }
1257            break;
1258        }
1259
1260        case FOURCC('s', 't', 'c', 'o'):
1261        case FOURCC('c', 'o', '6', '4'):
1262        {
1263            status_t err =
1264                mLastTrack->sampleTable->setChunkOffsetParams(
1265                        chunk_type, data_offset, chunk_data_size);
1266
1267            if (err != OK) {
1268                return err;
1269            }
1270
1271            *offset += chunk_size;
1272            break;
1273        }
1274
1275        case FOURCC('s', 't', 's', 'c'):
1276        {
1277            status_t err =
1278                mLastTrack->sampleTable->setSampleToChunkParams(
1279                        data_offset, chunk_data_size);
1280
1281            if (err != OK) {
1282                return err;
1283            }
1284
1285            *offset += chunk_size;
1286            break;
1287        }
1288
1289        case FOURCC('s', 't', 's', 'z'):
1290        case FOURCC('s', 't', 'z', '2'):
1291        {
1292            status_t err =
1293                mLastTrack->sampleTable->setSampleSizeParams(
1294                        chunk_type, data_offset, chunk_data_size);
1295
1296            if (err != OK) {
1297                return err;
1298            }
1299
1300            size_t max_size;
1301            err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1302
1303            if (err != OK) {
1304                return err;
1305            }
1306
1307            if (max_size != 0) {
1308                // Assume that a given buffer only contains at most 10 chunks,
1309                // each chunk originally prefixed with a 2 byte length will
1310                // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1311                // and thus will grow by 2 bytes per chunk.
1312                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1313            } else {
1314                // No size was specified. Pick a conservatively large size.
1315                int32_t width, height;
1316                if (mLastTrack->meta->findInt32(kKeyWidth, &width) &&
1317                        mLastTrack->meta->findInt32(kKeyHeight, &height)) {
1318                    mLastTrack->meta->setInt32(kKeyMaxInputSize, width * height * 3 / 2);
1319                } else {
1320                    ALOGE("No width or height, assuming worst case 1080p");
1321                    mLastTrack->meta->setInt32(kKeyMaxInputSize, 3110400);
1322                }
1323            }
1324            *offset += chunk_size;
1325
1326            // Calculate average frame rate.
1327            const char *mime;
1328            CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1329            if (!strncasecmp("video/", mime, 6)) {
1330                size_t nSamples = mLastTrack->sampleTable->countSamples();
1331                int64_t durationUs;
1332                if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1333                    if (durationUs > 0) {
1334                        int32_t frameRate = (nSamples * 1000000LL +
1335                                    (durationUs >> 1)) / durationUs;
1336                        mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1337                    }
1338                }
1339            }
1340
1341            break;
1342        }
1343
1344        case FOURCC('s', 't', 't', 's'):
1345        {
1346            status_t err =
1347                mLastTrack->sampleTable->setTimeToSampleParams(
1348                        data_offset, chunk_data_size);
1349
1350            if (err != OK) {
1351                return err;
1352            }
1353
1354            *offset += chunk_size;
1355            break;
1356        }
1357
1358        case FOURCC('c', 't', 't', 's'):
1359        {
1360            status_t err =
1361                mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1362                        data_offset, chunk_data_size);
1363
1364            if (err != OK) {
1365                return err;
1366            }
1367
1368            *offset += chunk_size;
1369            break;
1370        }
1371
1372        case FOURCC('s', 't', 's', 's'):
1373        {
1374            status_t err =
1375                mLastTrack->sampleTable->setSyncSampleParams(
1376                        data_offset, chunk_data_size);
1377
1378            if (err != OK) {
1379                return err;
1380            }
1381
1382            *offset += chunk_size;
1383            break;
1384        }
1385
1386        // @xyz
1387        case FOURCC('\xA9', 'x', 'y', 'z'):
1388        {
1389            // Best case the total data length inside "@xyz" box
1390            // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/",
1391            // where "\x00\x04" is the text string length with value = 4,
            // "\x15\xc7" is the language code = en, and "0+0" is a
1393            // location (string) value with longitude = 0 and latitude = 0.
1394            if (chunk_data_size < 8) {
1395                return ERROR_MALFORMED;
1396            }
1397
1398            // Worst case the location string length would be 18,
1399            // for instance +90.0000-180.0000, without the trailing "/" and
1400            // the string length + language code.
1401            char buffer[18];
1402
            // Subtracting 5 from the data size is because the text string length +
1404            // language code takes 4 bytes, and the trailing slash "/" takes 1 byte.
1405            off64_t location_length = chunk_data_size - 5;
1406            if (location_length >= (off64_t) sizeof(buffer)) {
1407                return ERROR_MALFORMED;
1408            }
1409
1410            if (mDataSource->readAt(
1411                        data_offset + 4, buffer, location_length) < location_length) {
1412                return ERROR_IO;
1413            }
1414
1415            buffer[location_length] = '\0';
1416            mFileMetaData->setCString(kKeyLocation, buffer);
1417            *offset += chunk_size;
1418            break;
1419        }
1420
1421        case FOURCC('e', 's', 'd', 's'):
1422        {
1423            if (chunk_data_size < 4) {
1424                return ERROR_MALFORMED;
1425            }
1426
1427            uint8_t buffer[256];
1428            if (chunk_data_size > (off64_t)sizeof(buffer)) {
1429                return ERROR_BUFFER_TOO_SMALL;
1430            }
1431
1432            if (mDataSource->readAt(
1433                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
1434                return ERROR_IO;
1435            }
1436
1437            if (U32_AT(buffer) != 0) {
1438                // Should be version 0, flags 0.
1439                return ERROR_MALFORMED;
1440            }
1441
1442            mLastTrack->meta->setData(
1443                    kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1444
1445            if (mPath.size() >= 2
1446                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1447                // Information from the ESDS must be relied on for proper
1448                // setup of sample rate and channel count for MPEG4 Audio.
1449                // The generic header appears to only contain generic
1450                // information...
1451
1452                status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1453                        &buffer[4], chunk_data_size - 4);
1454
1455                if (err != OK) {
1456                    return err;
1457                }
1458            }
1459
1460            *offset += chunk_size;
1461            break;
1462        }
1463
1464        case FOURCC('a', 'v', 'c', 'C'):
1465        {
1466            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1467
1468            if (mDataSource->readAt(
1469                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1470                return ERROR_IO;
1471            }
1472
1473            mLastTrack->meta->setData(
1474                    kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
1475
1476            *offset += chunk_size;
1477            break;
1478        }
1479
1480        case FOURCC('d', '2', '6', '3'):
1481        {
1482            /*
1483             * d263 contains a fixed 7 bytes part:
1484             *   vendor - 4 bytes
1485             *   version - 1 byte
1486             *   level - 1 byte
1487             *   profile - 1 byte
1488             * optionally, "d263" box itself may contain a 16-byte
1489             * bit rate box (bitr)
1490             *   average bit rate - 4 bytes
1491             *   max bit rate - 4 bytes
1492             */
1493            char buffer[23];
1494            if (chunk_data_size != 7 &&
1495                chunk_data_size != 23) {
1496                ALOGE("Incorrect D263 box size %lld", chunk_data_size);
1497                return ERROR_MALFORMED;
1498            }
1499
1500            if (mDataSource->readAt(
1501                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1502                return ERROR_IO;
1503            }
1504
1505            mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1506
1507            *offset += chunk_size;
1508            break;
1509        }
1510
1511        case FOURCC('m', 'e', 't', 'a'):
1512        {
1513            uint8_t buffer[4];
1514            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1515                return ERROR_MALFORMED;
1516            }
1517
1518            if (mDataSource->readAt(
1519                        data_offset, buffer, 4) < 4) {
1520                return ERROR_IO;
1521            }
1522
1523            if (U32_AT(buffer) != 0) {
1524                // Should be version 0, flags 0.
1525
1526                // If it's not, let's assume this is one of those
1527                // apparently malformed chunks that don't have flags
1528                // and completely different semantics than what's
1529                // in the MPEG4 specs and skip it.
1530                *offset += chunk_size;
1531                return OK;
1532            }
1533
1534            off64_t stop_offset = *offset + chunk_size;
1535            *offset = data_offset + sizeof(buffer);
1536            while (*offset < stop_offset) {
1537                status_t err = parseChunk(offset, depth + 1);
1538                if (err != OK) {
1539                    return err;
1540                }
1541            }
1542
1543            if (*offset != stop_offset) {
1544                return ERROR_MALFORMED;
1545            }
1546            break;
1547        }
1548
1549        case FOURCC('m', 'e', 'a', 'n'):
1550        case FOURCC('n', 'a', 'm', 'e'):
1551        case FOURCC('d', 'a', 't', 'a'):
1552        {
1553            if (mPath.size() == 6 && underMetaDataPath(mPath)) {
1554                status_t err = parseMetaData(data_offset, chunk_data_size);
1555
1556                if (err != OK) {
1557                    return err;
1558                }
1559            }
1560
1561            *offset += chunk_size;
1562            break;
1563        }
1564
1565        case FOURCC('m', 'v', 'h', 'd'):
1566        {
1567            if (chunk_data_size < 12) {
1568                return ERROR_MALFORMED;
1569            }
1570
1571            uint8_t header[12];
1572            if (mDataSource->readAt(
1573                        data_offset, header, sizeof(header))
1574                    < (ssize_t)sizeof(header)) {
1575                return ERROR_IO;
1576            }
1577
1578            int64_t creationTime;
1579            if (header[0] == 1) {
1580                creationTime = U64_AT(&header[4]);
1581            } else if (header[0] != 0) {
1582                return ERROR_MALFORMED;
1583            } else {
1584                creationTime = U32_AT(&header[4]);
1585            }
1586
1587            String8 s;
1588            convertTimeToDate(creationTime, &s);
1589
1590            mFileMetaData->setCString(kKeyDate, s.string());
1591
1592            *offset += chunk_size;
1593            break;
1594        }
1595
1596        case FOURCC('m', 'd', 'a', 't'):
1597        {
1598            ALOGV("mdat chunk, drm: %d", mIsDrm);
1599            if (!mIsDrm) {
1600                *offset += chunk_size;
1601                break;
1602            }
1603
1604            if (chunk_size < 8) {
1605                return ERROR_MALFORMED;
1606            }
1607
1608            return parseDrmSINF(offset, data_offset);
1609        }
1610
1611        case FOURCC('h', 'd', 'l', 'r'):
1612        {
1613            uint32_t buffer;
1614            if (mDataSource->readAt(
1615                        data_offset + 8, &buffer, 4) < 4) {
1616                return ERROR_IO;
1617            }
1618
1619            uint32_t type = ntohl(buffer);
1620            // For the 3GPP file format, the handler-type within the 'hdlr' box
1621            // shall be 'text'. We also want to support 'sbtl' handler type
1622            // for a practical reason as various MPEG4 containers use it.
1623            if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
1624                mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
1625            }
1626
1627            *offset += chunk_size;
1628            break;
1629        }
1630
1631        case FOURCC('t', 'x', '3', 'g'):
1632        {
1633            uint32_t type;
1634            const void *data;
1635            size_t size = 0;
1636            if (!mLastTrack->meta->findData(
1637                    kKeyTextFormatData, &type, &data, &size)) {
1638                size = 0;
1639            }
1640
1641            uint8_t *buffer = new uint8_t[size + chunk_size];
1642
1643            if (size > 0) {
1644                memcpy(buffer, data, size);
1645            }
1646
1647            if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
1648                    < chunk_size) {
1649                delete[] buffer;
1650                buffer = NULL;
1651
1652                return ERROR_IO;
1653            }
1654
1655            mLastTrack->meta->setData(
1656                    kKeyTextFormatData, 0, buffer, size + chunk_size);
1657
1658            delete[] buffer;
1659
1660            *offset += chunk_size;
1661            break;
1662        }
1663
1664        case FOURCC('c', 'o', 'v', 'r'):
1665        {
1666            if (mFileMetaData != NULL) {
1667                ALOGV("chunk_data_size = %lld and data_offset = %lld",
1668                        chunk_data_size, data_offset);
1669                sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1);
1670                if (mDataSource->readAt(
1671                    data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) {
1672                    return ERROR_IO;
1673                }
1674                const int kSkipBytesOfDataBox = 16;
1675                mFileMetaData->setData(
1676                    kKeyAlbumArt, MetaData::TYPE_NONE,
1677                    buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
1678            }
1679
1680            *offset += chunk_size;
1681            break;
1682        }
1683
1684        case FOURCC('-', '-', '-', '-'):
1685        {
1686            mLastCommentMean.clear();
1687            mLastCommentName.clear();
1688            mLastCommentData.clear();
1689            *offset += chunk_size;
1690            break;
1691        }
1692
1693        case FOURCC('s', 'i', 'd', 'x'):
1694        {
1695            parseSegmentIndex(data_offset, chunk_data_size);
1696            *offset += chunk_size;
1697            return UNKNOWN_ERROR; // stop parsing after sidx
1698        }
1699
1700        default:
1701        {
1702            *offset += chunk_size;
1703            break;
1704        }
1705    }
1706
1707    return OK;
1708}
1709
1710status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
1711  ALOGV("MPEG4Extractor::parseSegmentIndex");
1712
1713    if (size < 12) {
1714      return -EINVAL;
1715    }
1716
1717    uint32_t flags;
1718    if (!mDataSource->getUInt32(offset, &flags)) {
1719        return ERROR_MALFORMED;
1720    }
1721
1722    uint32_t version = flags >> 24;
1723    flags &= 0xffffff;
1724
1725    ALOGV("sidx version %d", version);
1726
1727    uint32_t referenceId;
1728    if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
1729        return ERROR_MALFORMED;
1730    }
1731
1732    uint32_t timeScale;
1733    if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
1734        return ERROR_MALFORMED;
1735    }
1736    ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
1737
1738    uint64_t earliestPresentationTime;
1739    uint64_t firstOffset;
1740
1741    offset += 12;
1742    size -= 12;
1743
1744    if (version == 0) {
1745        if (size < 8) {
1746            return -EINVAL;
1747        }
1748        uint32_t tmp;
1749        if (!mDataSource->getUInt32(offset, &tmp)) {
1750            return ERROR_MALFORMED;
1751        }
1752        earliestPresentationTime = tmp;
1753        if (!mDataSource->getUInt32(offset + 4, &tmp)) {
1754            return ERROR_MALFORMED;
1755        }
1756        firstOffset = tmp;
1757        offset += 8;
1758        size -= 8;
1759    } else {
1760        if (size < 16) {
1761            return -EINVAL;
1762        }
1763        if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
1764            return ERROR_MALFORMED;
1765        }
1766        if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
1767            return ERROR_MALFORMED;
1768        }
1769        offset += 16;
1770        size -= 16;
1771    }
1772    ALOGV("sidx pres/off: %Ld/%Ld", earliestPresentationTime, firstOffset);
1773
1774    if (size < 4) {
1775        return -EINVAL;
1776    }
1777
1778    uint16_t referenceCount;
1779    if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
1780        return ERROR_MALFORMED;
1781    }
1782    offset += 4;
1783    size -= 4;
1784    ALOGV("refcount: %d", referenceCount);
1785
1786    if (size < referenceCount * 12) {
1787        return -EINVAL;
1788    }
1789
1790    uint64_t total_duration = 0;
1791    for (unsigned int i = 0; i < referenceCount; i++) {
1792        uint32_t d1, d2, d3;
1793
1794        if (!mDataSource->getUInt32(offset, &d1) ||     // size
1795            !mDataSource->getUInt32(offset + 4, &d2) || // duration
1796            !mDataSource->getUInt32(offset + 8, &d3)) { // flags
1797            return ERROR_MALFORMED;
1798        }
1799
1800        if (d1 & 0x80000000) {
1801            ALOGW("sub-sidx boxes not supported yet");
1802        }
1803        bool sap = d3 & 0x80000000;
1804        bool saptype = d3 >> 28;
1805        if (!sap || saptype > 2) {
1806            ALOGW("not a stream access point, or unsupported type");
1807        }
1808        total_duration += d2;
1809        offset += 12;
1810        ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
1811        SidxEntry se;
1812        se.mSize = d1 & 0x7fffffff;
1813        se.mDurationUs = 1000000LL * d2 / timeScale;
1814        mSidxEntries.add(se);
1815    }
1816
1817    mSidxDuration = total_duration * 1000000 / timeScale;
1818    ALOGV("duration: %lld", mSidxDuration);
1819
1820    int64_t metaDuration;
1821    if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
1822        mLastTrack->meta->setInt64(kKeyDuration, mSidxDuration);
1823    }
1824    return OK;
1825}
1826
1827
1828
1829status_t MPEG4Extractor::parseTrackHeader(
1830        off64_t data_offset, off64_t data_size) {
1831    if (data_size < 4) {
1832        return ERROR_MALFORMED;
1833    }
1834
1835    uint8_t version;
1836    if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1837        return ERROR_IO;
1838    }
1839
1840    size_t dynSize = (version == 1) ? 36 : 24;
1841
1842    uint8_t buffer[36 + 60];
1843
1844    if (data_size != (off64_t)dynSize + 60) {
1845        return ERROR_MALFORMED;
1846    }
1847
1848    if (mDataSource->readAt(
1849                data_offset, buffer, data_size) < (ssize_t)data_size) {
1850        return ERROR_IO;
1851    }
1852
1853    uint64_t ctime, mtime, duration;
1854    int32_t id;
1855
1856    if (version == 1) {
1857        ctime = U64_AT(&buffer[4]);
1858        mtime = U64_AT(&buffer[12]);
1859        id = U32_AT(&buffer[20]);
1860        duration = U64_AT(&buffer[28]);
1861    } else {
1862        CHECK_EQ((unsigned)version, 0u);
1863
1864        ctime = U32_AT(&buffer[4]);
1865        mtime = U32_AT(&buffer[8]);
1866        id = U32_AT(&buffer[12]);
1867        duration = U32_AT(&buffer[20]);
1868    }
1869
1870    mLastTrack->meta->setInt32(kKeyTrackID, id);
1871
1872    size_t matrixOffset = dynSize + 16;
1873    int32_t a00 = U32_AT(&buffer[matrixOffset]);
1874    int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
1875    int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
1876    int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
1877    int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
1878    int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
1879
1880#if 0
1881    ALOGI("x' = %.2f * x + %.2f * y + %.2f",
1882         a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
1883    ALOGI("y' = %.2f * x + %.2f * y + %.2f",
1884         a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
1885#endif
1886
1887    uint32_t rotationDegrees;
1888
1889    static const int32_t kFixedOne = 0x10000;
1890    if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
1891        // Identity, no rotation
1892        rotationDegrees = 0;
1893    } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
1894        rotationDegrees = 90;
1895    } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
1896        rotationDegrees = 270;
1897    } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
1898        rotationDegrees = 180;
1899    } else {
1900        ALOGW("We only support 0,90,180,270 degree rotation matrices");
1901        rotationDegrees = 0;
1902    }
1903
1904    if (rotationDegrees != 0) {
1905        mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
1906    }
1907
1908    // Handle presentation display size, which could be different
1909    // from the image size indicated by kKeyWidth and kKeyHeight.
1910    uint32_t width = U32_AT(&buffer[dynSize + 52]);
1911    uint32_t height = U32_AT(&buffer[dynSize + 56]);
1912    mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
1913    mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
1914
1915    return OK;
1916}
1917
1918status_t MPEG4Extractor::parseMetaData(off64_t offset, size_t size) {
1919    if (size < 4) {
1920        return ERROR_MALFORMED;
1921    }
1922
1923    uint8_t *buffer = new uint8_t[size + 1];
1924    if (mDataSource->readAt(
1925                offset, buffer, size) != (ssize_t)size) {
1926        delete[] buffer;
1927        buffer = NULL;
1928
1929        return ERROR_IO;
1930    }
1931
1932    uint32_t flags = U32_AT(buffer);
1933
1934    uint32_t metadataKey = 0;
1935    char chunk[5];
1936    MakeFourCCString(mPath[4], chunk);
1937    ALOGV("meta: %s @ %lld", chunk, offset);
1938    switch (mPath[4]) {
1939        case FOURCC(0xa9, 'a', 'l', 'b'):
1940        {
1941            metadataKey = kKeyAlbum;
1942            break;
1943        }
1944        case FOURCC(0xa9, 'A', 'R', 'T'):
1945        {
1946            metadataKey = kKeyArtist;
1947            break;
1948        }
1949        case FOURCC('a', 'A', 'R', 'T'):
1950        {
1951            metadataKey = kKeyAlbumArtist;
1952            break;
1953        }
1954        case FOURCC(0xa9, 'd', 'a', 'y'):
1955        {
1956            metadataKey = kKeyYear;
1957            break;
1958        }
1959        case FOURCC(0xa9, 'n', 'a', 'm'):
1960        {
1961            metadataKey = kKeyTitle;
1962            break;
1963        }
1964        case FOURCC(0xa9, 'w', 'r', 't'):
1965        {
1966            metadataKey = kKeyWriter;
1967            break;
1968        }
1969        case FOURCC('c', 'o', 'v', 'r'):
1970        {
1971            metadataKey = kKeyAlbumArt;
1972            break;
1973        }
1974        case FOURCC('g', 'n', 'r', 'e'):
1975        {
1976            metadataKey = kKeyGenre;
1977            break;
1978        }
1979        case FOURCC(0xa9, 'g', 'e', 'n'):
1980        {
1981            metadataKey = kKeyGenre;
1982            break;
1983        }
1984        case FOURCC('c', 'p', 'i', 'l'):
1985        {
1986            if (size == 9 && flags == 21) {
1987                char tmp[16];
1988                sprintf(tmp, "%d",
1989                        (int)buffer[size - 1]);
1990
1991                mFileMetaData->setCString(kKeyCompilation, tmp);
1992            }
1993            break;
1994        }
1995        case FOURCC('t', 'r', 'k', 'n'):
1996        {
1997            if (size == 16 && flags == 0) {
1998                char tmp[16];
1999                uint16_t* pTrack = (uint16_t*)&buffer[10];
2000                uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2001                sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2002
2003                mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2004            }
2005            break;
2006        }
2007        case FOURCC('d', 'i', 's', 'k'):
2008        {
2009            if ((size == 14 || size == 16) && flags == 0) {
2010                char tmp[16];
2011                uint16_t* pDisc = (uint16_t*)&buffer[10];
2012                uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2013                sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2014
2015                mFileMetaData->setCString(kKeyDiscNumber, tmp);
2016            }
2017            break;
2018        }
2019        case FOURCC('-', '-', '-', '-'):
2020        {
2021            buffer[size] = '\0';
2022            switch (mPath[5]) {
2023                case FOURCC('m', 'e', 'a', 'n'):
2024                    mLastCommentMean.setTo((const char *)buffer + 4);
2025                    break;
2026                case FOURCC('n', 'a', 'm', 'e'):
2027                    mLastCommentName.setTo((const char *)buffer + 4);
2028                    break;
2029                case FOURCC('d', 'a', 't', 'a'):
2030                    mLastCommentData.setTo((const char *)buffer + 8);
2031                    break;
2032            }
2033
2034            // Once we have a set of mean/name/data info, go ahead and process
2035            // it to see if its something we are interested in.  Whether or not
2036            // were are interested in the specific tag, make sure to clear out
2037            // the set so we can be ready to process another tuple should one
2038            // show up later in the file.
2039            if ((mLastCommentMean.length() != 0) &&
2040                (mLastCommentName.length() != 0) &&
2041                (mLastCommentData.length() != 0)) {
2042
2043                if (mLastCommentMean == "com.apple.iTunes"
2044                        && mLastCommentName == "iTunSMPB") {
2045                    int32_t delay, padding;
2046                    if (sscanf(mLastCommentData,
2047                               " %*x %x %x %*x", &delay, &padding) == 2) {
2048                        mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2049                        mLastTrack->meta->setInt32(kKeyEncoderPadding, padding);
2050                    }
2051                }
2052
2053                mLastCommentMean.clear();
2054                mLastCommentName.clear();
2055                mLastCommentData.clear();
2056            }
2057            break;
2058        }
2059
2060        default:
2061            break;
2062    }
2063
2064    if (size >= 8 && metadataKey) {
2065        if (metadataKey == kKeyAlbumArt) {
2066            mFileMetaData->setData(
2067                    kKeyAlbumArt, MetaData::TYPE_NONE,
2068                    buffer + 8, size - 8);
2069        } else if (metadataKey == kKeyGenre) {
2070            if (flags == 0) {
2071                // uint8_t genre code, iTunes genre codes are
2072                // the standard id3 codes, except they start
2073                // at 1 instead of 0 (e.g. Pop is 14, not 13)
2074                // We use standard id3 numbering, so subtract 1.
2075                int genrecode = (int)buffer[size - 1];
2076                genrecode--;
2077                if (genrecode < 0) {
2078                    genrecode = 255; // reserved for 'unknown genre'
2079                }
2080                char genre[10];
2081                sprintf(genre, "%d", genrecode);
2082
2083                mFileMetaData->setCString(metadataKey, genre);
2084            } else if (flags == 1) {
2085                // custom genre string
2086                buffer[size] = '\0';
2087
2088                mFileMetaData->setCString(
2089                        metadataKey, (const char *)buffer + 8);
2090            }
2091        } else {
2092            buffer[size] = '\0';
2093
2094            mFileMetaData->setCString(
2095                    metadataKey, (const char *)buffer + 8);
2096        }
2097    }
2098
2099    delete[] buffer;
2100    buffer = NULL;
2101
2102    return OK;
2103}
2104
2105sp<MediaSource> MPEG4Extractor::getTrack(size_t index) {
2106    status_t err;
2107    if ((err = readMetaData()) != OK) {
2108        return NULL;
2109    }
2110
2111    Track *track = mFirstTrack;
2112    while (index > 0) {
2113        if (track == NULL) {
2114            return NULL;
2115        }
2116
2117        track = track->next;
2118        --index;
2119    }
2120
2121    if (track == NULL) {
2122        return NULL;
2123    }
2124
2125    ALOGV("getTrack called, pssh: %d", mPssh.size());
2126
2127    return new MPEG4Source(
2128            track->meta, mDataSource, track->timescale, track->sampleTable,
2129            mSidxEntries, mMoofOffset);
2130}
2131
2132// static
2133status_t MPEG4Extractor::verifyTrack(Track *track) {
2134    const char *mime;
2135    CHECK(track->meta->findCString(kKeyMIMEType, &mime));
2136
2137    uint32_t type;
2138    const void *data;
2139    size_t size;
2140    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
2141        if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
2142                || type != kTypeAVCC) {
2143            return ERROR_MALFORMED;
2144        }
2145    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
2146            || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
2147        if (!track->meta->findData(kKeyESDS, &type, &data, &size)
2148                || type != kTypeESDS) {
2149            return ERROR_MALFORMED;
2150        }
2151    }
2152
2153    if (!track->sampleTable->isValid()) {
2154        // Make sure we have all the metadata we need.
2155        return ERROR_MALFORMED;
2156    }
2157
2158    return OK;
2159}
2160
2161status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
2162        const void *esds_data, size_t esds_size) {
2163    ESDS esds(esds_data, esds_size);
2164
2165    uint8_t objectTypeIndication;
2166    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
2167        return ERROR_MALFORMED;
2168    }
2169
2170    if (objectTypeIndication == 0xe1) {
2171        // This isn't MPEG4 audio at all, it's QCELP 14k...
2172        mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
2173        return OK;
2174    }
2175
2176    if (objectTypeIndication  == 0x6b) {
2177        // The media subtype is MP3 audio
2178        // Our software MP3 audio decoder may not be able to handle
2179        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
2180        ALOGE("MP3 track in MP4/3GPP file is not supported");
2181        return ERROR_UNSUPPORTED;
2182    }
2183
2184    const uint8_t *csd;
2185    size_t csd_size;
2186    if (esds.getCodecSpecificInfo(
2187                (const void **)&csd, &csd_size) != OK) {
2188        return ERROR_MALFORMED;
2189    }
2190
2191#if 0
2192    printf("ESD of size %d\n", csd_size);
2193    hexdump(csd, csd_size);
2194#endif
2195
2196    if (csd_size == 0) {
2197        // There's no further information, i.e. no codec specific data
2198        // Let's assume that the information provided in the mpeg4 headers
2199        // is accurate and hope for the best.
2200
2201        return OK;
2202    }
2203
2204    if (csd_size < 2) {
2205        return ERROR_MALFORMED;
2206    }
2207
2208    ABitReader br(csd, csd_size);
2209    uint32_t objectType = br.getBits(5);
2210
2211    if (objectType == 31) {  // AAC-ELD => additional 6 bits
2212        objectType = 32 + br.getBits(6);
2213    }
2214
2215    uint32_t freqIndex = br.getBits(4);
2216
2217    int32_t sampleRate = 0;
2218    int32_t numChannels = 0;
2219    if (freqIndex == 15) {
2220        if (csd_size < 5) {
2221            return ERROR_MALFORMED;
2222        }
2223        sampleRate = br.getBits(24);
2224        numChannels = br.getBits(4);
2225    } else {
2226        numChannels = br.getBits(4);
2227        if (objectType == 5) {
2228            // SBR specific config per 14496-3 table 1.13
2229            freqIndex = br.getBits(4);
2230            if (freqIndex == 15) {
2231                if (csd_size < 8) {
2232                    return ERROR_MALFORMED;
2233                }
2234                sampleRate = br.getBits(24);
2235            }
2236        }
2237
2238        if (sampleRate == 0) {
2239            static uint32_t kSamplingRate[] = {
2240                96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
2241                16000, 12000, 11025, 8000, 7350
2242            };
2243
2244            if (freqIndex == 13 || freqIndex == 14) {
2245                return ERROR_MALFORMED;
2246            }
2247
2248            sampleRate = kSamplingRate[freqIndex];
2249        }
2250    }
2251
2252    if (numChannels == 0) {
2253        return ERROR_UNSUPPORTED;
2254    }
2255
2256    int32_t prevSampleRate;
2257    CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));
2258
2259    if (prevSampleRate != sampleRate) {
2260        ALOGV("mpeg4 audio sample rate different from previous setting. "
2261             "was: %d, now: %d", prevSampleRate, sampleRate);
2262    }
2263
2264    mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);
2265
2266    int32_t prevChannelCount;
2267    CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));
2268
2269    if (prevChannelCount != numChannels) {
2270        ALOGV("mpeg4 audio channel count different from previous setting. "
2271             "was: %d, now: %d", prevChannelCount, numChannels);
2272    }
2273
2274    mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);
2275
2276    return OK;
2277}
2278
2279////////////////////////////////////////////////////////////////////////////////
2280
2281MPEG4Source::MPEG4Source(
2282        const sp<MetaData> &format,
2283        const sp<DataSource> &dataSource,
2284        int32_t timeScale,
2285        const sp<SampleTable> &sampleTable,
2286        Vector<SidxEntry> &sidx,
2287        off64_t firstMoofOffset)
2288    : mFormat(format),
2289      mDataSource(dataSource),
2290      mTimescale(timeScale),
2291      mSampleTable(sampleTable),
2292      mCurrentSampleIndex(0),
2293      mCurrentFragmentIndex(0),
2294      mSegments(sidx),
2295      mFirstMoofOffset(firstMoofOffset),
2296      mCurrentMoofOffset(firstMoofOffset),
2297      mCurrentTime(0),
2298      mCurrentSampleInfoAllocSize(0),
2299      mCurrentSampleInfoSizes(NULL),
2300      mCurrentSampleInfoOffsetsAllocSize(0),
2301      mCurrentSampleInfoOffsets(NULL),
2302      mIsAVC(false),
2303      mNALLengthSize(0),
2304      mStarted(false),
2305      mGroup(NULL),
2306      mBuffer(NULL),
2307      mWantsNALFragments(false),
2308      mSrcBuffer(NULL) {
2309
2310    mFormat->findInt32(kKeyCryptoMode, &mCryptoMode);
2311    mDefaultIVSize = 0;
2312    mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
2313    uint32_t keytype;
2314    const void *key;
2315    size_t keysize;
2316    if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
2317        CHECK(keysize <= 16);
2318        memset(mCryptoKey, 0, 16);
2319        memcpy(mCryptoKey, key, keysize);
2320    }
2321
2322    const char *mime;
2323    bool success = mFormat->findCString(kKeyMIMEType, &mime);
2324    CHECK(success);
2325
2326    mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
2327
2328    if (mIsAVC) {
2329        uint32_t type;
2330        const void *data;
2331        size_t size;
2332        CHECK(format->findData(kKeyAVCC, &type, &data, &size));
2333
2334        const uint8_t *ptr = (const uint8_t *)data;
2335
2336        CHECK(size >= 7);
2337        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
2338
2339        // The number of bytes used to encode the length of a NAL unit.
2340        mNALLengthSize = 1 + (ptr[4] & 3);
2341    }
2342
2343    CHECK(format->findInt32(kKeyTrackID, &mTrackId));
2344
2345    if (mFirstMoofOffset != 0) {
2346        off64_t offset = mFirstMoofOffset;
2347        parseChunk(&offset);
2348    }
2349}
2350
2351MPEG4Source::~MPEG4Source() {
2352    if (mStarted) {
2353        stop();
2354    }
2355    free(mCurrentSampleInfoSizes);
2356    free(mCurrentSampleInfoOffsets);
2357}
2358
2359status_t MPEG4Source::start(MetaData *params) {
2360    Mutex::Autolock autoLock(mLock);
2361
2362    CHECK(!mStarted);
2363
2364    int32_t val;
2365    if (params && params->findInt32(kKeyWantsNALFragments, &val)
2366        && val != 0) {
2367        mWantsNALFragments = true;
2368    } else {
2369        mWantsNALFragments = false;
2370    }
2371
2372    mGroup = new MediaBufferGroup;
2373
2374    int32_t max_size;
2375    CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size));
2376
2377    mGroup->add_buffer(new MediaBuffer(max_size));
2378
2379    mSrcBuffer = new uint8_t[max_size];
2380
2381    mStarted = true;
2382
2383    return OK;
2384}
2385
2386status_t MPEG4Source::stop() {
2387    Mutex::Autolock autoLock(mLock);
2388
2389    CHECK(mStarted);
2390
2391    if (mBuffer != NULL) {
2392        mBuffer->release();
2393        mBuffer = NULL;
2394    }
2395
2396    delete[] mSrcBuffer;
2397    mSrcBuffer = NULL;
2398
2399    delete mGroup;
2400    mGroup = NULL;
2401
2402    mStarted = false;
2403    mCurrentSampleIndex = 0;
2404
2405    return OK;
2406}
2407
2408status_t MPEG4Source::parseChunk(off64_t *offset) {
2409    uint32_t hdr[2];
2410    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
2411        return ERROR_IO;
2412    }
2413    uint64_t chunk_size = ntohl(hdr[0]);
2414    uint32_t chunk_type = ntohl(hdr[1]);
2415    off64_t data_offset = *offset + 8;
2416
2417    if (chunk_size == 1) {
2418        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
2419            return ERROR_IO;
2420        }
2421        chunk_size = ntoh64(chunk_size);
2422        data_offset += 8;
2423
2424        if (chunk_size < 16) {
2425            // The smallest valid chunk is 16 bytes long in this case.
2426            return ERROR_MALFORMED;
2427        }
2428    } else if (chunk_size < 8) {
2429        // The smallest valid chunk is 8 bytes long.
2430        return ERROR_MALFORMED;
2431    }
2432
2433    char chunk[5];
2434    MakeFourCCString(chunk_type, chunk);
2435    ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset);
2436
2437    off64_t chunk_data_size = *offset + chunk_size - data_offset;
2438
2439    switch(chunk_type) {
2440
2441        case FOURCC('t', 'r', 'a', 'f'):
2442        case FOURCC('m', 'o', 'o', 'f'): {
2443            off64_t stop_offset = *offset + chunk_size;
2444            *offset = data_offset;
2445            while (*offset < stop_offset) {
2446                status_t err = parseChunk(offset);
2447                if (err != OK) {
2448                    return err;
2449                }
2450            }
2451            if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
2452                // *offset points to the mdat box following this moof
2453                parseChunk(offset); // doesn't actually parse it, just updates offset
2454                mNextMoofOffset = *offset;
2455            }
2456            break;
2457        }
2458
2459        case FOURCC('t', 'f', 'h', 'd'): {
2460                status_t err;
2461                if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
2462                    return err;
2463                }
2464                *offset += chunk_size;
2465                break;
2466        }
2467
2468        case FOURCC('t', 'r', 'u', 'n'): {
2469                status_t err;
2470                if (mLastParsedTrackId == mTrackId) {
2471                    if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
2472                        return err;
2473                    }
2474                }
2475
2476                *offset += chunk_size;
2477                break;
2478        }
2479
2480        case FOURCC('s', 'a', 'i', 'z'): {
2481            status_t err;
2482            if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
2483                return err;
2484            }
2485            *offset += chunk_size;
2486            break;
2487        }
2488        case FOURCC('s', 'a', 'i', 'o'): {
2489            status_t err;
2490            if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
2491                return err;
2492            }
2493            *offset += chunk_size;
2494            break;
2495        }
2496
2497        case FOURCC('m', 'd', 'a', 't'): {
2498            // parse DRM info if present
2499            ALOGV("MPEG4Source::parseChunk mdat");
2500            // if saiz/saoi was previously observed, do something with the sampleinfos
2501            *offset += chunk_size;
2502            break;
2503        }
2504
2505        default: {
2506            *offset += chunk_size;
2507            break;
2508        }
2509    }
2510    return OK;
2511}
2512
2513status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size) {
2514    ALOGV("parseSampleAuxiliaryInformationSizes");
2515    // 14496-12 8.7.12
2516    uint8_t version;
2517    if (mDataSource->readAt(
2518            offset, &version, sizeof(version))
2519            < (ssize_t)sizeof(version)) {
2520        return ERROR_IO;
2521    }
2522
2523    if (version != 0) {
2524        return ERROR_UNSUPPORTED;
2525    }
2526    offset++;
2527
2528    uint32_t flags;
2529    if (!mDataSource->getUInt24(offset, &flags)) {
2530        return ERROR_IO;
2531    }
2532    offset += 3;
2533
2534    if (flags & 1) {
2535        uint32_t tmp;
2536        if (!mDataSource->getUInt32(offset, &tmp)) {
2537            return ERROR_MALFORMED;
2538        }
2539        mCurrentAuxInfoType = tmp;
2540        offset += 4;
2541        if (!mDataSource->getUInt32(offset, &tmp)) {
2542            return ERROR_MALFORMED;
2543        }
2544        mCurrentAuxInfoTypeParameter = tmp;
2545        offset += 4;
2546    }
2547
2548    uint8_t defsize;
2549    if (mDataSource->readAt(offset, &defsize, 1) != 1) {
2550        return ERROR_MALFORMED;
2551    }
2552    mCurrentDefaultSampleInfoSize = defsize;
2553    offset++;
2554
2555    uint32_t smplcnt;
2556    if (!mDataSource->getUInt32(offset, &smplcnt)) {
2557        return ERROR_MALFORMED;
2558    }
2559    mCurrentSampleInfoCount = smplcnt;
2560    offset += 4;
2561
2562    if (mCurrentDefaultSampleInfoSize != 0) {
2563        ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
2564        return OK;
2565    }
2566    if (smplcnt > mCurrentSampleInfoAllocSize) {
2567        mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
2568        mCurrentSampleInfoAllocSize = smplcnt;
2569    }
2570
2571    mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
2572    return OK;
2573}
2574
2575status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size) {
2576    ALOGV("parseSampleAuxiliaryInformationOffsets");
2577    // 14496-12 8.7.13
2578    uint8_t version;
2579    if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
2580        return ERROR_IO;
2581    }
2582    offset++;
2583
2584    uint32_t flags;
2585    if (!mDataSource->getUInt24(offset, &flags)) {
2586        return ERROR_IO;
2587    }
2588    offset += 3;
2589
2590    uint32_t entrycount;
2591    if (!mDataSource->getUInt32(offset, &entrycount)) {
2592        return ERROR_IO;
2593    }
2594    offset += 4;
2595
2596    if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
2597        mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8);
2598        mCurrentSampleInfoOffsetsAllocSize = entrycount;
2599    }
2600    mCurrentSampleInfoOffsetCount = entrycount;
2601
2602    for (size_t i = 0; i < entrycount; i++) {
2603        if (version == 0) {
2604            uint32_t tmp;
2605            if (!mDataSource->getUInt32(offset, &tmp)) {
2606                return ERROR_IO;
2607            }
2608            mCurrentSampleInfoOffsets[i] = tmp;
2609            offset += 4;
2610        } else {
2611            uint64_t tmp;
2612            if (!mDataSource->getUInt64(offset, &tmp)) {
2613                return ERROR_IO;
2614            }
2615            mCurrentSampleInfoOffsets[i] = tmp;
2616            offset += 8;
2617        }
2618    }
2619
2620    // parse clear/encrypted data
2621
2622    off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
2623
2624    drmoffset += mCurrentMoofOffset;
2625    int ivlength;
2626    CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength));
2627
2628    // read CencSampleAuxiliaryDataFormats
2629    for (size_t i = 0; i < mCurrentSampleInfoCount; i++) {
2630        Sample *smpl = &mCurrentSamples.editItemAt(i);
2631
2632        memset(smpl->iv, 0, 16);
2633        if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) {
2634            return ERROR_IO;
2635        }
2636
2637        drmoffset += ivlength;
2638
2639        int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
2640        if (smplinfosize == 0) {
2641            smplinfosize = mCurrentSampleInfoSizes[i];
2642        }
2643        if (smplinfosize > ivlength) {
2644            uint16_t numsubsamples;
2645            if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) {
2646                return ERROR_IO;
2647            }
2648            drmoffset += 2;
2649            for (size_t j = 0; j < numsubsamples; j++) {
2650                uint16_t numclear;
2651                uint32_t numencrypted;
2652                if (!mDataSource->getUInt16(drmoffset, &numclear)) {
2653                    return ERROR_IO;
2654                }
2655                drmoffset += 2;
2656                if (!mDataSource->getUInt32(drmoffset, &numencrypted)) {
2657                    return ERROR_IO;
2658                }
2659                drmoffset += 4;
2660                smpl->clearsizes.add(numclear);
2661                smpl->encryptedsizes.add(numencrypted);
2662            }
2663        } else {
2664            smpl->clearsizes.add(0);
2665            smpl->encryptedsizes.add(smpl->size);
2666        }
2667    }
2668
2669
2670    return OK;
2671}
2672
2673status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
2674
2675    if (size < 8) {
2676        return -EINVAL;
2677    }
2678
2679    uint32_t flags;
2680    if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
2681        return ERROR_MALFORMED;
2682    }
2683
2684    if (flags & 0xff000000) {
2685        return -EINVAL;
2686    }
2687
2688    if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
2689        return ERROR_MALFORMED;
2690    }
2691
2692    if (mLastParsedTrackId != mTrackId) {
2693        // this is not the right track, skip it
2694        return OK;
2695    }
2696
2697    mTrackFragmentHeaderInfo.mFlags = flags;
2698    mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
2699    offset += 8;
2700    size -= 8;
2701
2702    ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
2703
2704    if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
2705        if (size < 8) {
2706            return -EINVAL;
2707        }
2708
2709        if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
2710            return ERROR_MALFORMED;
2711        }
2712        offset += 8;
2713        size -= 8;
2714    }
2715
2716    if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
2717        if (size < 4) {
2718            return -EINVAL;
2719        }
2720
2721        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
2722            return ERROR_MALFORMED;
2723        }
2724        offset += 4;
2725        size -= 4;
2726    }
2727
2728    if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
2729        if (size < 4) {
2730            return -EINVAL;
2731        }
2732
2733        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
2734            return ERROR_MALFORMED;
2735        }
2736        offset += 4;
2737        size -= 4;
2738    }
2739
2740    if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
2741        if (size < 4) {
2742            return -EINVAL;
2743        }
2744
2745        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
2746            return ERROR_MALFORMED;
2747        }
2748        offset += 4;
2749        size -= 4;
2750    }
2751
2752    if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
2753        if (size < 4) {
2754            return -EINVAL;
2755        }
2756
2757        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
2758            return ERROR_MALFORMED;
2759        }
2760        offset += 4;
2761        size -= 4;
2762    }
2763
2764    if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
2765        mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
2766    }
2767
2768    mTrackFragmentHeaderInfo.mDataOffset = 0;
2769    return OK;
2770}
2771
2772status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
2773
2774    ALOGV("MPEG4Extractor::parseTrackFragmentRun");
2775    if (size < 8) {
2776        return -EINVAL;
2777    }
2778
2779    enum {
2780        kDataOffsetPresent                  = 0x01,
2781        kFirstSampleFlagsPresent            = 0x04,
2782        kSampleDurationPresent              = 0x100,
2783        kSampleSizePresent                  = 0x200,
2784        kSampleFlagsPresent                 = 0x400,
2785        kSampleCompositionTimeOffsetPresent = 0x800,
2786    };
2787
2788    uint32_t flags;
2789    if (!mDataSource->getUInt32(offset, &flags)) {
2790        return ERROR_MALFORMED;
2791    }
2792    ALOGV("fragment run flags: %08x", flags);
2793
2794    if (flags & 0xff000000) {
2795        return -EINVAL;
2796    }
2797
2798    if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
2799        // These two shall not be used together.
2800        return -EINVAL;
2801    }
2802
2803    uint32_t sampleCount;
2804    if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
2805        return ERROR_MALFORMED;
2806    }
2807    offset += 8;
2808    size -= 8;
2809
2810    uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
2811
2812    uint32_t firstSampleFlags = 0;
2813
2814    if (flags & kDataOffsetPresent) {
2815        if (size < 4) {
2816            return -EINVAL;
2817        }
2818
2819        int32_t dataOffsetDelta;
2820        if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
2821            return ERROR_MALFORMED;
2822        }
2823
2824        dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
2825
2826        offset += 4;
2827        size -= 4;
2828    }
2829
2830    if (flags & kFirstSampleFlagsPresent) {
2831        if (size < 4) {
2832            return -EINVAL;
2833        }
2834
2835        if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
2836            return ERROR_MALFORMED;
2837        }
2838        offset += 4;
2839        size -= 4;
2840    }
2841
2842    uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
2843             sampleCtsOffset = 0;
2844
2845    size_t bytesPerSample = 0;
2846    if (flags & kSampleDurationPresent) {
2847        bytesPerSample += 4;
2848    } else if (mTrackFragmentHeaderInfo.mFlags
2849            & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
2850        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
2851    } else {
2852        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
2853    }
2854
2855    if (flags & kSampleSizePresent) {
2856        bytesPerSample += 4;
2857    } else if (mTrackFragmentHeaderInfo.mFlags
2858            & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
2859        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
2860    } else {
2861        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
2862    }
2863
2864    if (flags & kSampleFlagsPresent) {
2865        bytesPerSample += 4;
2866    } else if (mTrackFragmentHeaderInfo.mFlags
2867            & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
2868        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
2869    } else {
2870        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
2871    }
2872
2873    if (flags & kSampleCompositionTimeOffsetPresent) {
2874        bytesPerSample += 4;
2875    } else {
2876        sampleCtsOffset = 0;
2877    }
2878
2879    if (size < sampleCount * bytesPerSample) {
2880        return -EINVAL;
2881    }
2882
2883    Sample tmp;
2884    for (uint32_t i = 0; i < sampleCount; ++i) {
2885        if (flags & kSampleDurationPresent) {
2886            if (!mDataSource->getUInt32(offset, &sampleDuration)) {
2887                return ERROR_MALFORMED;
2888            }
2889            offset += 4;
2890        }
2891
2892        if (flags & kSampleSizePresent) {
2893            if (!mDataSource->getUInt32(offset, &sampleSize)) {
2894                return ERROR_MALFORMED;
2895            }
2896            offset += 4;
2897        }
2898
2899        if (flags & kSampleFlagsPresent) {
2900            if (!mDataSource->getUInt32(offset, &sampleFlags)) {
2901                return ERROR_MALFORMED;
2902            }
2903            offset += 4;
2904        }
2905
2906        if (flags & kSampleCompositionTimeOffsetPresent) {
2907            if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
2908                return ERROR_MALFORMED;
2909            }
2910            offset += 4;
2911        }
2912
2913        ALOGV("adding sample %d at offset 0x%08llx, size %u, duration %u, "
2914              " flags 0x%08x", i + 1,
2915                dataOffset, sampleSize, sampleDuration,
2916                (flags & kFirstSampleFlagsPresent) && i == 0
2917                    ? firstSampleFlags : sampleFlags);
2918        tmp.offset = dataOffset;
2919        tmp.size = sampleSize;
2920        tmp.duration = sampleDuration;
2921        mCurrentSamples.add(tmp);
2922
2923        dataOffset += sampleSize;
2924    }
2925
2926    mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
2927
2928    return OK;
2929}
2930
2931sp<MetaData> MPEG4Source::getFormat() {
2932    Mutex::Autolock autoLock(mLock);
2933
2934    return mFormat;
2935}
2936
2937size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
2938    switch (mNALLengthSize) {
2939        case 1:
2940            return *data;
2941        case 2:
2942            return U16_AT(data);
2943        case 3:
2944            return ((size_t)data[0] << 16) | U16_AT(&data[1]);
2945        case 4:
2946            return U32_AT(data);
2947    }
2948
2949    // This cannot happen, mNALLengthSize springs to life by adding 1 to
2950    // a 2-bit integer.
2951    CHECK(!"Should not be here.");
2952
2953    return 0;
2954}
2955
2956status_t MPEG4Source::read(
2957        MediaBuffer **out, const ReadOptions *options) {
2958    Mutex::Autolock autoLock(mLock);
2959
2960    CHECK(mStarted);
2961
2962    if (mFirstMoofOffset > 0) {
2963        return fragmentedRead(out, options);
2964    }
2965
2966    *out = NULL;
2967
2968    int64_t targetSampleTimeUs = -1;
2969
2970    int64_t seekTimeUs;
2971    ReadOptions::SeekMode mode;
2972    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
2973        uint32_t findFlags = 0;
2974        switch (mode) {
2975            case ReadOptions::SEEK_PREVIOUS_SYNC:
2976                findFlags = SampleTable::kFlagBefore;
2977                break;
2978            case ReadOptions::SEEK_NEXT_SYNC:
2979                findFlags = SampleTable::kFlagAfter;
2980                break;
2981            case ReadOptions::SEEK_CLOSEST_SYNC:
2982            case ReadOptions::SEEK_CLOSEST:
2983                findFlags = SampleTable::kFlagClosest;
2984                break;
2985            default:
2986                CHECK(!"Should not be here.");
2987                break;
2988        }
2989
2990        uint32_t sampleIndex;
2991        status_t err = mSampleTable->findSampleAtTime(
2992                seekTimeUs * mTimescale / 1000000,
2993                &sampleIndex, findFlags);
2994
2995        if (mode == ReadOptions::SEEK_CLOSEST) {
2996            // We found the closest sample already, now we want the sync
2997            // sample preceding it (or the sample itself of course), even
2998            // if the subsequent sync sample is closer.
2999            findFlags = SampleTable::kFlagBefore;
3000        }
3001
3002        uint32_t syncSampleIndex;
3003        if (err == OK) {
3004            err = mSampleTable->findSyncSampleNear(
3005                    sampleIndex, &syncSampleIndex, findFlags);
3006        }
3007
3008        uint32_t sampleTime;
3009        if (err == OK) {
3010            err = mSampleTable->getMetaDataForSample(
3011                    sampleIndex, NULL, NULL, &sampleTime);
3012        }
3013
3014        if (err != OK) {
3015            if (err == ERROR_OUT_OF_RANGE) {
3016                // An attempt to seek past the end of the stream would
3017                // normally cause this ERROR_OUT_OF_RANGE error. Propagating
3018                // this all the way to the MediaPlayer would cause abnormal
3019                // termination. Legacy behaviour appears to be to behave as if
3020                // we had seeked to the end of stream, ending normally.
3021                err = ERROR_END_OF_STREAM;
3022            }
3023            ALOGV("end of stream");
3024            return err;
3025        }
3026
3027        if (mode == ReadOptions::SEEK_CLOSEST) {
3028            targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
3029        }
3030
3031#if 0
3032        uint32_t syncSampleTime;
3033        CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
3034                    syncSampleIndex, NULL, NULL, &syncSampleTime));
3035
3036        ALOGI("seek to time %lld us => sample at time %lld us, "
3037             "sync sample at time %lld us",
3038             seekTimeUs,
3039             sampleTime * 1000000ll / mTimescale,
3040             syncSampleTime * 1000000ll / mTimescale);
3041#endif
3042
3043        mCurrentSampleIndex = syncSampleIndex;
3044        if (mBuffer != NULL) {
3045            mBuffer->release();
3046            mBuffer = NULL;
3047        }
3048
3049        // fall through
3050    }
3051
3052    off64_t offset;
3053    size_t size;
3054    uint32_t cts;
3055    bool isSyncSample;
3056    bool newBuffer = false;
3057    if (mBuffer == NULL) {
3058        newBuffer = true;
3059
3060        status_t err =
3061            mSampleTable->getMetaDataForSample(
3062                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample);
3063
3064        if (err != OK) {
3065            return err;
3066        }
3067
3068        err = mGroup->acquire_buffer(&mBuffer);
3069
3070        if (err != OK) {
3071            CHECK(mBuffer == NULL);
3072            return err;
3073        }
3074    }
3075
3076    if (!mIsAVC || mWantsNALFragments) {
3077        if (newBuffer) {
3078            ssize_t num_bytes_read =
3079                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3080
3081            if (num_bytes_read < (ssize_t)size) {
3082                mBuffer->release();
3083                mBuffer = NULL;
3084
3085                return ERROR_IO;
3086            }
3087
3088            CHECK(mBuffer != NULL);
3089            mBuffer->set_range(0, size);
3090            mBuffer->meta_data()->clear();
3091            mBuffer->meta_data()->setInt64(
3092                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3093
3094            if (targetSampleTimeUs >= 0) {
3095                mBuffer->meta_data()->setInt64(
3096                        kKeyTargetTime, targetSampleTimeUs);
3097            }
3098
3099            if (isSyncSample) {
3100                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3101            }
3102
3103            ++mCurrentSampleIndex;
3104        }
3105
3106        if (!mIsAVC) {
3107            *out = mBuffer;
3108            mBuffer = NULL;
3109
3110            return OK;
3111        }
3112
3113        // Each NAL unit is split up into its constituent fragments and
3114        // each one of them returned in its own buffer.
3115
3116        CHECK(mBuffer->range_length() >= mNALLengthSize);
3117
3118        const uint8_t *src =
3119            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3120
3121        size_t nal_size = parseNALSize(src);
3122        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3123            ALOGE("incomplete NAL unit.");
3124
3125            mBuffer->release();
3126            mBuffer = NULL;
3127
3128            return ERROR_MALFORMED;
3129        }
3130
3131        MediaBuffer *clone = mBuffer->clone();
3132        CHECK(clone != NULL);
3133        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3134
3135        CHECK(mBuffer != NULL);
3136        mBuffer->set_range(
3137                mBuffer->range_offset() + mNALLengthSize + nal_size,
3138                mBuffer->range_length() - mNALLengthSize - nal_size);
3139
3140        if (mBuffer->range_length() == 0) {
3141            mBuffer->release();
3142            mBuffer = NULL;
3143        }
3144
3145        *out = clone;
3146
3147        return OK;
3148    } else {
3149        // Whole NAL units are returned but each fragment is prefixed by
3150        // the start code (0x00 00 00 01).
3151        ssize_t num_bytes_read = 0;
3152        int32_t drm = 0;
3153        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3154        if (usesDRM) {
3155            num_bytes_read =
3156                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3157        } else {
3158            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3159        }
3160
3161        if (num_bytes_read < (ssize_t)size) {
3162            mBuffer->release();
3163            mBuffer = NULL;
3164
3165            return ERROR_IO;
3166        }
3167
3168        if (usesDRM) {
3169            CHECK(mBuffer != NULL);
3170            mBuffer->set_range(0, size);
3171
3172        } else {
3173            uint8_t *dstData = (uint8_t *)mBuffer->data();
3174            size_t srcOffset = 0;
3175            size_t dstOffset = 0;
3176
3177            while (srcOffset < size) {
3178                bool isMalFormed = (srcOffset + mNALLengthSize > size);
3179                size_t nalLength = 0;
3180                if (!isMalFormed) {
3181                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
3182                    srcOffset += mNALLengthSize;
3183                    isMalFormed = srcOffset + nalLength > size;
3184                }
3185
3186                if (isMalFormed) {
3187                    ALOGE("Video is malformed");
3188                    mBuffer->release();
3189                    mBuffer = NULL;
3190                    return ERROR_MALFORMED;
3191                }
3192
3193                if (nalLength == 0) {
3194                    continue;
3195                }
3196
3197                CHECK(dstOffset + 4 <= mBuffer->size());
3198
3199                dstData[dstOffset++] = 0;
3200                dstData[dstOffset++] = 0;
3201                dstData[dstOffset++] = 0;
3202                dstData[dstOffset++] = 1;
3203                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
3204                srcOffset += nalLength;
3205                dstOffset += nalLength;
3206            }
3207            CHECK_EQ(srcOffset, size);
3208            CHECK(mBuffer != NULL);
3209            mBuffer->set_range(0, dstOffset);
3210        }
3211
3212        mBuffer->meta_data()->clear();
3213        mBuffer->meta_data()->setInt64(
3214                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3215
3216        if (targetSampleTimeUs >= 0) {
3217            mBuffer->meta_data()->setInt64(
3218                    kKeyTargetTime, targetSampleTimeUs);
3219        }
3220
3221        if (isSyncSample) {
3222            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3223        }
3224
3225        ++mCurrentSampleIndex;
3226
3227        *out = mBuffer;
3228        mBuffer = NULL;
3229
3230        return OK;
3231    }
3232}
3233
3234status_t MPEG4Source::fragmentedRead(
3235        MediaBuffer **out, const ReadOptions *options) {
3236
3237    ALOGV("MPEG4Source::fragmentedRead");
3238
3239    CHECK(mStarted);
3240
3241    *out = NULL;
3242
3243    int64_t targetSampleTimeUs = -1;
3244
3245    int64_t seekTimeUs;
3246    ReadOptions::SeekMode mode;
3247    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3248
3249        int numSidxEntries = mSegments.size();
3250        if (numSidxEntries != 0) {
3251            int64_t totalTime = 0;
3252            off64_t totalOffset = mFirstMoofOffset;
3253            for (int i = 0; i < numSidxEntries; i++) {
3254                const SidxEntry *se = &mSegments[i];
3255                if (totalTime + se->mDurationUs > seekTimeUs) {
3256                    // The requested time is somewhere in this segment
3257                    if ((mode == ReadOptions::SEEK_NEXT_SYNC) ||
3258                        (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
3259                        (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
3260                        // requested next sync, or closest sync and it was closer to the end of
3261                        // this segment
3262                        totalTime += se->mDurationUs;
3263                        totalOffset += se->mSize;
3264                    }
3265                    break;
3266                }
3267                totalTime += se->mDurationUs;
3268                totalOffset += se->mSize;
3269            }
3270        mCurrentMoofOffset = totalOffset;
3271        mCurrentSamples.clear();
3272        mCurrentSampleIndex = 0;
3273        parseChunk(&totalOffset);
3274        mCurrentTime = totalTime * mTimescale / 1000000ll;
3275        }
3276
3277        if (mBuffer != NULL) {
3278            mBuffer->release();
3279            mBuffer = NULL;
3280        }
3281
3282        // fall through
3283    }
3284
3285    off64_t offset = 0;
3286    size_t size;
3287    uint32_t cts = 0;
3288    bool isSyncSample = false;
3289    bool newBuffer = false;
3290    if (mBuffer == NULL) {
3291        newBuffer = true;
3292
3293        if (mCurrentSampleIndex >= mCurrentSamples.size()) {
3294            // move to next fragment
3295            Sample lastSample = mCurrentSamples[mCurrentSamples.size() - 1];
3296            off64_t nextMoof = mNextMoofOffset; // lastSample.offset + lastSample.size;
3297            mCurrentMoofOffset = nextMoof;
3298            mCurrentSamples.clear();
3299            mCurrentSampleIndex = 0;
3300            parseChunk(&nextMoof);
3301                if (mCurrentSampleIndex >= mCurrentSamples.size()) {
3302                    return ERROR_END_OF_STREAM;
3303                }
3304        }
3305
3306        const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
3307        offset = smpl->offset;
3308        size = smpl->size;
3309        cts = mCurrentTime;
3310        mCurrentTime += smpl->duration;
3311        isSyncSample = (mCurrentSampleIndex == 0); // XXX
3312
3313        status_t err = mGroup->acquire_buffer(&mBuffer);
3314
3315        if (err != OK) {
3316            CHECK(mBuffer == NULL);
3317            ALOGV("acquire_buffer returned %d", err);
3318            return err;
3319        }
3320    }
3321
3322    const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
3323    const sp<MetaData> bufmeta = mBuffer->meta_data();
3324    bufmeta->clear();
3325    if (smpl->encryptedsizes.size()) {
3326        // store clear/encrypted lengths in metadata
3327        bufmeta->setData(kKeyPlainSizes, 0,
3328                smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
3329        bufmeta->setData(kKeyEncryptedSizes, 0,
3330                smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
3331        bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size?
3332        bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
3333        bufmeta->setInt32(kKeyCryptoMode, mCryptoMode);
3334        bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16);
3335    }
3336
3337    if (!mIsAVC || mWantsNALFragments) {
3338        if (newBuffer) {
3339            ssize_t num_bytes_read =
3340                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3341
3342            if (num_bytes_read < (ssize_t)size) {
3343                mBuffer->release();
3344                mBuffer = NULL;
3345
3346                ALOGV("i/o error");
3347                return ERROR_IO;
3348            }
3349
3350            CHECK(mBuffer != NULL);
3351            mBuffer->set_range(0, size);
3352            mBuffer->meta_data()->setInt64(
3353                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3354
3355            if (targetSampleTimeUs >= 0) {
3356                mBuffer->meta_data()->setInt64(
3357                        kKeyTargetTime, targetSampleTimeUs);
3358            }
3359
3360            if (isSyncSample) {
3361                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3362            }
3363
3364            ++mCurrentSampleIndex;
3365        }
3366
3367        if (!mIsAVC) {
3368            *out = mBuffer;
3369            mBuffer = NULL;
3370
3371            return OK;
3372        }
3373
3374        // Each NAL unit is split up into its constituent fragments and
3375        // each one of them returned in its own buffer.
3376
3377        CHECK(mBuffer->range_length() >= mNALLengthSize);
3378
3379        const uint8_t *src =
3380            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3381
3382        size_t nal_size = parseNALSize(src);
3383        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3384            ALOGE("incomplete NAL unit.");
3385
3386            mBuffer->release();
3387            mBuffer = NULL;
3388
3389            return ERROR_MALFORMED;
3390        }
3391
3392        MediaBuffer *clone = mBuffer->clone();
3393        CHECK(clone != NULL);
3394        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3395
3396        CHECK(mBuffer != NULL);
3397        mBuffer->set_range(
3398                mBuffer->range_offset() + mNALLengthSize + nal_size,
3399                mBuffer->range_length() - mNALLengthSize - nal_size);
3400
3401        if (mBuffer->range_length() == 0) {
3402            mBuffer->release();
3403            mBuffer = NULL;
3404        }
3405
3406        *out = clone;
3407
3408        return OK;
3409    } else {
3410        ALOGV("whole NAL");
3411        // Whole NAL units are returned but each fragment is prefixed by
3412        // the start code (0x00 00 00 01).
3413        ssize_t num_bytes_read = 0;
3414        int32_t drm = 0;
3415        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3416        if (usesDRM) {
3417            num_bytes_read =
3418                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3419        } else {
3420            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3421        }
3422
3423        if (num_bytes_read < (ssize_t)size) {
3424            mBuffer->release();
3425            mBuffer = NULL;
3426
3427            ALOGV("i/o error");
3428            return ERROR_IO;
3429        }
3430
3431        if (usesDRM) {
3432            CHECK(mBuffer != NULL);
3433            mBuffer->set_range(0, size);
3434
3435        } else {
3436            uint8_t *dstData = (uint8_t *)mBuffer->data();
3437            size_t srcOffset = 0;
3438            size_t dstOffset = 0;
3439
3440            while (srcOffset < size) {
3441                bool isMalFormed = (srcOffset + mNALLengthSize > size);
3442                size_t nalLength = 0;
3443                if (!isMalFormed) {
3444                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
3445                    srcOffset += mNALLengthSize;
3446                    isMalFormed = srcOffset + nalLength > size;
3447                }
3448
3449                if (isMalFormed) {
3450                    ALOGE("Video is malformed");
3451                    mBuffer->release();
3452                    mBuffer = NULL;
3453                    return ERROR_MALFORMED;
3454                }
3455
3456                if (nalLength == 0) {
3457                    continue;
3458                }
3459
3460                CHECK(dstOffset + 4 <= mBuffer->size());
3461
3462                dstData[dstOffset++] = 0;
3463                dstData[dstOffset++] = 0;
3464                dstData[dstOffset++] = 0;
3465                dstData[dstOffset++] = 1;
3466                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
3467                srcOffset += nalLength;
3468                dstOffset += nalLength;
3469            }
3470            CHECK_EQ(srcOffset, size);
3471            CHECK(mBuffer != NULL);
3472            mBuffer->set_range(0, dstOffset);
3473        }
3474
3475        mBuffer->meta_data()->setInt64(
3476                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3477
3478        if (targetSampleTimeUs >= 0) {
3479            mBuffer->meta_data()->setInt64(
3480                    kKeyTargetTime, targetSampleTimeUs);
3481        }
3482
3483        if (isSyncSample) {
3484            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3485        }
3486
3487        ++mCurrentSampleIndex;
3488
3489        *out = mBuffer;
3490        mBuffer = NULL;
3491
3492        return OK;
3493    }
3494}
3495
3496MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
3497        const char *mimePrefix) {
3498    for (Track *track = mFirstTrack; track != NULL; track = track->next) {
3499        const char *mime;
3500        if (track->meta != NULL
3501                && track->meta->findCString(kKeyMIMEType, &mime)
3502                && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
3503            return track;
3504        }
3505    }
3506
3507    return NULL;
3508}
3509
3510static bool LegacySniffMPEG4(
3511        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
3512    uint8_t header[8];
3513
3514    ssize_t n = source->readAt(4, header, sizeof(header));
3515    if (n < (ssize_t)sizeof(header)) {
3516        return false;
3517    }
3518
3519    if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
3520        || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
3521        || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
3522        || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
3523        || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
3524        || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) {
3525        *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
3526        *confidence = 0.4;
3527
3528        return true;
3529    }
3530
3531    return false;
3532}
3533
3534static bool isCompatibleBrand(uint32_t fourcc) {
3535    static const uint32_t kCompatibleBrands[] = {
3536        FOURCC('i', 's', 'o', 'm'),
3537        FOURCC('i', 's', 'o', '2'),
3538        FOURCC('a', 'v', 'c', '1'),
3539        FOURCC('3', 'g', 'p', '4'),
3540        FOURCC('m', 'p', '4', '1'),
3541        FOURCC('m', 'p', '4', '2'),
3542
3543        // Won't promise that the following file types can be played.
3544        // Just give these file types a chance.
3545        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
3546        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
3547
3548        FOURCC('3', 'g', '2', 'a'),  // 3GPP2
3549        FOURCC('3', 'g', '2', 'b'),
3550    };
3551
3552    for (size_t i = 0;
3553         i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
3554         ++i) {
3555        if (kCompatibleBrands[i] == fourcc) {
3556            return true;
3557        }
3558    }
3559
3560    return false;
3561}
3562
3563// Attempt to actually parse the 'ftyp' atom and determine if a suitable
3564// compatible brand is present.
3565// Also try to identify where this file's metadata ends
3566// (end of the 'moov' atom) and report it to the caller as part of
3567// the metadata.
3568static bool BetterSniffMPEG4(
3569        const sp<DataSource> &source, String8 *mimeType, float *confidence,
3570        sp<AMessage> *meta) {
3571    // We scan up to 128 bytes to identify this file as an MP4.
3572    static const off64_t kMaxScanOffset = 128ll;
3573
3574    off64_t offset = 0ll;
3575    bool foundGoodFileType = false;
3576    off64_t moovAtomEndOffset = -1ll;
3577    bool done = false;
3578
3579    while (!done && offset < kMaxScanOffset) {
3580        uint32_t hdr[2];
3581        if (source->readAt(offset, hdr, 8) < 8) {
3582            return false;
3583        }
3584
3585        uint64_t chunkSize = ntohl(hdr[0]);
3586        uint32_t chunkType = ntohl(hdr[1]);
3587        off64_t chunkDataOffset = offset + 8;
3588
3589        if (chunkSize == 1) {
3590            if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
3591                return false;
3592            }
3593
3594            chunkSize = ntoh64(chunkSize);
3595            chunkDataOffset += 8;
3596
3597            if (chunkSize < 16) {
3598                // The smallest valid chunk is 16 bytes long in this case.
3599                return false;
3600            }
3601        } else if (chunkSize < 8) {
3602            // The smallest valid chunk is 8 bytes long.
3603            return false;
3604        }
3605
3606        off64_t chunkDataSize = offset + chunkSize - chunkDataOffset;
3607
3608        char chunkstring[5];
3609        MakeFourCCString(chunkType, chunkstring);
3610        ALOGV("saw chunk type %s, size %lld @ %lld", chunkstring, chunkSize, offset);
3611        switch (chunkType) {
3612            case FOURCC('f', 't', 'y', 'p'):
3613            {
3614                if (chunkDataSize < 8) {
3615                    return false;
3616                }
3617
3618                uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
3619                for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
3620                    if (i == 1) {
3621                        // Skip this index, it refers to the minorVersion,
3622                        // not a brand.
3623                        continue;
3624                    }
3625
3626                    uint32_t brand;
3627                    if (source->readAt(
3628                                chunkDataOffset + 4 * i, &brand, 4) < 4) {
3629                        return false;
3630                    }
3631
3632                    brand = ntohl(brand);
3633
3634                    if (isCompatibleBrand(brand)) {
3635                        foundGoodFileType = true;
3636                        break;
3637                    }
3638                }
3639
3640                if (!foundGoodFileType) {
3641                    return false;
3642                }
3643
3644                break;
3645            }
3646
3647            case FOURCC('m', 'o', 'o', 'v'):
3648            {
3649                moovAtomEndOffset = offset + chunkSize;
3650
3651                done = true;
3652                break;
3653            }
3654
3655            default:
3656                break;
3657        }
3658
3659        offset += chunkSize;
3660    }
3661
3662    if (!foundGoodFileType) {
3663        return false;
3664    }
3665
3666    *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
3667    *confidence = 0.4f;
3668
3669    if (moovAtomEndOffset >= 0) {
3670        *meta = new AMessage;
3671        (*meta)->setInt64("meta-data-size", moovAtomEndOffset);
3672
3673        ALOGV("found metadata size: %lld", moovAtomEndOffset);
3674    }
3675
3676    return true;
3677}
3678
3679bool SniffMPEG4(
3680        const sp<DataSource> &source, String8 *mimeType, float *confidence,
3681        sp<AMessage> *meta) {
3682    if (BetterSniffMPEG4(source, mimeType, confidence, meta)) {
3683        return true;
3684    }
3685
3686    if (LegacySniffMPEG4(source, mimeType, confidence)) {
3687        ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
3688        return true;
3689    }
3690
3691    return false;
3692}
3693
3694}  // namespace android
3695