MPEG4Extractor.cpp revision 8da8b2e80ccdb10ff2445f503829f803d3a6ab9f
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MPEG4Extractor"
19#include <utils/Log.h>
20
21#include "include/MPEG4Extractor.h"
22#include "include/SampleTable.h"
23#include "include/ESDS.h"
24
25#include <ctype.h>
26#include <stdint.h>
27#include <stdlib.h>
28#include <string.h>
29
30#include <media/stagefright/foundation/ABitReader.h>
31#include <media/stagefright/foundation/ABuffer.h>
32#include <media/stagefright/foundation/ADebug.h>
33#include <media/stagefright/foundation/AMessage.h>
34#include <media/stagefright/MediaBuffer.h>
35#include <media/stagefright/MediaBufferGroup.h>
36#include <media/stagefright/MediaDefs.h>
37#include <media/stagefright/MediaSource.h>
38#include <media/stagefright/MetaData.h>
39#include <utils/String8.h>
40
41#include <byteswap.h>
42#include "include/ID3.h"
43
44namespace android {
45
// MediaSource subclass declared by this file for reading samples of one
// track. Only the declaration is visible here; the member-function
// definitions live elsewhere in the file.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                off64_t firstMoofOffset);

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Held by reference: the referenced vector must outlive this object.
    Vector<SidxEntry> &mSegments;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // Values associated with a track fragment header.
    struct TrackFragmentHeaderInfo {
        // Bit masks tested against mFlags.
        // NOTE(review): presumably these mirror the 'tfhd' box flags of
        // ISO/IEC 14496-12 -- confirm against the parsing code.
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record kept in mCurrentSamples.
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Copy operations are declared private; no definition is visible in this
    // chunk, presumably to disallow copying.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
153
// This custom data source wraps an existing one. Requests that fall entirely
// within the cached range are satisfied from the cache; all remaining
// requests are forwarded to the wrapped data source.
// It is used to cache the full sample table metadata for a single track.
// Sources may be wrapped several times to cover all tracks, i.e. each
// MPEG4DataSource caches the sample table metadata for exactly one track.
160
// DataSource that holds one cached byte range in memory (see setCachedRange)
// and otherwise delegates to the wrapped source.
struct MPEG4DataSource : public DataSource {
    MPEG4DataSource(const sp<DataSource> &source);

    virtual status_t initCheck() const;
    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
    virtual status_t getSize(off64_t *size);
    virtual uint32_t flags();

    // Replaces any previously cached range with "size" bytes starting at
    // "offset", read from the wrapped source.
    status_t setCachedRange(off64_t offset, size_t size);

protected:
    virtual ~MPEG4DataSource();

private:
    // Taken by readAt() and setCachedRange().
    Mutex mLock;

    sp<DataSource> mSource;
    off64_t mCachedOffset;  // start of the cached range; 0 when nothing is cached
    size_t mCachedSize;     // length of the cached range; 0 when nothing is cached
    uint8_t *mCache;        // malloc'ed buffer, or NULL when nothing is cached

    // Frees the buffer and resets the cached range to empty.
    void clearCache();

    // Copy operations are declared private; no definition is visible in this
    // chunk, presumably to disallow copying.
    MPEG4DataSource(const MPEG4DataSource &);
    MPEG4DataSource &operator=(const MPEG4DataSource &);
};
187
188MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
189    : mSource(source),
190      mCachedOffset(0),
191      mCachedSize(0),
192      mCache(NULL) {
193}
194
195MPEG4DataSource::~MPEG4DataSource() {
196    clearCache();
197}
198
199void MPEG4DataSource::clearCache() {
200    if (mCache) {
201        free(mCache);
202        mCache = NULL;
203    }
204
205    mCachedOffset = 0;
206    mCachedSize = 0;
207}
208
209status_t MPEG4DataSource::initCheck() const {
210    return mSource->initCheck();
211}
212
213ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
214    Mutex::Autolock autoLock(mLock);
215
216    if (offset >= mCachedOffset
217            && offset + size <= mCachedOffset + mCachedSize) {
218        memcpy(data, &mCache[offset - mCachedOffset], size);
219        return size;
220    }
221
222    return mSource->readAt(offset, data, size);
223}
224
225status_t MPEG4DataSource::getSize(off64_t *size) {
226    return mSource->getSize(size);
227}
228
229uint32_t MPEG4DataSource::flags() {
230    return mSource->flags();
231}
232
233status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
234    Mutex::Autolock autoLock(mLock);
235
236    clearCache();
237
238    mCache = (uint8_t *)malloc(size);
239
240    if (mCache == NULL) {
241        return -ENOMEM;
242    }
243
244    mCachedOffset = offset;
245    mCachedSize = size;
246
247    ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
248
249    if (err < (ssize_t)size) {
250        clearCache();
251
252        return ERROR_IO;
253    }
254
255    return OK;
256}
257
258////////////////////////////////////////////////////////////////////////////////
259
// Prints "size" bytes starting at "_data" to stdout, 16 bytes per row.
// Each row shows the row offset in hex, the bytes as two-digit hex values
// (with an extra space after the eighth column) and finally the same bytes
// as characters, where non-printable bytes are shown as '.'.
static void hexdump(const void *_data, size_t size) {
    const size_t kBytesPerRow = 16;
    const uint8_t *bytes = (const uint8_t *)_data;

    for (size_t rowStart = 0; rowStart < size; rowStart += kBytesPerRow) {
        const size_t remaining = size - rowStart;
        const size_t rowLen = (remaining > kBytesPerRow) ? kBytesPerRow : remaining;

        printf("0x%04zx  ", rowStart);

        // Hex columns; positions past the end of the data are blank padded so
        // that the character column stays aligned.
        for (size_t col = 0; col < kBytesPerRow; ++col) {
            if (col == 8) {
                printf(" ");
            }

            if (col < rowLen) {
                printf("%02x ", bytes[rowStart + col]);
            } else {
                printf("   ");
            }
        }

        printf(" ");

        // Character column.
        for (size_t col = 0; col < rowLen; ++col) {
            const uint8_t value = bytes[rowStart + col];
            if (isprint(value)) {
                printf("%c", value);
            } else {
                printf(".");
            }
        }

        printf("\n");
    }
}
298
299static const char *FourCC2MIME(uint32_t fourcc) {
300    switch (fourcc) {
301        case FOURCC('m', 'p', '4', 'a'):
302            return MEDIA_MIMETYPE_AUDIO_AAC;
303
304        case FOURCC('s', 'a', 'm', 'r'):
305            return MEDIA_MIMETYPE_AUDIO_AMR_NB;
306
307        case FOURCC('s', 'a', 'w', 'b'):
308            return MEDIA_MIMETYPE_AUDIO_AMR_WB;
309
310        case FOURCC('m', 'p', '4', 'v'):
311            return MEDIA_MIMETYPE_VIDEO_MPEG4;
312
313        case FOURCC('s', '2', '6', '3'):
314        case FOURCC('h', '2', '6', '3'):
315        case FOURCC('H', '2', '6', '3'):
316            return MEDIA_MIMETYPE_VIDEO_H263;
317
318        case FOURCC('a', 'v', 'c', '1'):
319            return MEDIA_MIMETYPE_VIDEO_AVC;
320
321        case FOURCC('h', 'v', 'c', '1'):
322        case FOURCC('h', 'e', 'v', '1'):
323            return MEDIA_MIMETYPE_VIDEO_HEVC;
324        default:
325            CHECK(!"should not be here.");
326            return NULL;
327    }
328}
329
330static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
331    if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
332        // AMR NB audio is always mono, 8kHz
333        *channels = 1;
334        *rate = 8000;
335        return true;
336    } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
337        // AMR WB audio is always mono, 16kHz
338        *channels = 1;
339        *rate = 16000;
340        return true;
341    }
342    return false;
343}
344
345MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
346    : mSidxDuration(0),
347      mMoofOffset(0),
348      mDataSource(source),
349      mInitCheck(NO_INIT),
350      mHasVideo(false),
351      mHeaderTimescale(0),
352      mFirstTrack(NULL),
353      mLastTrack(NULL),
354      mFileMetaData(new MetaData),
355      mFirstSINF(NULL),
356      mIsDrm(false) {
357}
358
359MPEG4Extractor::~MPEG4Extractor() {
360    Track *track = mFirstTrack;
361    while (track) {
362        Track *next = track->next;
363
364        delete track;
365        track = next;
366    }
367    mFirstTrack = mLastTrack = NULL;
368
369    SINF *sinf = mFirstSINF;
370    while (sinf) {
371        SINF *next = sinf->next;
372        delete[] sinf->IPMPData;
373        delete sinf;
374        sinf = next;
375    }
376    mFirstSINF = NULL;
377
378    for (size_t i = 0; i < mPssh.size(); i++) {
379        delete [] mPssh[i].data;
380    }
381}
382
383uint32_t MPEG4Extractor::flags() const {
384    return CAN_PAUSE |
385            ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
386                    (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
387}
388
389sp<MetaData> MPEG4Extractor::getMetaData() {
390    status_t err;
391    if ((err = readMetaData()) != OK) {
392        return new MetaData;
393    }
394
395    return mFileMetaData;
396}
397
398size_t MPEG4Extractor::countTracks() {
399    status_t err;
400    if ((err = readMetaData()) != OK) {
401        ALOGV("MPEG4Extractor::countTracks: no tracks");
402        return 0;
403    }
404
405    size_t n = 0;
406    Track *track = mFirstTrack;
407    while (track) {
408        ++n;
409        track = track->next;
410    }
411
412    ALOGV("MPEG4Extractor::countTracks: %d tracks", n);
413    return n;
414}
415
416sp<MetaData> MPEG4Extractor::getTrackMetaData(
417        size_t index, uint32_t flags) {
418    status_t err;
419    if ((err = readMetaData()) != OK) {
420        return NULL;
421    }
422
423    Track *track = mFirstTrack;
424    while (index > 0) {
425        if (track == NULL) {
426            return NULL;
427        }
428
429        track = track->next;
430        --index;
431    }
432
433    if (track == NULL) {
434        return NULL;
435    }
436
437    if ((flags & kIncludeExtensiveMetaData)
438            && !track->includes_expensive_metadata) {
439        track->includes_expensive_metadata = true;
440
441        const char *mime;
442        CHECK(track->meta->findCString(kKeyMIMEType, &mime));
443        if (!strncasecmp("video/", mime, 6)) {
444            if (mMoofOffset > 0) {
445                int64_t duration;
446                if (track->meta->findInt64(kKeyDuration, &duration)) {
447                    // nothing fancy, just pick a frame near 1/4th of the duration
448                    track->meta->setInt64(
449                            kKeyThumbnailTime, duration / 4);
450                }
451            } else {
452                uint32_t sampleIndex;
453                uint32_t sampleTime;
454                if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK
455                        && track->sampleTable->getMetaDataForSample(
456                            sampleIndex, NULL /* offset */, NULL /* size */,
457                            &sampleTime) == OK) {
458                    track->meta->setInt64(
459                            kKeyThumbnailTime,
460                            ((int64_t)sampleTime * 1000000) / track->timescale);
461                }
462            }
463        }
464    }
465
466    return track->meta;
467}
468
// Writes the four bytes of "x", most significant byte first, into "s" and
// NUL-terminates the result. "s" must provide room for at least 5 chars.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int pos = 0; pos < 4; ++pos) {
        const int shift = 24 - 8 * pos;
        s[pos] = (x >> shift) & 0xff;
    }
    s[4] = '\0';
}
476
477status_t MPEG4Extractor::readMetaData() {
478    if (mInitCheck != NO_INIT) {
479        return mInitCheck;
480    }
481
482    off64_t offset = 0;
483    status_t err;
484    while (true) {
485        off64_t orig_offset = offset;
486        err = parseChunk(&offset, 0);
487
488        if (offset <= orig_offset) {
489            // only continue parsing if the offset was advanced,
490            // otherwise we might end up in an infinite loop
491            ALOGE("did not advance: 0x%lld->0x%lld", orig_offset, offset);
492            err = ERROR_MALFORMED;
493            break;
494        } else if (err == OK) {
495            continue;
496        } else if (err != UNKNOWN_ERROR) {
497            break;
498        }
499        uint32_t hdr[2];
500        if (mDataSource->readAt(offset, hdr, 8) < 8) {
501            break;
502        }
503        uint32_t chunk_type = ntohl(hdr[1]);
504        if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
505            // store the offset of the first segment
506            mMoofOffset = offset;
507        } else if (chunk_type != FOURCC('m', 'd', 'a', 't')) {
508            // keep parsing until we get to the data
509            continue;
510        }
511        break;
512    }
513
514    if (mInitCheck == OK) {
515        if (mHasVideo) {
516            mFileMetaData->setCString(
517                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
518        } else {
519            mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
520        }
521    } else {
522        mInitCheck = err;
523    }
524
525    CHECK_NE(err, (status_t)NO_INIT);
526
527    // copy pssh data into file metadata
528    int psshsize = 0;
529    for (size_t i = 0; i < mPssh.size(); i++) {
530        psshsize += 20 + mPssh[i].datalen;
531    }
532    if (psshsize) {
533        char *buf = (char*)malloc(psshsize);
534        char *ptr = buf;
535        for (size_t i = 0; i < mPssh.size(); i++) {
536            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
537            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
538            ptr += (20 + mPssh[i].datalen);
539        }
540        mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize);
541        free(buf);
542    }
543    return mInitCheck;
544}
545
546char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
547    if (mFirstSINF == NULL) {
548        return NULL;
549    }
550
551    SINF *sinf = mFirstSINF;
552    while (sinf && (trackID != sinf->trackID)) {
553        sinf = sinf->next;
554    }
555
556    if (sinf == NULL) {
557        return NULL;
558    }
559
560    *len = sinf->len;
561    return sinf->IPMPData;
562}
563
564// Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
565static int32_t readSize(off64_t offset,
566        const sp<DataSource> DataSource, uint8_t *numOfBytes) {
567    uint32_t size = 0;
568    uint8_t data;
569    bool moreData = true;
570    *numOfBytes = 0;
571
572    while (moreData) {
573        if (DataSource->readAt(offset, &data, 1) < 1) {
574            return -1;
575        }
576        offset ++;
577        moreData = (data >= 128) ? true : false;
578        size = (size << 7) | (data & 0x7f); // Take last 7 bits
579        (*numOfBytes) ++;
580    }
581
582    return size;
583}
584
585status_t MPEG4Extractor::parseDrmSINF(
586        off64_t * /* offset */, off64_t data_offset) {
587    uint8_t updateIdTag;
588    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
589        return ERROR_IO;
590    }
591    data_offset ++;
592
593    if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
594        return ERROR_MALFORMED;
595    }
596
597    uint8_t numOfBytes;
598    int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
599    if (size < 0) {
600        return ERROR_IO;
601    }
602    int32_t classSize = size;
603    data_offset += numOfBytes;
604
605    while(size >= 11 ) {
606        uint8_t descriptorTag;
607        if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
608            return ERROR_IO;
609        }
610        data_offset ++;
611
612        if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
613            return ERROR_MALFORMED;
614        }
615
616        uint8_t buffer[8];
617        //ObjectDescriptorID and ObjectDescriptor url flag
618        if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
619            return ERROR_IO;
620        }
621        data_offset += 2;
622
623        if ((buffer[1] >> 5) & 0x0001) { //url flag is set
624            return ERROR_MALFORMED;
625        }
626
627        if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
628            return ERROR_IO;
629        }
630        data_offset += 8;
631
632        if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
633                || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
634            return ERROR_MALFORMED;
635        }
636
637        SINF *sinf = new SINF;
638        sinf->trackID = U16_AT(&buffer[3]);
639        sinf->IPMPDescriptorID = buffer[7];
640        sinf->next = mFirstSINF;
641        mFirstSINF = sinf;
642
643        size -= (8 + 2 + 1);
644    }
645
646    if (size != 0) {
647        return ERROR_MALFORMED;
648    }
649
650    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
651        return ERROR_IO;
652    }
653    data_offset ++;
654
655    if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
656        return ERROR_MALFORMED;
657    }
658
659    size = readSize(data_offset, mDataSource, &numOfBytes);
660    if (size < 0) {
661        return ERROR_IO;
662    }
663    classSize = size;
664    data_offset += numOfBytes;
665
666    while (size > 0) {
667        uint8_t tag;
668        int32_t dataLen;
669        if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
670            return ERROR_IO;
671        }
672        data_offset ++;
673
674        if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
675            uint8_t id;
676            dataLen = readSize(data_offset, mDataSource, &numOfBytes);
677            if (dataLen < 0) {
678                return ERROR_IO;
679            } else if (dataLen < 4) {
680                return ERROR_MALFORMED;
681            }
682            data_offset += numOfBytes;
683
684            if (mDataSource->readAt(data_offset, &id, 1) < 1) {
685                return ERROR_IO;
686            }
687            data_offset ++;
688
689            SINF *sinf = mFirstSINF;
690            while (sinf && (sinf->IPMPDescriptorID != id)) {
691                sinf = sinf->next;
692            }
693            if (sinf == NULL) {
694                return ERROR_MALFORMED;
695            }
696            sinf->len = dataLen - 3;
697            sinf->IPMPData = new (std::nothrow) char[sinf->len];
698            if (sinf->IPMPData == NULL) {
699                return ERROR_MALFORMED;
700            }
701            data_offset += 2;
702
703            if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) {
704                return ERROR_IO;
705            }
706            data_offset += sinf->len;
707
708            size -= (dataLen + numOfBytes + 1);
709        }
710    }
711
712    if (size != 0) {
713        return ERROR_MALFORMED;
714    }
715
716    return UNKNOWN_ERROR;  // Return a dummy error.
717}
718
719struct PathAdder {
720    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
721        : mPath(path) {
722        mPath->push(chunkType);
723    }
724
725    ~PathAdder() {
726        mPath->pop();
727    }
728
729private:
730    Vector<uint32_t> *mPath;
731
732    PathAdder(const PathAdder &);
733    PathAdder &operator=(const PathAdder &);
734};
735
736static bool underMetaDataPath(const Vector<uint32_t> &path) {
737    return path.size() >= 5
738        && path[0] == FOURCC('m', 'o', 'o', 'v')
739        && path[1] == FOURCC('u', 'd', 't', 'a')
740        && path[2] == FOURCC('m', 'e', 't', 'a')
741        && path[3] == FOURCC('i', 'l', 's', 't');
742}
743
744// Given a time in seconds since Jan 1 1904, produce a human-readable string.
745static void convertTimeToDate(int64_t time_1904, String8 *s) {
746    time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600);
747
748    char tmp[32];
749    strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970));
750
751    s->setTo(tmp);
752}
753
754status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
755    ALOGV("entering parseChunk %lld/%d", *offset, depth);
756    uint32_t hdr[2];
757    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
758        return ERROR_IO;
759    }
760    uint64_t chunk_size = ntohl(hdr[0]);
761    uint32_t chunk_type = ntohl(hdr[1]);
762    off64_t data_offset = *offset + 8;
763
764    if (chunk_size == 1) {
765        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
766            return ERROR_IO;
767        }
768        chunk_size = ntoh64(chunk_size);
769        data_offset += 8;
770
771        if (chunk_size < 16) {
772            // The smallest valid chunk is 16 bytes long in this case.
773            return ERROR_MALFORMED;
774        }
775    } else if (chunk_size == 0) {
776        if (depth == 0) {
777            // atom extends to end of file
778            off64_t sourceSize;
779            if (mDataSource->getSize(&sourceSize) == OK) {
780                chunk_size = (sourceSize - *offset);
781            } else {
782                // XXX could we just pick a "sufficiently large" value here?
783                ALOGE("atom size is 0, and data source has no size");
784                return ERROR_MALFORMED;
785            }
786        } else {
787            // not allowed for non-toplevel atoms, skip it
788            *offset += 4;
789            return OK;
790        }
791    } else if (chunk_size < 8) {
792        // The smallest valid chunk is 8 bytes long.
793        ALOGE("invalid chunk size: %d", int(chunk_size));
794        return ERROR_MALFORMED;
795    }
796
797    char chunk[5];
798    MakeFourCCString(chunk_type, chunk);
799    ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth);
800
801#if 0
802    static const char kWhitespace[] = "                                        ";
803    const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
804    printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size);
805
806    char buffer[256];
807    size_t n = chunk_size;
808    if (n > sizeof(buffer)) {
809        n = sizeof(buffer);
810    }
811    if (mDataSource->readAt(*offset, buffer, n)
812            < (ssize_t)n) {
813        return ERROR_IO;
814    }
815
816    hexdump(buffer, n);
817#endif
818
819    PathAdder autoAdder(&mPath, chunk_type);
820
821    off64_t chunk_data_size = *offset + chunk_size - data_offset;
822
823    if (chunk_type != FOURCC('c', 'p', 'r', 't')
824            && chunk_type != FOURCC('c', 'o', 'v', 'r')
825            && mPath.size() == 5 && underMetaDataPath(mPath)) {
826        off64_t stop_offset = *offset + chunk_size;
827        *offset = data_offset;
828        while (*offset < stop_offset) {
829            status_t err = parseChunk(offset, depth + 1);
830            if (err != OK) {
831                return err;
832            }
833        }
834
835        if (*offset != stop_offset) {
836            return ERROR_MALFORMED;
837        }
838
839        return OK;
840    }
841
842    switch(chunk_type) {
843        case FOURCC('m', 'o', 'o', 'v'):
844        case FOURCC('t', 'r', 'a', 'k'):
845        case FOURCC('m', 'd', 'i', 'a'):
846        case FOURCC('m', 'i', 'n', 'f'):
847        case FOURCC('d', 'i', 'n', 'f'):
848        case FOURCC('s', 't', 'b', 'l'):
849        case FOURCC('m', 'v', 'e', 'x'):
850        case FOURCC('m', 'o', 'o', 'f'):
851        case FOURCC('t', 'r', 'a', 'f'):
852        case FOURCC('m', 'f', 'r', 'a'):
853        case FOURCC('u', 'd', 't', 'a'):
854        case FOURCC('i', 'l', 's', 't'):
855        case FOURCC('s', 'i', 'n', 'f'):
856        case FOURCC('s', 'c', 'h', 'i'):
857        case FOURCC('e', 'd', 't', 's'):
858        {
859            if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
860                ALOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size);
861
862                if (mDataSource->flags()
863                        & (DataSource::kWantsPrefetching
864                            | DataSource::kIsCachingDataSource)) {
865                    sp<MPEG4DataSource> cachedSource =
866                        new MPEG4DataSource(mDataSource);
867
868                    if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
869                        mDataSource = cachedSource;
870                    }
871                }
872
873                mLastTrack->sampleTable = new SampleTable(mDataSource);
874            }
875
876            bool isTrack = false;
877            if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
878                isTrack = true;
879
880                Track *track = new Track;
881                track->next = NULL;
882                if (mLastTrack) {
883                    mLastTrack->next = track;
884                } else {
885                    mFirstTrack = track;
886                }
887                mLastTrack = track;
888
889                track->meta = new MetaData;
890                track->includes_expensive_metadata = false;
891                track->skipTrack = false;
892                track->timescale = 0;
893                track->meta->setCString(kKeyMIMEType, "application/octet-stream");
894            }
895
896            off64_t stop_offset = *offset + chunk_size;
897            *offset = data_offset;
898            while (*offset < stop_offset) {
899                status_t err = parseChunk(offset, depth + 1);
900                if (err != OK) {
901                    return err;
902                }
903            }
904
905            if (*offset != stop_offset) {
906                return ERROR_MALFORMED;
907            }
908
909            if (isTrack) {
910                if (mLastTrack->skipTrack) {
911                    Track *cur = mFirstTrack;
912
913                    if (cur == mLastTrack) {
914                        delete cur;
915                        mFirstTrack = mLastTrack = NULL;
916                    } else {
917                        while (cur && cur->next != mLastTrack) {
918                            cur = cur->next;
919                        }
920                        cur->next = NULL;
921                        delete mLastTrack;
922                        mLastTrack = cur;
923                    }
924
925                    return OK;
926                }
927
928                status_t err = verifyTrack(mLastTrack);
929
930                if (err != OK) {
931                    return err;
932                }
933            } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
934                mInitCheck = OK;
935
936                if (!mIsDrm) {
937                    return UNKNOWN_ERROR;  // Return a dummy error.
938                } else {
939                    return OK;
940                }
941            }
942            break;
943        }
944
945        case FOURCC('e', 'l', 's', 't'):
946        {
947            *offset += chunk_size;
948
949            // See 14496-12 8.6.6
950            uint8_t version;
951            if (mDataSource->readAt(data_offset, &version, 1) < 1) {
952                return ERROR_IO;
953            }
954
955            uint32_t entry_count;
956            if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
957                return ERROR_IO;
958            }
959
960            if (entry_count != 1) {
961                // we only support a single entry at the moment, for gapless playback
962                ALOGW("ignoring edit list with %d entries", entry_count);
963            } else if (mHeaderTimescale == 0) {
964                ALOGW("ignoring edit list because timescale is 0");
965            } else {
966                off64_t entriesoffset = data_offset + 8;
967                uint64_t segment_duration;
968                int64_t media_time;
969
970                if (version == 1) {
971                    if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
972                            !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
973                        return ERROR_IO;
974                    }
975                } else if (version == 0) {
976                    uint32_t sd;
977                    int32_t mt;
978                    if (!mDataSource->getUInt32(entriesoffset, &sd) ||
979                            !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
980                        return ERROR_IO;
981                    }
982                    segment_duration = sd;
983                    media_time = mt;
984                } else {
985                    return ERROR_IO;
986                }
987
988                uint64_t halfscale = mHeaderTimescale / 2;
989                segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale;
990                media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale;
991
992                int64_t duration;
993                int32_t samplerate;
994                if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
995                        mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {
996
997                    int64_t delay = (media_time  * samplerate + 500000) / 1000000;
998                    mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
999
1000                    int64_t paddingus = duration - (segment_duration + media_time);
1001                    if (paddingus < 0) {
1002                        // track duration from media header (which is what kKeyDuration is) might
1003                        // be slightly shorter than the segment duration, which would make the
1004                        // padding negative. Clamp to zero.
1005                        paddingus = 0;
1006                    }
1007                    int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
1008                    mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
1009                }
1010            }
1011            break;
1012        }
1013
1014        case FOURCC('f', 'r', 'm', 'a'):
1015        {
1016            *offset += chunk_size;
1017
1018            uint32_t original_fourcc;
1019            if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1020                return ERROR_IO;
1021            }
1022            original_fourcc = ntohl(original_fourcc);
1023            ALOGV("read original format: %d", original_fourcc);
1024            mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
1025            uint32_t num_channels = 0;
1026            uint32_t sample_rate = 0;
1027            if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1028                mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1029                mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1030            }
1031            break;
1032        }
1033
1034        case FOURCC('t', 'e', 'n', 'c'):
1035        {
1036            *offset += chunk_size;
1037
1038            if (chunk_size < 32) {
1039                return ERROR_MALFORMED;
1040            }
1041
1042            // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1043            // default IV size, 16 bytes default KeyID
1044            // (ISO 23001-7)
1045            char buf[4];
1046            memset(buf, 0, 4);
1047            if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1048                return ERROR_IO;
1049            }
1050            uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1051            if (defaultAlgorithmId > 1) {
1052                // only 0 (clear) and 1 (AES-128) are valid
1053                return ERROR_MALFORMED;
1054            }
1055
1056            memset(buf, 0, 4);
1057            if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1058                return ERROR_IO;
1059            }
1060            uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1061
1062            if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
1063                    (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
1064                // only unencrypted data must have 0 IV size
1065                return ERROR_MALFORMED;
1066            } else if (defaultIVSize != 0 &&
1067                    defaultIVSize != 8 &&
1068                    defaultIVSize != 16) {
1069                // only supported sizes are 0, 8 and 16
1070                return ERROR_MALFORMED;
1071            }
1072
1073            uint8_t defaultKeyId[16];
1074
1075            if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1076                return ERROR_IO;
1077            }
1078
1079            mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
1080            mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1081            mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1082            break;
1083        }
1084
1085        case FOURCC('t', 'k', 'h', 'd'):
1086        {
1087            *offset += chunk_size;
1088
1089            status_t err;
1090            if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1091                return err;
1092            }
1093
1094            break;
1095        }
1096
1097        case FOURCC('p', 's', 's', 'h'):
1098        {
1099            *offset += chunk_size;
1100
1101            PsshInfo pssh;
1102
1103            if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1104                return ERROR_IO;
1105            }
1106
1107            uint32_t psshdatalen = 0;
1108            if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1109                return ERROR_IO;
1110            }
1111            pssh.datalen = ntohl(psshdatalen);
1112            ALOGV("pssh data size: %d", pssh.datalen);
1113            if (pssh.datalen + 20 > chunk_size) {
1114                // pssh data length exceeds size of containing box
1115                return ERROR_MALFORMED;
1116            }
1117
1118            pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1119            if (pssh.data == NULL) {
1120                return ERROR_MALFORMED;
1121            }
1122            ALOGV("allocated pssh @ %p", pssh.data);
1123            ssize_t requested = (ssize_t) pssh.datalen;
1124            if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1125                return ERROR_IO;
1126            }
1127            mPssh.push_back(pssh);
1128
1129            break;
1130        }
1131
1132        case FOURCC('m', 'd', 'h', 'd'):
1133        {
1134            *offset += chunk_size;
1135
1136            if (chunk_data_size < 4) {
1137                return ERROR_MALFORMED;
1138            }
1139
1140            uint8_t version;
1141            if (mDataSource->readAt(
1142                        data_offset, &version, sizeof(version))
1143                    < (ssize_t)sizeof(version)) {
1144                return ERROR_IO;
1145            }
1146
1147            off64_t timescale_offset;
1148
1149            if (version == 1) {
1150                timescale_offset = data_offset + 4 + 16;
1151            } else if (version == 0) {
1152                timescale_offset = data_offset + 4 + 8;
1153            } else {
1154                return ERROR_IO;
1155            }
1156
1157            uint32_t timescale;
1158            if (mDataSource->readAt(
1159                        timescale_offset, &timescale, sizeof(timescale))
1160                    < (ssize_t)sizeof(timescale)) {
1161                return ERROR_IO;
1162            }
1163
1164            mLastTrack->timescale = ntohl(timescale);
1165
1166            int64_t duration = 0;
1167            if (version == 1) {
1168                if (mDataSource->readAt(
1169                            timescale_offset + 4, &duration, sizeof(duration))
1170                        < (ssize_t)sizeof(duration)) {
1171                    return ERROR_IO;
1172                }
1173                duration = ntoh64(duration);
1174            } else {
1175                uint32_t duration32;
1176                if (mDataSource->readAt(
1177                            timescale_offset + 4, &duration32, sizeof(duration32))
1178                        < (ssize_t)sizeof(duration32)) {
1179                    return ERROR_IO;
1180                }
1181                // ffmpeg sets duration to -1, which is incorrect.
1182                if (duration32 != 0xffffffff) {
1183                    duration = ntohl(duration32);
1184                }
1185            }
1186            mLastTrack->meta->setInt64(
1187                    kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1188
1189            uint8_t lang[2];
1190            off64_t lang_offset;
1191            if (version == 1) {
1192                lang_offset = timescale_offset + 4 + 8;
1193            } else if (version == 0) {
1194                lang_offset = timescale_offset + 4 + 4;
1195            } else {
1196                return ERROR_IO;
1197            }
1198
1199            if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1200                    < (ssize_t)sizeof(lang)) {
1201                return ERROR_IO;
1202            }
1203
1204            // To get the ISO-639-2/T three character language code
1205            // 1 bit pad followed by 3 5-bits characters. Each character
1206            // is packed as the difference between its ASCII value and 0x60.
1207            char lang_code[4];
1208            lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1209            lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1210            lang_code[2] = (lang[1] & 0x1f) + 0x60;
1211            lang_code[3] = '\0';
1212
1213            mLastTrack->meta->setCString(
1214                    kKeyMediaLanguage, lang_code);
1215
1216            break;
1217        }
1218
1219        case FOURCC('s', 't', 's', 'd'):
1220        {
1221            if (chunk_data_size < 8) {
1222                return ERROR_MALFORMED;
1223            }
1224
1225            uint8_t buffer[8];
1226            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1227                return ERROR_MALFORMED;
1228            }
1229
1230            if (mDataSource->readAt(
1231                        data_offset, buffer, 8) < 8) {
1232                return ERROR_IO;
1233            }
1234
1235            if (U32_AT(buffer) != 0) {
1236                // Should be version 0, flags 0.
1237                return ERROR_MALFORMED;
1238            }
1239
1240            uint32_t entry_count = U32_AT(&buffer[4]);
1241
1242            if (entry_count > 1) {
1243                // For 3GPP timed text, there could be multiple tx3g boxes containing
1244                // multiple text display formats. These formats will be used to
1245                // display the timed text.
1246                // For encrypted files, there may also be more than one entry.
1247                const char *mime;
1248                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1249                if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1250                        strcasecmp(mime, "application/octet-stream")) {
1251                    // For now we only support a single type of media per track.
1252                    mLastTrack->skipTrack = true;
1253                    *offset += chunk_size;
1254                    break;
1255                }
1256            }
1257            off64_t stop_offset = *offset + chunk_size;
1258            *offset = data_offset + 8;
1259            for (uint32_t i = 0; i < entry_count; ++i) {
1260                status_t err = parseChunk(offset, depth + 1);
1261                if (err != OK) {
1262                    return err;
1263                }
1264            }
1265
1266            if (*offset != stop_offset) {
1267                return ERROR_MALFORMED;
1268            }
1269            break;
1270        }
1271
1272        case FOURCC('m', 'p', '4', 'a'):
1273        case FOURCC('e', 'n', 'c', 'a'):
1274        case FOURCC('s', 'a', 'm', 'r'):
1275        case FOURCC('s', 'a', 'w', 'b'):
1276        {
1277            uint8_t buffer[8 + 20];
1278            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1279                // Basic AudioSampleEntry size.
1280                return ERROR_MALFORMED;
1281            }
1282
1283            if (mDataSource->readAt(
1284                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1285                return ERROR_IO;
1286            }
1287
1288            uint16_t data_ref_index = U16_AT(&buffer[6]);
1289            uint32_t num_channels = U16_AT(&buffer[16]);
1290
1291            uint16_t sample_size = U16_AT(&buffer[18]);
1292            uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1293
1294            if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1295                // if the chunk type is enca, we'll get the type from the sinf/frma box later
1296                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1297                AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1298            }
1299            ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1300                   chunk, num_channels, sample_size, sample_rate);
1301            mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1302            mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1303
1304            off64_t stop_offset = *offset + chunk_size;
1305            *offset = data_offset + sizeof(buffer);
1306            while (*offset < stop_offset) {
1307                status_t err = parseChunk(offset, depth + 1);
1308                if (err != OK) {
1309                    return err;
1310                }
1311            }
1312
1313            if (*offset != stop_offset) {
1314                return ERROR_MALFORMED;
1315            }
1316            break;
1317        }
1318
1319        case FOURCC('m', 'p', '4', 'v'):
1320        case FOURCC('e', 'n', 'c', 'v'):
1321        case FOURCC('s', '2', '6', '3'):
1322        case FOURCC('H', '2', '6', '3'):
1323        case FOURCC('h', '2', '6', '3'):
1324        case FOURCC('a', 'v', 'c', '1'):
1325        case FOURCC('h', 'v', 'c', '1'):
1326        case FOURCC('h', 'e', 'v', '1'):
1327        {
1328            mHasVideo = true;
1329
1330            uint8_t buffer[78];
1331            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1332                // Basic VideoSampleEntry size.
1333                return ERROR_MALFORMED;
1334            }
1335
1336            if (mDataSource->readAt(
1337                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1338                return ERROR_IO;
1339            }
1340
1341            uint16_t data_ref_index = U16_AT(&buffer[6]);
1342            uint16_t width = U16_AT(&buffer[6 + 18]);
1343            uint16_t height = U16_AT(&buffer[6 + 20]);
1344
1345            // The video sample is not standard-compliant if it has invalid dimension.
1346            // Use some default width and height value, and
1347            // let the decoder figure out the actual width and height (and thus
1348            // be prepared for INFO_FORMAT_CHANGED event).
1349            if (width == 0)  width  = 352;
1350            if (height == 0) height = 288;
1351
1352            // printf("*** coding='%s' width=%d height=%d\n",
1353            //        chunk, width, height);
1354
1355            if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1356                // if the chunk type is encv, we'll get the type from the sinf/frma box later
1357                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1358            }
1359            mLastTrack->meta->setInt32(kKeyWidth, width);
1360            mLastTrack->meta->setInt32(kKeyHeight, height);
1361
1362            off64_t stop_offset = *offset + chunk_size;
1363            *offset = data_offset + sizeof(buffer);
1364            while (*offset < stop_offset) {
1365                status_t err = parseChunk(offset, depth + 1);
1366                if (err != OK) {
1367                    return err;
1368                }
1369            }
1370
1371            if (*offset != stop_offset) {
1372                return ERROR_MALFORMED;
1373            }
1374            break;
1375        }
1376
1377        case FOURCC('s', 't', 'c', 'o'):
1378        case FOURCC('c', 'o', '6', '4'):
1379        {
1380            status_t err =
1381                mLastTrack->sampleTable->setChunkOffsetParams(
1382                        chunk_type, data_offset, chunk_data_size);
1383
1384            *offset += chunk_size;
1385
1386            if (err != OK) {
1387                return err;
1388            }
1389
1390            break;
1391        }
1392
1393        case FOURCC('s', 't', 's', 'c'):
1394        {
1395            status_t err =
1396                mLastTrack->sampleTable->setSampleToChunkParams(
1397                        data_offset, chunk_data_size);
1398
1399            *offset += chunk_size;
1400
1401            if (err != OK) {
1402                return err;
1403            }
1404
1405            break;
1406        }
1407
1408        case FOURCC('s', 't', 's', 'z'):
1409        case FOURCC('s', 't', 'z', '2'):
1410        {
1411            status_t err =
1412                mLastTrack->sampleTable->setSampleSizeParams(
1413                        chunk_type, data_offset, chunk_data_size);
1414
1415            *offset += chunk_size;
1416
1417            if (err != OK) {
1418                return err;
1419            }
1420
1421            size_t max_size;
1422            err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1423
1424            if (err != OK) {
1425                return err;
1426            }
1427
1428            if (max_size != 0) {
1429                // Assume that a given buffer only contains at most 10 chunks,
1430                // each chunk originally prefixed with a 2 byte length will
1431                // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1432                // and thus will grow by 2 bytes per chunk.
1433                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1434            } else {
1435                // No size was specified. Pick a conservatively large size.
1436                int32_t width, height;
1437                if (!mLastTrack->meta->findInt32(kKeyWidth, &width) ||
1438                    !mLastTrack->meta->findInt32(kKeyHeight, &height)) {
1439                    ALOGE("No width or height, assuming worst case 1080p");
1440                    width = 1920;
1441                    height = 1080;
1442                }
1443
1444                const char *mime;
1445                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1446                if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
1447                    // AVC requires compression ratio of at least 2, and uses
1448                    // macroblocks
1449                    max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1450                } else {
1451                    // For all other formats there is no minimum compression
1452                    // ratio. Use compression ratio of 1.
1453                    max_size = width * height * 3 / 2;
1454                }
1455                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size);
1456            }
1457
1458            // NOTE: setting another piece of metadata invalidates any pointers (such as the
1459            // mimetype) previously obtained, so don't cache them.
1460            const char *mime;
1461            CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1462            // Calculate average frame rate.
1463            if (!strncasecmp("video/", mime, 6)) {
1464                size_t nSamples = mLastTrack->sampleTable->countSamples();
1465                int64_t durationUs;
1466                if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1467                    if (durationUs > 0) {
1468                        int32_t frameRate = (nSamples * 1000000LL +
1469                                    (durationUs >> 1)) / durationUs;
1470                        mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1471                    }
1472                }
1473            }
1474
1475            break;
1476        }
1477
1478        case FOURCC('s', 't', 't', 's'):
1479        {
1480            *offset += chunk_size;
1481
1482            status_t err =
1483                mLastTrack->sampleTable->setTimeToSampleParams(
1484                        data_offset, chunk_data_size);
1485
1486            if (err != OK) {
1487                return err;
1488            }
1489
1490            break;
1491        }
1492
1493        case FOURCC('c', 't', 't', 's'):
1494        {
1495            *offset += chunk_size;
1496
1497            status_t err =
1498                mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1499                        data_offset, chunk_data_size);
1500
1501            if (err != OK) {
1502                return err;
1503            }
1504
1505            break;
1506        }
1507
1508        case FOURCC('s', 't', 's', 's'):
1509        {
1510            *offset += chunk_size;
1511
1512            status_t err =
1513                mLastTrack->sampleTable->setSyncSampleParams(
1514                        data_offset, chunk_data_size);
1515
1516            if (err != OK) {
1517                return err;
1518            }
1519
1520            break;
1521        }
1522
1523        // @xyz
1524        case FOURCC('\xA9', 'x', 'y', 'z'):
1525        {
1526            *offset += chunk_size;
1527
1528            // Best case the total data length inside "@xyz" box
1529            // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/",
1530            // where "\x00\x04" is the text string length with value = 4,
1531            // "\x15\xc7" is the language code = en, and "0+0" is a
1532            // location (string) value with longitude = 0 and latitude = 0.
1533            if (chunk_data_size < 8) {
1534                return ERROR_MALFORMED;
1535            }
1536
1537            // Worst case the location string length would be 18,
1538            // for instance +90.0000-180.0000, without the trailing "/" and
1539            // the string length + language code.
1540            char buffer[18];
1541
1542            // Subtracting 5 from the data size is because the text string length +
1543            // language code takes 4 bytes, and the trailing slash "/" takes 1 byte.
1544            off64_t location_length = chunk_data_size - 5;
1545            if (location_length >= (off64_t) sizeof(buffer)) {
1546                return ERROR_MALFORMED;
1547            }
1548
1549            if (mDataSource->readAt(
1550                        data_offset + 4, buffer, location_length) < location_length) {
1551                return ERROR_IO;
1552            }
1553
1554            buffer[location_length] = '\0';
1555            mFileMetaData->setCString(kKeyLocation, buffer);
1556            break;
1557        }
1558
1559        case FOURCC('e', 's', 'd', 's'):
1560        {
1561            *offset += chunk_size;
1562
1563            if (chunk_data_size < 4) {
1564                return ERROR_MALFORMED;
1565            }
1566
1567            uint8_t buffer[256];
1568            if (chunk_data_size > (off64_t)sizeof(buffer)) {
1569                return ERROR_BUFFER_TOO_SMALL;
1570            }
1571
1572            if (mDataSource->readAt(
1573                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
1574                return ERROR_IO;
1575            }
1576
1577            if (U32_AT(buffer) != 0) {
1578                // Should be version 0, flags 0.
1579                return ERROR_MALFORMED;
1580            }
1581
1582            mLastTrack->meta->setData(
1583                    kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1584
1585            if (mPath.size() >= 2
1586                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1587                // Information from the ESDS must be relied on for proper
1588                // setup of sample rate and channel count for MPEG4 Audio.
1589                // The generic header appears to only contain generic
1590                // information...
1591
1592                status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1593                        &buffer[4], chunk_data_size - 4);
1594
1595                if (err != OK) {
1596                    return err;
1597                }
1598            }
1599
1600            break;
1601        }
1602
1603        case FOURCC('a', 'v', 'c', 'C'):
1604        {
1605            *offset += chunk_size;
1606
1607            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1608
1609            if (mDataSource->readAt(
1610                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1611                return ERROR_IO;
1612            }
1613
1614            mLastTrack->meta->setData(
1615                    kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
1616
1617            break;
1618        }
1619        case FOURCC('h', 'v', 'c', 'C'):
1620        {
1621            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1622
1623            if (mDataSource->readAt(
1624                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1625                return ERROR_IO;
1626            }
1627
1628            mLastTrack->meta->setData(
1629                    kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size);
1630
1631            *offset += chunk_size;
1632            break;
1633        }
1634
1635        case FOURCC('d', '2', '6', '3'):
1636        {
1637            *offset += chunk_size;
1638            /*
1639             * d263 contains a fixed 7 bytes part:
1640             *   vendor - 4 bytes
1641             *   version - 1 byte
1642             *   level - 1 byte
1643             *   profile - 1 byte
1644             * optionally, "d263" box itself may contain a 16-byte
1645             * bit rate box (bitr)
1646             *   average bit rate - 4 bytes
1647             *   max bit rate - 4 bytes
1648             */
1649            char buffer[23];
1650            if (chunk_data_size != 7 &&
1651                chunk_data_size != 23) {
1652                ALOGE("Incorrect D263 box size %lld", chunk_data_size);
1653                return ERROR_MALFORMED;
1654            }
1655
1656            if (mDataSource->readAt(
1657                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1658                return ERROR_IO;
1659            }
1660
1661            mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1662
1663            break;
1664        }
1665
1666        case FOURCC('m', 'e', 't', 'a'):
1667        {
1668            uint8_t buffer[4];
1669            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1670                *offset += chunk_size;
1671                return ERROR_MALFORMED;
1672            }
1673
1674            if (mDataSource->readAt(
1675                        data_offset, buffer, 4) < 4) {
1676                *offset += chunk_size;
1677                return ERROR_IO;
1678            }
1679
1680            if (U32_AT(buffer) != 0) {
1681                // Should be version 0, flags 0.
1682
1683                // If it's not, let's assume this is one of those
1684                // apparently malformed chunks that don't have flags
1685                // and completely different semantics than what's
1686                // in the MPEG4 specs and skip it.
1687                *offset += chunk_size;
1688                return OK;
1689            }
1690
1691            off64_t stop_offset = *offset + chunk_size;
1692            *offset = data_offset + sizeof(buffer);
1693            while (*offset < stop_offset) {
1694                status_t err = parseChunk(offset, depth + 1);
1695                if (err != OK) {
1696                    return err;
1697                }
1698            }
1699
1700            if (*offset != stop_offset) {
1701                return ERROR_MALFORMED;
1702            }
1703            break;
1704        }
1705
1706        case FOURCC('m', 'e', 'a', 'n'):
1707        case FOURCC('n', 'a', 'm', 'e'):
1708        case FOURCC('d', 'a', 't', 'a'):
1709        {
1710            *offset += chunk_size;
1711
1712            if (mPath.size() == 6 && underMetaDataPath(mPath)) {
1713                status_t err = parseITunesMetaData(data_offset, chunk_data_size);
1714
1715                if (err != OK) {
1716                    return err;
1717                }
1718            }
1719
1720            break;
1721        }
1722
1723        case FOURCC('m', 'v', 'h', 'd'):
1724        {
1725            *offset += chunk_size;
1726
1727            if (chunk_data_size < 24) {
1728                return ERROR_MALFORMED;
1729            }
1730
1731            uint8_t header[24];
1732            if (mDataSource->readAt(
1733                        data_offset, header, sizeof(header))
1734                    < (ssize_t)sizeof(header)) {
1735                return ERROR_IO;
1736            }
1737
1738            uint64_t creationTime;
1739            if (header[0] == 1) {
1740                creationTime = U64_AT(&header[4]);
1741                mHeaderTimescale = U32_AT(&header[20]);
1742            } else if (header[0] != 0) {
1743                return ERROR_MALFORMED;
1744            } else {
1745                creationTime = U32_AT(&header[4]);
1746                mHeaderTimescale = U32_AT(&header[12]);
1747            }
1748
1749            String8 s;
1750            convertTimeToDate(creationTime, &s);
1751
1752            mFileMetaData->setCString(kKeyDate, s.string());
1753
1754            break;
1755        }
1756
1757        case FOURCC('m', 'd', 'a', 't'):
1758        {
1759            ALOGV("mdat chunk, drm: %d", mIsDrm);
1760            if (!mIsDrm) {
1761                *offset += chunk_size;
1762                break;
1763            }
1764
1765            if (chunk_size < 8) {
1766                return ERROR_MALFORMED;
1767            }
1768
1769            return parseDrmSINF(offset, data_offset);
1770        }
1771
1772        case FOURCC('h', 'd', 'l', 'r'):
1773        {
1774            *offset += chunk_size;
1775
1776            uint32_t buffer;
1777            if (mDataSource->readAt(
1778                        data_offset + 8, &buffer, 4) < 4) {
1779                return ERROR_IO;
1780            }
1781
1782            uint32_t type = ntohl(buffer);
1783            // For the 3GPP file format, the handler-type within the 'hdlr' box
1784            // shall be 'text'. We also want to support 'sbtl' handler type
1785            // for a practical reason as various MPEG4 containers use it.
1786            if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
1787                mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
1788            }
1789
1790            break;
1791        }
1792
1793        case FOURCC('t', 'x', '3', 'g'):
1794        {
1795            uint32_t type;
1796            const void *data;
1797            size_t size = 0;
1798            if (!mLastTrack->meta->findData(
1799                    kKeyTextFormatData, &type, &data, &size)) {
1800                size = 0;
1801            }
1802
1803            uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size];
1804            if (buffer == NULL) {
1805                return ERROR_MALFORMED;
1806            }
1807
1808            if (size > 0) {
1809                memcpy(buffer, data, size);
1810            }
1811
1812            if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
1813                    < chunk_size) {
1814                delete[] buffer;
1815                buffer = NULL;
1816
1817                // advance read pointer so we don't end up reading this again
1818                *offset += chunk_size;
1819                return ERROR_IO;
1820            }
1821
1822            mLastTrack->meta->setData(
1823                    kKeyTextFormatData, 0, buffer, size + chunk_size);
1824
1825            delete[] buffer;
1826
1827            *offset += chunk_size;
1828            break;
1829        }
1830
1831        case FOURCC('c', 'o', 'v', 'r'):
1832        {
1833            *offset += chunk_size;
1834
1835            if (mFileMetaData != NULL) {
1836                ALOGV("chunk_data_size = %lld and data_offset = %lld",
1837                        chunk_data_size, data_offset);
1838                sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1);
1839                if (mDataSource->readAt(
1840                    data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) {
1841                    return ERROR_IO;
1842                }
1843                const int kSkipBytesOfDataBox = 16;
1844                mFileMetaData->setData(
1845                    kKeyAlbumArt, MetaData::TYPE_NONE,
1846                    buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
1847            }
1848
1849            break;
1850        }
1851
1852        case FOURCC('t', 'i', 't', 'l'):
1853        case FOURCC('p', 'e', 'r', 'f'):
1854        case FOURCC('a', 'u', 't', 'h'):
1855        case FOURCC('g', 'n', 'r', 'e'):
1856        case FOURCC('a', 'l', 'b', 'm'):
1857        case FOURCC('y', 'r', 'r', 'c'):
1858        {
1859            *offset += chunk_size;
1860
1861            status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
1862
1863            if (err != OK) {
1864                return err;
1865            }
1866
1867            break;
1868        }
1869
1870        case FOURCC('I', 'D', '3', '2'):
1871        {
1872            *offset += chunk_size;
1873
1874            if (chunk_data_size < 6) {
1875                return ERROR_MALFORMED;
1876            }
1877
1878            parseID3v2MetaData(data_offset + 6);
1879
1880            break;
1881        }
1882
1883        case FOURCC('-', '-', '-', '-'):
1884        {
1885            mLastCommentMean.clear();
1886            mLastCommentName.clear();
1887            mLastCommentData.clear();
1888            *offset += chunk_size;
1889            break;
1890        }
1891
1892        case FOURCC('s', 'i', 'd', 'x'):
1893        {
1894            parseSegmentIndex(data_offset, chunk_data_size);
1895            *offset += chunk_size;
1896            return UNKNOWN_ERROR; // stop parsing after sidx
1897        }
1898
1899        default:
1900        {
1901            *offset += chunk_size;
1902            break;
1903        }
1904    }
1905
1906    return OK;
1907}
1908
1909status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
1910  ALOGV("MPEG4Extractor::parseSegmentIndex");
1911
1912    if (size < 12) {
1913      return -EINVAL;
1914    }
1915
1916    uint32_t flags;
1917    if (!mDataSource->getUInt32(offset, &flags)) {
1918        return ERROR_MALFORMED;
1919    }
1920
1921    uint32_t version = flags >> 24;
1922    flags &= 0xffffff;
1923
1924    ALOGV("sidx version %d", version);
1925
1926    uint32_t referenceId;
1927    if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
1928        return ERROR_MALFORMED;
1929    }
1930
1931    uint32_t timeScale;
1932    if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
1933        return ERROR_MALFORMED;
1934    }
1935    ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
1936
1937    uint64_t earliestPresentationTime;
1938    uint64_t firstOffset;
1939
1940    offset += 12;
1941    size -= 12;
1942
1943    if (version == 0) {
1944        if (size < 8) {
1945            return -EINVAL;
1946        }
1947        uint32_t tmp;
1948        if (!mDataSource->getUInt32(offset, &tmp)) {
1949            return ERROR_MALFORMED;
1950        }
1951        earliestPresentationTime = tmp;
1952        if (!mDataSource->getUInt32(offset + 4, &tmp)) {
1953            return ERROR_MALFORMED;
1954        }
1955        firstOffset = tmp;
1956        offset += 8;
1957        size -= 8;
1958    } else {
1959        if (size < 16) {
1960            return -EINVAL;
1961        }
1962        if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
1963            return ERROR_MALFORMED;
1964        }
1965        if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
1966            return ERROR_MALFORMED;
1967        }
1968        offset += 16;
1969        size -= 16;
1970    }
1971    ALOGV("sidx pres/off: %Ld/%Ld", earliestPresentationTime, firstOffset);
1972
1973    if (size < 4) {
1974        return -EINVAL;
1975    }
1976
1977    uint16_t referenceCount;
1978    if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
1979        return ERROR_MALFORMED;
1980    }
1981    offset += 4;
1982    size -= 4;
1983    ALOGV("refcount: %d", referenceCount);
1984
1985    if (size < referenceCount * 12) {
1986        return -EINVAL;
1987    }
1988
1989    uint64_t total_duration = 0;
1990    for (unsigned int i = 0; i < referenceCount; i++) {
1991        uint32_t d1, d2, d3;
1992
1993        if (!mDataSource->getUInt32(offset, &d1) ||     // size
1994            !mDataSource->getUInt32(offset + 4, &d2) || // duration
1995            !mDataSource->getUInt32(offset + 8, &d3)) { // flags
1996            return ERROR_MALFORMED;
1997        }
1998
1999        if (d1 & 0x80000000) {
2000            ALOGW("sub-sidx boxes not supported yet");
2001        }
2002        bool sap = d3 & 0x80000000;
2003        uint32_t saptype = (d3 >> 28) & 7;
2004        if (!sap || (saptype != 1 && saptype != 2)) {
2005            // type 1 and 2 are sync samples
2006            ALOGW("not a stream access point, or unsupported type: %08x", d3);
2007        }
2008        total_duration += d2;
2009        offset += 12;
2010        ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
2011        SidxEntry se;
2012        se.mSize = d1 & 0x7fffffff;
2013        se.mDurationUs = 1000000LL * d2 / timeScale;
2014        mSidxEntries.add(se);
2015    }
2016
2017    mSidxDuration = total_duration * 1000000 / timeScale;
2018    ALOGV("duration: %lld", mSidxDuration);
2019
2020    int64_t metaDuration;
2021    if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
2022        mLastTrack->meta->setInt64(kKeyDuration, mSidxDuration);
2023    }
2024    return OK;
2025}
2026
2027
2028
2029status_t MPEG4Extractor::parseTrackHeader(
2030        off64_t data_offset, off64_t data_size) {
2031    if (data_size < 4) {
2032        return ERROR_MALFORMED;
2033    }
2034
2035    uint8_t version;
2036    if (mDataSource->readAt(data_offset, &version, 1) < 1) {
2037        return ERROR_IO;
2038    }
2039
2040    size_t dynSize = (version == 1) ? 36 : 24;
2041
2042    uint8_t buffer[36 + 60];
2043
2044    if (data_size != (off64_t)dynSize + 60) {
2045        return ERROR_MALFORMED;
2046    }
2047
2048    if (mDataSource->readAt(
2049                data_offset, buffer, data_size) < (ssize_t)data_size) {
2050        return ERROR_IO;
2051    }
2052
2053    uint64_t ctime, mtime, duration;
2054    int32_t id;
2055
2056    if (version == 1) {
2057        ctime = U64_AT(&buffer[4]);
2058        mtime = U64_AT(&buffer[12]);
2059        id = U32_AT(&buffer[20]);
2060        duration = U64_AT(&buffer[28]);
2061    } else if (version == 0) {
2062        ctime = U32_AT(&buffer[4]);
2063        mtime = U32_AT(&buffer[8]);
2064        id = U32_AT(&buffer[12]);
2065        duration = U32_AT(&buffer[20]);
2066    } else {
2067        return ERROR_UNSUPPORTED;
2068    }
2069
2070    mLastTrack->meta->setInt32(kKeyTrackID, id);
2071
2072    size_t matrixOffset = dynSize + 16;
2073    int32_t a00 = U32_AT(&buffer[matrixOffset]);
2074    int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
2075    int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
2076    int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
2077    int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
2078    int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
2079
2080#if 0
2081    ALOGI("x' = %.2f * x + %.2f * y + %.2f",
2082         a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
2083    ALOGI("y' = %.2f * x + %.2f * y + %.2f",
2084         a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
2085#endif
2086
2087    uint32_t rotationDegrees;
2088
2089    static const int32_t kFixedOne = 0x10000;
2090    if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
2091        // Identity, no rotation
2092        rotationDegrees = 0;
2093    } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2094        rotationDegrees = 90;
2095    } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2096        rotationDegrees = 270;
2097    } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2098        rotationDegrees = 180;
2099    } else {
2100        ALOGW("We only support 0,90,180,270 degree rotation matrices");
2101        rotationDegrees = 0;
2102    }
2103
2104    if (rotationDegrees != 0) {
2105        mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
2106    }
2107
2108    // Handle presentation display size, which could be different
2109    // from the image size indicated by kKeyWidth and kKeyHeight.
2110    uint32_t width = U32_AT(&buffer[dynSize + 52]);
2111    uint32_t height = U32_AT(&buffer[dynSize + 56]);
2112    mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
2113    mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
2114
2115    return OK;
2116}
2117
2118status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
2119    if (size < 4) {
2120        return ERROR_MALFORMED;
2121    }
2122
2123    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2124    if (buffer == NULL) {
2125        return ERROR_MALFORMED;
2126    }
2127    if (mDataSource->readAt(
2128                offset, buffer, size) != (ssize_t)size) {
2129        delete[] buffer;
2130        buffer = NULL;
2131
2132        return ERROR_IO;
2133    }
2134
2135    uint32_t flags = U32_AT(buffer);
2136
2137    uint32_t metadataKey = 0;
2138    char chunk[5];
2139    MakeFourCCString(mPath[4], chunk);
2140    ALOGV("meta: %s @ %lld", chunk, offset);
2141    switch (mPath[4]) {
2142        case FOURCC(0xa9, 'a', 'l', 'b'):
2143        {
2144            metadataKey = kKeyAlbum;
2145            break;
2146        }
2147        case FOURCC(0xa9, 'A', 'R', 'T'):
2148        {
2149            metadataKey = kKeyArtist;
2150            break;
2151        }
2152        case FOURCC('a', 'A', 'R', 'T'):
2153        {
2154            metadataKey = kKeyAlbumArtist;
2155            break;
2156        }
2157        case FOURCC(0xa9, 'd', 'a', 'y'):
2158        {
2159            metadataKey = kKeyYear;
2160            break;
2161        }
2162        case FOURCC(0xa9, 'n', 'a', 'm'):
2163        {
2164            metadataKey = kKeyTitle;
2165            break;
2166        }
2167        case FOURCC(0xa9, 'w', 'r', 't'):
2168        {
2169            metadataKey = kKeyWriter;
2170            break;
2171        }
2172        case FOURCC('c', 'o', 'v', 'r'):
2173        {
2174            metadataKey = kKeyAlbumArt;
2175            break;
2176        }
2177        case FOURCC('g', 'n', 'r', 'e'):
2178        {
2179            metadataKey = kKeyGenre;
2180            break;
2181        }
2182        case FOURCC(0xa9, 'g', 'e', 'n'):
2183        {
2184            metadataKey = kKeyGenre;
2185            break;
2186        }
2187        case FOURCC('c', 'p', 'i', 'l'):
2188        {
2189            if (size == 9 && flags == 21) {
2190                char tmp[16];
2191                sprintf(tmp, "%d",
2192                        (int)buffer[size - 1]);
2193
2194                mFileMetaData->setCString(kKeyCompilation, tmp);
2195            }
2196            break;
2197        }
2198        case FOURCC('t', 'r', 'k', 'n'):
2199        {
2200            if (size == 16 && flags == 0) {
2201                char tmp[16];
2202                uint16_t* pTrack = (uint16_t*)&buffer[10];
2203                uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2204                sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2205
2206                mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2207            }
2208            break;
2209        }
2210        case FOURCC('d', 'i', 's', 'k'):
2211        {
2212            if ((size == 14 || size == 16) && flags == 0) {
2213                char tmp[16];
2214                uint16_t* pDisc = (uint16_t*)&buffer[10];
2215                uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2216                sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2217
2218                mFileMetaData->setCString(kKeyDiscNumber, tmp);
2219            }
2220            break;
2221        }
2222        case FOURCC('-', '-', '-', '-'):
2223        {
2224            buffer[size] = '\0';
2225            switch (mPath[5]) {
2226                case FOURCC('m', 'e', 'a', 'n'):
2227                    mLastCommentMean.setTo((const char *)buffer + 4);
2228                    break;
2229                case FOURCC('n', 'a', 'm', 'e'):
2230                    mLastCommentName.setTo((const char *)buffer + 4);
2231                    break;
2232                case FOURCC('d', 'a', 't', 'a'):
2233                    mLastCommentData.setTo((const char *)buffer + 8);
2234                    break;
2235            }
2236
2237            // Once we have a set of mean/name/data info, go ahead and process
2238            // it to see if its something we are interested in.  Whether or not
2239            // were are interested in the specific tag, make sure to clear out
2240            // the set so we can be ready to process another tuple should one
2241            // show up later in the file.
2242            if ((mLastCommentMean.length() != 0) &&
2243                (mLastCommentName.length() != 0) &&
2244                (mLastCommentData.length() != 0)) {
2245
2246                if (mLastCommentMean == "com.apple.iTunes"
2247                        && mLastCommentName == "iTunSMPB") {
2248                    int32_t delay, padding;
2249                    if (sscanf(mLastCommentData,
2250                               " %*x %x %x %*x", &delay, &padding) == 2) {
2251                        mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2252                        mLastTrack->meta->setInt32(kKeyEncoderPadding, padding);
2253                    }
2254                }
2255
2256                mLastCommentMean.clear();
2257                mLastCommentName.clear();
2258                mLastCommentData.clear();
2259            }
2260            break;
2261        }
2262
2263        default:
2264            break;
2265    }
2266
2267    if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) {
2268        if (metadataKey == kKeyAlbumArt) {
2269            mFileMetaData->setData(
2270                    kKeyAlbumArt, MetaData::TYPE_NONE,
2271                    buffer + 8, size - 8);
2272        } else if (metadataKey == kKeyGenre) {
2273            if (flags == 0) {
2274                // uint8_t genre code, iTunes genre codes are
2275                // the standard id3 codes, except they start
2276                // at 1 instead of 0 (e.g. Pop is 14, not 13)
2277                // We use standard id3 numbering, so subtract 1.
2278                int genrecode = (int)buffer[size - 1];
2279                genrecode--;
2280                if (genrecode < 0) {
2281                    genrecode = 255; // reserved for 'unknown genre'
2282                }
2283                char genre[10];
2284                sprintf(genre, "%d", genrecode);
2285
2286                mFileMetaData->setCString(metadataKey, genre);
2287            } else if (flags == 1) {
2288                // custom genre string
2289                buffer[size] = '\0';
2290
2291                mFileMetaData->setCString(
2292                        metadataKey, (const char *)buffer + 8);
2293            }
2294        } else {
2295            buffer[size] = '\0';
2296
2297            mFileMetaData->setCString(
2298                    metadataKey, (const char *)buffer + 8);
2299        }
2300    }
2301
2302    delete[] buffer;
2303    buffer = NULL;
2304
2305    return OK;
2306}
2307
2308status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
2309    if (size < 4) {
2310        return ERROR_MALFORMED;
2311    }
2312
2313    uint8_t *buffer = new (std::nothrow) uint8_t[size];
2314    if (buffer == NULL) {
2315        return ERROR_MALFORMED;
2316    }
2317    if (mDataSource->readAt(
2318                offset, buffer, size) != (ssize_t)size) {
2319        delete[] buffer;
2320        buffer = NULL;
2321
2322        return ERROR_IO;
2323    }
2324
2325    uint32_t metadataKey = 0;
2326    switch (mPath[depth]) {
2327        case FOURCC('t', 'i', 't', 'l'):
2328        {
2329            metadataKey = kKeyTitle;
2330            break;
2331        }
2332        case FOURCC('p', 'e', 'r', 'f'):
2333        {
2334            metadataKey = kKeyArtist;
2335            break;
2336        }
2337        case FOURCC('a', 'u', 't', 'h'):
2338        {
2339            metadataKey = kKeyWriter;
2340            break;
2341        }
2342        case FOURCC('g', 'n', 'r', 'e'):
2343        {
2344            metadataKey = kKeyGenre;
2345            break;
2346        }
2347        case FOURCC('a', 'l', 'b', 'm'):
2348        {
2349            if (buffer[size - 1] != '\0') {
2350              char tmp[4];
2351              sprintf(tmp, "%u", buffer[size - 1]);
2352
2353              mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2354            }
2355
2356            metadataKey = kKeyAlbum;
2357            break;
2358        }
2359        case FOURCC('y', 'r', 'r', 'c'):
2360        {
2361            char tmp[5];
2362            uint16_t year = U16_AT(&buffer[4]);
2363
2364            if (year < 10000) {
2365                sprintf(tmp, "%u", year);
2366
2367                mFileMetaData->setCString(kKeyYear, tmp);
2368            }
2369            break;
2370        }
2371
2372        default:
2373            break;
2374    }
2375
2376    if (metadataKey > 0) {
2377        bool isUTF8 = true; // Common case
2378        char16_t *framedata = NULL;
2379        int len16 = 0; // Number of UTF-16 characters
2380
2381        // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
2382        if (size - 6 >= 4) {
2383            len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
2384            framedata = (char16_t *)(buffer + 6);
2385            if (0xfffe == *framedata) {
2386                // endianness marker (BOM) doesn't match host endianness
2387                for (int i = 0; i < len16; i++) {
2388                    framedata[i] = bswap_16(framedata[i]);
2389                }
2390                // BOM is now swapped to 0xfeff, we will execute next block too
2391            }
2392
2393            if (0xfeff == *framedata) {
2394                // Remove the BOM
2395                framedata++;
2396                len16--;
2397                isUTF8 = false;
2398            }
2399            // else normal non-zero-length UTF-8 string
2400            // we can't handle UTF-16 without BOM as there is no other
2401            // indication of encoding.
2402        }
2403
2404        if (isUTF8) {
2405            mFileMetaData->setCString(metadataKey, (const char *)buffer + 6);
2406        } else {
2407            // Convert from UTF-16 string to UTF-8 string.
2408            String8 tmpUTF8str(framedata, len16);
2409            mFileMetaData->setCString(metadataKey, tmpUTF8str.string());
2410        }
2411    }
2412
2413    delete[] buffer;
2414    buffer = NULL;
2415
2416    return OK;
2417}
2418
2419void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
2420    ID3 id3(mDataSource, true /* ignorev1 */, offset);
2421
2422    if (id3.isValid()) {
2423        struct Map {
2424            int key;
2425            const char *tag1;
2426            const char *tag2;
2427        };
2428        static const Map kMap[] = {
2429            { kKeyAlbum, "TALB", "TAL" },
2430            { kKeyArtist, "TPE1", "TP1" },
2431            { kKeyAlbumArtist, "TPE2", "TP2" },
2432            { kKeyComposer, "TCOM", "TCM" },
2433            { kKeyGenre, "TCON", "TCO" },
2434            { kKeyTitle, "TIT2", "TT2" },
2435            { kKeyYear, "TYE", "TYER" },
2436            { kKeyAuthor, "TXT", "TEXT" },
2437            { kKeyCDTrackNumber, "TRK", "TRCK" },
2438            { kKeyDiscNumber, "TPA", "TPOS" },
2439            { kKeyCompilation, "TCP", "TCMP" },
2440        };
2441        static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
2442
2443        for (size_t i = 0; i < kNumMapEntries; ++i) {
2444            if (!mFileMetaData->hasData(kMap[i].key)) {
2445                ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
2446                if (it->done()) {
2447                    delete it;
2448                    it = new ID3::Iterator(id3, kMap[i].tag2);
2449                }
2450
2451                if (it->done()) {
2452                    delete it;
2453                    continue;
2454                }
2455
2456                String8 s;
2457                it->getString(&s);
2458                delete it;
2459
2460                mFileMetaData->setCString(kMap[i].key, s);
2461            }
2462        }
2463
2464        size_t dataSize;
2465        String8 mime;
2466        const void *data = id3.getAlbumArt(&dataSize, &mime);
2467
2468        if (data) {
2469            mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
2470            mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string());
2471        }
2472    }
2473}
2474
2475sp<MediaSource> MPEG4Extractor::getTrack(size_t index) {
2476    status_t err;
2477    if ((err = readMetaData()) != OK) {
2478        return NULL;
2479    }
2480
2481    Track *track = mFirstTrack;
2482    while (index > 0) {
2483        if (track == NULL) {
2484            return NULL;
2485        }
2486
2487        track = track->next;
2488        --index;
2489    }
2490
2491    if (track == NULL) {
2492        return NULL;
2493    }
2494
2495    ALOGV("getTrack called, pssh: %d", mPssh.size());
2496
2497    return new MPEG4Source(
2498            track->meta, mDataSource, track->timescale, track->sampleTable,
2499            mSidxEntries, mMoofOffset);
2500}
2501
2502// static
2503status_t MPEG4Extractor::verifyTrack(Track *track) {
2504    const char *mime;
2505    CHECK(track->meta->findCString(kKeyMIMEType, &mime));
2506
2507    uint32_t type;
2508    const void *data;
2509    size_t size;
2510    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
2511        if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
2512                || type != kTypeAVCC) {
2513            return ERROR_MALFORMED;
2514        }
2515    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
2516        if (!track->meta->findData(kKeyHVCC, &type, &data, &size)
2517                    || type != kTypeHVCC) {
2518            return ERROR_MALFORMED;
2519        }
2520    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
2521            || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
2522        if (!track->meta->findData(kKeyESDS, &type, &data, &size)
2523                || type != kTypeESDS) {
2524            return ERROR_MALFORMED;
2525        }
2526    }
2527
2528    if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
2529        // Make sure we have all the metadata we need.
2530        ALOGE("stbl atom missing/invalid.");
2531        return ERROR_MALFORMED;
2532    }
2533
2534    return OK;
2535}
2536
2537typedef enum {
2538    //AOT_NONE             = -1,
2539    //AOT_NULL_OBJECT      = 0,
2540    //AOT_AAC_MAIN         = 1, /**< Main profile                              */
2541    AOT_AAC_LC           = 2,   /**< Low Complexity object                     */
2542    //AOT_AAC_SSR          = 3,
2543    //AOT_AAC_LTP          = 4,
2544    AOT_SBR              = 5,
2545    //AOT_AAC_SCAL         = 6,
2546    //AOT_TWIN_VQ          = 7,
2547    //AOT_CELP             = 8,
2548    //AOT_HVXC             = 9,
2549    //AOT_RSVD_10          = 10, /**< (reserved)                                */
2550    //AOT_RSVD_11          = 11, /**< (reserved)                                */
2551    //AOT_TTSI             = 12, /**< TTSI Object                               */
2552    //AOT_MAIN_SYNTH       = 13, /**< Main Synthetic object                     */
2553    //AOT_WAV_TAB_SYNTH    = 14, /**< Wavetable Synthesis object                */
2554    //AOT_GEN_MIDI         = 15, /**< General MIDI object                       */
2555    //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */
2556    AOT_ER_AAC_LC        = 17,   /**< Error Resilient(ER) AAC Low Complexity    */
2557    //AOT_RSVD_18          = 18, /**< (reserved)                                */
2558    //AOT_ER_AAC_LTP       = 19, /**< Error Resilient(ER) AAC LTP object        */
2559    AOT_ER_AAC_SCAL      = 20,   /**< Error Resilient(ER) AAC Scalable object   */
2560    //AOT_ER_TWIN_VQ       = 21, /**< Error Resilient(ER) TwinVQ object         */
2561    AOT_ER_BSAC          = 22,   /**< Error Resilient(ER) BSAC object           */
2562    AOT_ER_AAC_LD        = 23,   /**< Error Resilient(ER) AAC LowDelay object   */
2563    //AOT_ER_CELP          = 24, /**< Error Resilient(ER) CELP object           */
2564    //AOT_ER_HVXC          = 25, /**< Error Resilient(ER) HVXC object           */
2565    //AOT_ER_HILN          = 26, /**< Error Resilient(ER) HILN object           */
2566    //AOT_ER_PARA          = 27, /**< Error Resilient(ER) Parametric object     */
2567    //AOT_RSVD_28          = 28, /**< might become SSC                          */
2568    AOT_PS               = 29,   /**< PS, Parametric Stereo (includes SBR)      */
2569    //AOT_MPEGS            = 30, /**< MPEG Surround                             */
2570
2571    AOT_ESCAPE           = 31,   /**< Signal AOT uses more than 5 bits          */
2572
2573    //AOT_MP3ONMP4_L1      = 32, /**< MPEG-Layer1 in mp4                        */
2574    //AOT_MP3ONMP4_L2      = 33, /**< MPEG-Layer2 in mp4                        */
2575    //AOT_MP3ONMP4_L3      = 34, /**< MPEG-Layer3 in mp4                        */
2576    //AOT_RSVD_35          = 35, /**< might become DST                          */
2577    //AOT_RSVD_36          = 36, /**< might become ALS                          */
2578    //AOT_AAC_SLS          = 37, /**< AAC + SLS                                 */
2579    //AOT_SLS              = 38, /**< SLS                                       */
2580    //AOT_ER_AAC_ELD       = 39, /**< AAC Enhanced Low Delay                    */
2581
2582    //AOT_USAC             = 42, /**< USAC                                      */
2583    //AOT_SAOC             = 43, /**< SAOC                                      */
2584    //AOT_LD_MPEGS         = 44, /**< Low Delay MPEG Surround                   */
2585
2586    //AOT_RSVD50           = 50,  /**< Interim AOT for Rsvd50                   */
2587} AUDIO_OBJECT_TYPE;
2588
2589status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
2590        const void *esds_data, size_t esds_size) {
2591    ESDS esds(esds_data, esds_size);
2592
2593    uint8_t objectTypeIndication;
2594    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
2595        return ERROR_MALFORMED;
2596    }
2597
2598    if (objectTypeIndication == 0xe1) {
2599        // This isn't MPEG4 audio at all, it's QCELP 14k...
2600        mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
2601        return OK;
2602    }
2603
2604    if (objectTypeIndication  == 0x6b) {
2605        // The media subtype is MP3 audio
2606        // Our software MP3 audio decoder may not be able to handle
2607        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
2608        ALOGE("MP3 track in MP4/3GPP file is not supported");
2609        return ERROR_UNSUPPORTED;
2610    }
2611
2612    const uint8_t *csd;
2613    size_t csd_size;
2614    if (esds.getCodecSpecificInfo(
2615                (const void **)&csd, &csd_size) != OK) {
2616        return ERROR_MALFORMED;
2617    }
2618
2619#if 0
2620    printf("ESD of size %d\n", csd_size);
2621    hexdump(csd, csd_size);
2622#endif
2623
2624    if (csd_size == 0) {
2625        // There's no further information, i.e. no codec specific data
2626        // Let's assume that the information provided in the mpeg4 headers
2627        // is accurate and hope for the best.
2628
2629        return OK;
2630    }
2631
2632    if (csd_size < 2) {
2633        return ERROR_MALFORMED;
2634    }
2635
2636    static uint32_t kSamplingRate[] = {
2637        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
2638        16000, 12000, 11025, 8000, 7350
2639    };
2640
2641    ABitReader br(csd, csd_size);
2642    uint32_t objectType = br.getBits(5);
2643
2644    if (objectType == 31) {  // AAC-ELD => additional 6 bits
2645        objectType = 32 + br.getBits(6);
2646    }
2647
2648    //keep AOT type
2649    mLastTrack->meta->setInt32(kKeyAACAOT, objectType);
2650
2651    uint32_t freqIndex = br.getBits(4);
2652
2653    int32_t sampleRate = 0;
2654    int32_t numChannels = 0;
2655    if (freqIndex == 15) {
2656        if (csd_size < 5) {
2657            return ERROR_MALFORMED;
2658        }
2659        sampleRate = br.getBits(24);
2660        numChannels = br.getBits(4);
2661    } else {
2662        numChannels = br.getBits(4);
2663
2664        if (freqIndex == 13 || freqIndex == 14) {
2665            return ERROR_MALFORMED;
2666        }
2667
2668        sampleRate = kSamplingRate[freqIndex];
2669    }
2670
2671    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
2672        uint32_t extFreqIndex = br.getBits(4);
2673        int32_t extSampleRate;
2674        if (extFreqIndex == 15) {
2675            if (csd_size < 8) {
2676                return ERROR_MALFORMED;
2677            }
2678            extSampleRate = br.getBits(24);
2679        } else {
2680            if (extFreqIndex == 13 || extFreqIndex == 14) {
2681                return ERROR_MALFORMED;
2682            }
2683            extSampleRate = kSamplingRate[extFreqIndex];
2684        }
2685        //TODO: save the extension sampling rate value in meta data =>
2686        //      mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
2687    }
2688
2689    switch (numChannels) {
2690        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
2691        case 0:
2692        case 1:// FC
2693        case 2:// FL FR
2694        case 3:// FC, FL FR
2695        case 4:// FC, FL FR, RC
2696        case 5:// FC, FL FR, SL SR
2697        case 6:// FC, FL FR, SL SR, LFE
2698            //numChannels already contains the right value
2699            break;
2700        case 11:// FC, FL FR, SL SR, RC, LFE
2701            numChannels = 7;
2702            break;
2703        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
2704        case 12:// FC, FL  FR,  SL SR, RL RR, LFE
2705        case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
2706            numChannels = 8;
2707            break;
2708        default:
2709            return ERROR_UNSUPPORTED;
2710    }
2711
2712    {
2713        if (objectType == AOT_SBR || objectType == AOT_PS) {
2714            const int32_t extensionSamplingFrequency = br.getBits(4);
2715            objectType = br.getBits(5);
2716
2717            if (objectType == AOT_ESCAPE) {
2718                objectType = 32 + br.getBits(6);
2719            }
2720        }
2721        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
2722                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
2723                objectType == AOT_ER_BSAC) {
2724            const int32_t frameLengthFlag = br.getBits(1);
2725
2726            const int32_t dependsOnCoreCoder = br.getBits(1);
2727
2728            if (dependsOnCoreCoder ) {
2729                const int32_t coreCoderDelay = br.getBits(14);
2730            }
2731
2732            const int32_t extensionFlag = br.getBits(1);
2733
2734            if (numChannels == 0 ) {
2735                int32_t channelsEffectiveNum = 0;
2736                int32_t channelsNum = 0;
2737                const int32_t ElementInstanceTag = br.getBits(4);
2738                const int32_t Profile = br.getBits(2);
2739                const int32_t SamplingFrequencyIndex = br.getBits(4);
2740                const int32_t NumFrontChannelElements = br.getBits(4);
2741                const int32_t NumSideChannelElements = br.getBits(4);
2742                const int32_t NumBackChannelElements = br.getBits(4);
2743                const int32_t NumLfeChannelElements = br.getBits(2);
2744                const int32_t NumAssocDataElements = br.getBits(3);
2745                const int32_t NumValidCcElements = br.getBits(4);
2746
2747                const int32_t MonoMixdownPresent = br.getBits(1);
2748                if (MonoMixdownPresent != 0) {
2749                    const int32_t MonoMixdownElementNumber = br.getBits(4);
2750                }
2751
2752                const int32_t StereoMixdownPresent = br.getBits(1);
2753                if (StereoMixdownPresent != 0) {
2754                    const int32_t StereoMixdownElementNumber = br.getBits(4);
2755                }
2756
2757                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
2758                if (MatrixMixdownIndexPresent != 0) {
2759                    const int32_t MatrixMixdownIndex = br.getBits(2);
2760                    const int32_t PseudoSurroundEnable = br.getBits(1);
2761                }
2762
2763                int i;
2764                for (i=0; i < NumFrontChannelElements; i++) {
2765                    const int32_t FrontElementIsCpe = br.getBits(1);
2766                    const int32_t FrontElementTagSelect = br.getBits(4);
2767                    channelsNum += FrontElementIsCpe ? 2 : 1;
2768                }
2769
2770                for (i=0; i < NumSideChannelElements; i++) {
2771                    const int32_t SideElementIsCpe = br.getBits(1);
2772                    const int32_t SideElementTagSelect = br.getBits(4);
2773                    channelsNum += SideElementIsCpe ? 2 : 1;
2774                }
2775
2776                for (i=0; i < NumBackChannelElements; i++) {
2777                    const int32_t BackElementIsCpe = br.getBits(1);
2778                    const int32_t BackElementTagSelect = br.getBits(4);
2779                    channelsNum += BackElementIsCpe ? 2 : 1;
2780                }
2781                channelsEffectiveNum = channelsNum;
2782
2783                for (i=0; i < NumLfeChannelElements; i++) {
2784                    const int32_t LfeElementTagSelect = br.getBits(4);
2785                    channelsNum += 1;
2786                }
2787                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
2788                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
2789                numChannels = channelsNum;
2790            }
2791        }
2792    }
2793
2794    if (numChannels == 0) {
2795        return ERROR_UNSUPPORTED;
2796    }
2797
2798    int32_t prevSampleRate;
2799    CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));
2800
2801    if (prevSampleRate != sampleRate) {
2802        ALOGV("mpeg4 audio sample rate different from previous setting. "
2803             "was: %d, now: %d", prevSampleRate, sampleRate);
2804    }
2805
2806    mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);
2807
2808    int32_t prevChannelCount;
2809    CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));
2810
2811    if (prevChannelCount != numChannels) {
2812        ALOGV("mpeg4 audio channel count different from previous setting. "
2813             "was: %d, now: %d", prevChannelCount, numChannels);
2814    }
2815
2816    mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);
2817
2818    return OK;
2819}
2820
2821////////////////////////////////////////////////////////////////////////////////
2822
2823MPEG4Source::MPEG4Source(
2824        const sp<MetaData> &format,
2825        const sp<DataSource> &dataSource,
2826        int32_t timeScale,
2827        const sp<SampleTable> &sampleTable,
2828        Vector<SidxEntry> &sidx,
2829        off64_t firstMoofOffset)
2830    : mFormat(format),
2831      mDataSource(dataSource),
2832      mTimescale(timeScale),
2833      mSampleTable(sampleTable),
2834      mCurrentSampleIndex(0),
2835      mCurrentFragmentIndex(0),
2836      mSegments(sidx),
2837      mFirstMoofOffset(firstMoofOffset),
2838      mCurrentMoofOffset(firstMoofOffset),
2839      mCurrentTime(0),
2840      mCurrentSampleInfoAllocSize(0),
2841      mCurrentSampleInfoSizes(NULL),
2842      mCurrentSampleInfoOffsetsAllocSize(0),
2843      mCurrentSampleInfoOffsets(NULL),
2844      mIsAVC(false),
2845      mIsHEVC(false),
2846      mNALLengthSize(0),
2847      mStarted(false),
2848      mGroup(NULL),
2849      mBuffer(NULL),
2850      mWantsNALFragments(false),
2851      mSrcBuffer(NULL) {
2852
2853    mFormat->findInt32(kKeyCryptoMode, &mCryptoMode);
2854    mDefaultIVSize = 0;
2855    mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
2856    uint32_t keytype;
2857    const void *key;
2858    size_t keysize;
2859    if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
2860        CHECK(keysize <= 16);
2861        memset(mCryptoKey, 0, 16);
2862        memcpy(mCryptoKey, key, keysize);
2863    }
2864
2865    const char *mime;
2866    bool success = mFormat->findCString(kKeyMIMEType, &mime);
2867    CHECK(success);
2868
2869    mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
2870    mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC);
2871
2872    if (mIsAVC) {
2873        uint32_t type;
2874        const void *data;
2875        size_t size;
2876        CHECK(format->findData(kKeyAVCC, &type, &data, &size));
2877
2878        const uint8_t *ptr = (const uint8_t *)data;
2879
2880        CHECK(size >= 7);
2881        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
2882
2883        // The number of bytes used to encode the length of a NAL unit.
2884        mNALLengthSize = 1 + (ptr[4] & 3);
2885    } else if (mIsHEVC) {
2886        uint32_t type;
2887        const void *data;
2888        size_t size;
2889        CHECK(format->findData(kKeyHVCC, &type, &data, &size));
2890
2891        const uint8_t *ptr = (const uint8_t *)data;
2892
2893        CHECK(size >= 7);
2894        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
2895
2896        mNALLengthSize = 1 + (ptr[14 + 7] & 3);
2897    }
2898
2899    CHECK(format->findInt32(kKeyTrackID, &mTrackId));
2900
2901    if (mFirstMoofOffset != 0) {
2902        off64_t offset = mFirstMoofOffset;
2903        parseChunk(&offset);
2904    }
2905}
2906
2907MPEG4Source::~MPEG4Source() {
2908    if (mStarted) {
2909        stop();
2910    }
2911    free(mCurrentSampleInfoSizes);
2912    free(mCurrentSampleInfoOffsets);
2913}
2914
2915status_t MPEG4Source::start(MetaData *params) {
2916    Mutex::Autolock autoLock(mLock);
2917
2918    CHECK(!mStarted);
2919
2920    int32_t val;
2921    if (params && params->findInt32(kKeyWantsNALFragments, &val)
2922        && val != 0) {
2923        mWantsNALFragments = true;
2924    } else {
2925        mWantsNALFragments = false;
2926    }
2927
2928    mGroup = new MediaBufferGroup;
2929
2930    int32_t max_size;
2931    CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size));
2932
2933    mGroup->add_buffer(new MediaBuffer(max_size));
2934
2935    mSrcBuffer = new (std::nothrow) uint8_t[max_size];
2936    if (mSrcBuffer == NULL) {
2937        // file probably specified a bad max size
2938        return ERROR_MALFORMED;
2939    }
2940
2941    mStarted = true;
2942
2943    return OK;
2944}
2945
2946status_t MPEG4Source::stop() {
2947    Mutex::Autolock autoLock(mLock);
2948
2949    CHECK(mStarted);
2950
2951    if (mBuffer != NULL) {
2952        mBuffer->release();
2953        mBuffer = NULL;
2954    }
2955
2956    delete[] mSrcBuffer;
2957    mSrcBuffer = NULL;
2958
2959    delete mGroup;
2960    mGroup = NULL;
2961
2962    mStarted = false;
2963    mCurrentSampleIndex = 0;
2964
2965    return OK;
2966}
2967
2968status_t MPEG4Source::parseChunk(off64_t *offset) {
2969    uint32_t hdr[2];
2970    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
2971        return ERROR_IO;
2972    }
2973    uint64_t chunk_size = ntohl(hdr[0]);
2974    uint32_t chunk_type = ntohl(hdr[1]);
2975    off64_t data_offset = *offset + 8;
2976
2977    if (chunk_size == 1) {
2978        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
2979            return ERROR_IO;
2980        }
2981        chunk_size = ntoh64(chunk_size);
2982        data_offset += 8;
2983
2984        if (chunk_size < 16) {
2985            // The smallest valid chunk is 16 bytes long in this case.
2986            return ERROR_MALFORMED;
2987        }
2988    } else if (chunk_size < 8) {
2989        // The smallest valid chunk is 8 bytes long.
2990        return ERROR_MALFORMED;
2991    }
2992
2993    char chunk[5];
2994    MakeFourCCString(chunk_type, chunk);
2995    ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset);
2996
2997    off64_t chunk_data_size = *offset + chunk_size - data_offset;
2998
2999    switch(chunk_type) {
3000
3001        case FOURCC('t', 'r', 'a', 'f'):
3002        case FOURCC('m', 'o', 'o', 'f'): {
3003            off64_t stop_offset = *offset + chunk_size;
3004            *offset = data_offset;
3005            while (*offset < stop_offset) {
3006                status_t err = parseChunk(offset);
3007                if (err != OK) {
3008                    return err;
3009                }
3010            }
3011            if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3012                // *offset points to the box following this moof. Find the next moof from there.
3013
3014                while (true) {
3015                    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3016                        return ERROR_END_OF_STREAM;
3017                    }
3018                    chunk_size = ntohl(hdr[0]);
3019                    chunk_type = ntohl(hdr[1]);
3020                    if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3021                        mNextMoofOffset = *offset;
3022                        break;
3023                    }
3024                    *offset += chunk_size;
3025                }
3026            }
3027            break;
3028        }
3029
3030        case FOURCC('t', 'f', 'h', 'd'): {
3031                status_t err;
3032                if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
3033                    return err;
3034                }
3035                *offset += chunk_size;
3036                break;
3037        }
3038
3039        case FOURCC('t', 'r', 'u', 'n'): {
3040                status_t err;
3041                if (mLastParsedTrackId == mTrackId) {
3042                    if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
3043                        return err;
3044                    }
3045                }
3046
3047                *offset += chunk_size;
3048                break;
3049        }
3050
3051        case FOURCC('s', 'a', 'i', 'z'): {
3052            status_t err;
3053            if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
3054                return err;
3055            }
3056            *offset += chunk_size;
3057            break;
3058        }
3059        case FOURCC('s', 'a', 'i', 'o'): {
3060            status_t err;
3061            if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
3062                return err;
3063            }
3064            *offset += chunk_size;
3065            break;
3066        }
3067
3068        case FOURCC('m', 'd', 'a', 't'): {
3069            // parse DRM info if present
3070            ALOGV("MPEG4Source::parseChunk mdat");
3071            // if saiz/saoi was previously observed, do something with the sampleinfos
3072            *offset += chunk_size;
3073            break;
3074        }
3075
3076        default: {
3077            *offset += chunk_size;
3078            break;
3079        }
3080    }
3081    return OK;
3082}
3083
3084status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
3085        off64_t offset, off64_t /* size */) {
3086    ALOGV("parseSampleAuxiliaryInformationSizes");
3087    // 14496-12 8.7.12
3088    uint8_t version;
3089    if (mDataSource->readAt(
3090            offset, &version, sizeof(version))
3091            < (ssize_t)sizeof(version)) {
3092        return ERROR_IO;
3093    }
3094
3095    if (version != 0) {
3096        return ERROR_UNSUPPORTED;
3097    }
3098    offset++;
3099
3100    uint32_t flags;
3101    if (!mDataSource->getUInt24(offset, &flags)) {
3102        return ERROR_IO;
3103    }
3104    offset += 3;
3105
3106    if (flags & 1) {
3107        uint32_t tmp;
3108        if (!mDataSource->getUInt32(offset, &tmp)) {
3109            return ERROR_MALFORMED;
3110        }
3111        mCurrentAuxInfoType = tmp;
3112        offset += 4;
3113        if (!mDataSource->getUInt32(offset, &tmp)) {
3114            return ERROR_MALFORMED;
3115        }
3116        mCurrentAuxInfoTypeParameter = tmp;
3117        offset += 4;
3118    }
3119
3120    uint8_t defsize;
3121    if (mDataSource->readAt(offset, &defsize, 1) != 1) {
3122        return ERROR_MALFORMED;
3123    }
3124    mCurrentDefaultSampleInfoSize = defsize;
3125    offset++;
3126
3127    uint32_t smplcnt;
3128    if (!mDataSource->getUInt32(offset, &smplcnt)) {
3129        return ERROR_MALFORMED;
3130    }
3131    mCurrentSampleInfoCount = smplcnt;
3132    offset += 4;
3133
3134    if (mCurrentDefaultSampleInfoSize != 0) {
3135        ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
3136        return OK;
3137    }
3138    if (smplcnt > mCurrentSampleInfoAllocSize) {
3139        mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
3140        mCurrentSampleInfoAllocSize = smplcnt;
3141    }
3142
3143    mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
3144    return OK;
3145}
3146
3147status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
3148        off64_t offset, off64_t /* size */) {
3149    ALOGV("parseSampleAuxiliaryInformationOffsets");
3150    // 14496-12 8.7.13
3151    uint8_t version;
3152    if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
3153        return ERROR_IO;
3154    }
3155    offset++;
3156
3157    uint32_t flags;
3158    if (!mDataSource->getUInt24(offset, &flags)) {
3159        return ERROR_IO;
3160    }
3161    offset += 3;
3162
3163    uint32_t entrycount;
3164    if (!mDataSource->getUInt32(offset, &entrycount)) {
3165        return ERROR_IO;
3166    }
3167    offset += 4;
3168
3169    if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
3170        mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8);
3171        mCurrentSampleInfoOffsetsAllocSize = entrycount;
3172    }
3173    mCurrentSampleInfoOffsetCount = entrycount;
3174
3175    for (size_t i = 0; i < entrycount; i++) {
3176        if (version == 0) {
3177            uint32_t tmp;
3178            if (!mDataSource->getUInt32(offset, &tmp)) {
3179                return ERROR_IO;
3180            }
3181            mCurrentSampleInfoOffsets[i] = tmp;
3182            offset += 4;
3183        } else {
3184            uint64_t tmp;
3185            if (!mDataSource->getUInt64(offset, &tmp)) {
3186                return ERROR_IO;
3187            }
3188            mCurrentSampleInfoOffsets[i] = tmp;
3189            offset += 8;
3190        }
3191    }
3192
3193    // parse clear/encrypted data
3194
3195    off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
3196
3197    drmoffset += mCurrentMoofOffset;
3198    int ivlength;
3199    CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength));
3200
3201    // read CencSampleAuxiliaryDataFormats
3202    for (size_t i = 0; i < mCurrentSampleInfoCount; i++) {
3203        Sample *smpl = &mCurrentSamples.editItemAt(i);
3204
3205        memset(smpl->iv, 0, 16);
3206        if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) {
3207            return ERROR_IO;
3208        }
3209
3210        drmoffset += ivlength;
3211
3212        int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
3213        if (smplinfosize == 0) {
3214            smplinfosize = mCurrentSampleInfoSizes[i];
3215        }
3216        if (smplinfosize > ivlength) {
3217            uint16_t numsubsamples;
3218            if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) {
3219                return ERROR_IO;
3220            }
3221            drmoffset += 2;
3222            for (size_t j = 0; j < numsubsamples; j++) {
3223                uint16_t numclear;
3224                uint32_t numencrypted;
3225                if (!mDataSource->getUInt16(drmoffset, &numclear)) {
3226                    return ERROR_IO;
3227                }
3228                drmoffset += 2;
3229                if (!mDataSource->getUInt32(drmoffset, &numencrypted)) {
3230                    return ERROR_IO;
3231                }
3232                drmoffset += 4;
3233                smpl->clearsizes.add(numclear);
3234                smpl->encryptedsizes.add(numencrypted);
3235            }
3236        } else {
3237            smpl->clearsizes.add(0);
3238            smpl->encryptedsizes.add(smpl->size);
3239        }
3240    }
3241
3242
3243    return OK;
3244}
3245
3246status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
3247
3248    if (size < 8) {
3249        return -EINVAL;
3250    }
3251
3252    uint32_t flags;
3253    if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
3254        return ERROR_MALFORMED;
3255    }
3256
3257    if (flags & 0xff000000) {
3258        return -EINVAL;
3259    }
3260
3261    if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
3262        return ERROR_MALFORMED;
3263    }
3264
3265    if (mLastParsedTrackId != mTrackId) {
3266        // this is not the right track, skip it
3267        return OK;
3268    }
3269
3270    mTrackFragmentHeaderInfo.mFlags = flags;
3271    mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
3272    offset += 8;
3273    size -= 8;
3274
3275    ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
3276
3277    if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
3278        if (size < 8) {
3279            return -EINVAL;
3280        }
3281
3282        if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
3283            return ERROR_MALFORMED;
3284        }
3285        offset += 8;
3286        size -= 8;
3287    }
3288
3289    if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
3290        if (size < 4) {
3291            return -EINVAL;
3292        }
3293
3294        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
3295            return ERROR_MALFORMED;
3296        }
3297        offset += 4;
3298        size -= 4;
3299    }
3300
3301    if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3302        if (size < 4) {
3303            return -EINVAL;
3304        }
3305
3306        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
3307            return ERROR_MALFORMED;
3308        }
3309        offset += 4;
3310        size -= 4;
3311    }
3312
3313    if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3314        if (size < 4) {
3315            return -EINVAL;
3316        }
3317
3318        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
3319            return ERROR_MALFORMED;
3320        }
3321        offset += 4;
3322        size -= 4;
3323    }
3324
3325    if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3326        if (size < 4) {
3327            return -EINVAL;
3328        }
3329
3330        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
3331            return ERROR_MALFORMED;
3332        }
3333        offset += 4;
3334        size -= 4;
3335    }
3336
3337    if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
3338        mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
3339    }
3340
3341    mTrackFragmentHeaderInfo.mDataOffset = 0;
3342    return OK;
3343}
3344
3345status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
3346
3347    ALOGV("MPEG4Extractor::parseTrackFragmentRun");
3348    if (size < 8) {
3349        return -EINVAL;
3350    }
3351
3352    enum {
3353        kDataOffsetPresent                  = 0x01,
3354        kFirstSampleFlagsPresent            = 0x04,
3355        kSampleDurationPresent              = 0x100,
3356        kSampleSizePresent                  = 0x200,
3357        kSampleFlagsPresent                 = 0x400,
3358        kSampleCompositionTimeOffsetPresent = 0x800,
3359    };
3360
3361    uint32_t flags;
3362    if (!mDataSource->getUInt32(offset, &flags)) {
3363        return ERROR_MALFORMED;
3364    }
3365    ALOGV("fragment run flags: %08x", flags);
3366
3367    if (flags & 0xff000000) {
3368        return -EINVAL;
3369    }
3370
3371    if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
3372        // These two shall not be used together.
3373        return -EINVAL;
3374    }
3375
3376    uint32_t sampleCount;
3377    if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
3378        return ERROR_MALFORMED;
3379    }
3380    offset += 8;
3381    size -= 8;
3382
3383    uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
3384
3385    uint32_t firstSampleFlags = 0;
3386
3387    if (flags & kDataOffsetPresent) {
3388        if (size < 4) {
3389            return -EINVAL;
3390        }
3391
3392        int32_t dataOffsetDelta;
3393        if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
3394            return ERROR_MALFORMED;
3395        }
3396
3397        dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
3398
3399        offset += 4;
3400        size -= 4;
3401    }
3402
3403    if (flags & kFirstSampleFlagsPresent) {
3404        if (size < 4) {
3405            return -EINVAL;
3406        }
3407
3408        if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
3409            return ERROR_MALFORMED;
3410        }
3411        offset += 4;
3412        size -= 4;
3413    }
3414
3415    uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
3416             sampleCtsOffset = 0;
3417
3418    size_t bytesPerSample = 0;
3419    if (flags & kSampleDurationPresent) {
3420        bytesPerSample += 4;
3421    } else if (mTrackFragmentHeaderInfo.mFlags
3422            & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3423        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
3424    } else {
3425        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
3426    }
3427
3428    if (flags & kSampleSizePresent) {
3429        bytesPerSample += 4;
3430    } else if (mTrackFragmentHeaderInfo.mFlags
3431            & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3432        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
3433    } else {
3434        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
3435    }
3436
3437    if (flags & kSampleFlagsPresent) {
3438        bytesPerSample += 4;
3439    } else if (mTrackFragmentHeaderInfo.mFlags
3440            & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3441        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
3442    } else {
3443        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
3444    }
3445
3446    if (flags & kSampleCompositionTimeOffsetPresent) {
3447        bytesPerSample += 4;
3448    } else {
3449        sampleCtsOffset = 0;
3450    }
3451
3452    if (size < sampleCount * bytesPerSample) {
3453        return -EINVAL;
3454    }
3455
3456    Sample tmp;
3457    for (uint32_t i = 0; i < sampleCount; ++i) {
3458        if (flags & kSampleDurationPresent) {
3459            if (!mDataSource->getUInt32(offset, &sampleDuration)) {
3460                return ERROR_MALFORMED;
3461            }
3462            offset += 4;
3463        }
3464
3465        if (flags & kSampleSizePresent) {
3466            if (!mDataSource->getUInt32(offset, &sampleSize)) {
3467                return ERROR_MALFORMED;
3468            }
3469            offset += 4;
3470        }
3471
3472        if (flags & kSampleFlagsPresent) {
3473            if (!mDataSource->getUInt32(offset, &sampleFlags)) {
3474                return ERROR_MALFORMED;
3475            }
3476            offset += 4;
3477        }
3478
3479        if (flags & kSampleCompositionTimeOffsetPresent) {
3480            if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
3481                return ERROR_MALFORMED;
3482            }
3483            offset += 4;
3484        }
3485
3486        ALOGV("adding sample %d at offset 0x%08llx, size %u, duration %u, "
3487              " flags 0x%08x", i + 1,
3488                dataOffset, sampleSize, sampleDuration,
3489                (flags & kFirstSampleFlagsPresent) && i == 0
3490                    ? firstSampleFlags : sampleFlags);
3491        tmp.offset = dataOffset;
3492        tmp.size = sampleSize;
3493        tmp.duration = sampleDuration;
3494        mCurrentSamples.add(tmp);
3495
3496        dataOffset += sampleSize;
3497    }
3498
3499    mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
3500
3501    return OK;
3502}
3503
3504sp<MetaData> MPEG4Source::getFormat() {
3505    Mutex::Autolock autoLock(mLock);
3506
3507    return mFormat;
3508}
3509
3510size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
3511    switch (mNALLengthSize) {
3512        case 1:
3513            return *data;
3514        case 2:
3515            return U16_AT(data);
3516        case 3:
3517            return ((size_t)data[0] << 16) | U16_AT(&data[1]);
3518        case 4:
3519            return U32_AT(data);
3520    }
3521
3522    // This cannot happen, mNALLengthSize springs to life by adding 1 to
3523    // a 2-bit integer.
3524    CHECK(!"Should not be here.");
3525
3526    return 0;
3527}
3528
3529status_t MPEG4Source::read(
3530        MediaBuffer **out, const ReadOptions *options) {
3531    Mutex::Autolock autoLock(mLock);
3532
3533    CHECK(mStarted);
3534
3535    if (mFirstMoofOffset > 0) {
3536        return fragmentedRead(out, options);
3537    }
3538
3539    *out = NULL;
3540
3541    int64_t targetSampleTimeUs = -1;
3542
3543    int64_t seekTimeUs;
3544    ReadOptions::SeekMode mode;
3545    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3546        uint32_t findFlags = 0;
3547        switch (mode) {
3548            case ReadOptions::SEEK_PREVIOUS_SYNC:
3549                findFlags = SampleTable::kFlagBefore;
3550                break;
3551            case ReadOptions::SEEK_NEXT_SYNC:
3552                findFlags = SampleTable::kFlagAfter;
3553                break;
3554            case ReadOptions::SEEK_CLOSEST_SYNC:
3555            case ReadOptions::SEEK_CLOSEST:
3556                findFlags = SampleTable::kFlagClosest;
3557                break;
3558            default:
3559                CHECK(!"Should not be here.");
3560                break;
3561        }
3562
3563        uint32_t sampleIndex;
3564        status_t err = mSampleTable->findSampleAtTime(
3565                seekTimeUs * mTimescale / 1000000,
3566                &sampleIndex, findFlags);
3567
3568        if (mode == ReadOptions::SEEK_CLOSEST) {
3569            // We found the closest sample already, now we want the sync
3570            // sample preceding it (or the sample itself of course), even
3571            // if the subsequent sync sample is closer.
3572            findFlags = SampleTable::kFlagBefore;
3573        }
3574
3575        uint32_t syncSampleIndex;
3576        if (err == OK) {
3577            err = mSampleTable->findSyncSampleNear(
3578                    sampleIndex, &syncSampleIndex, findFlags);
3579        }
3580
3581        uint32_t sampleTime;
3582        if (err == OK) {
3583            err = mSampleTable->getMetaDataForSample(
3584                    sampleIndex, NULL, NULL, &sampleTime);
3585        }
3586
3587        if (err != OK) {
3588            if (err == ERROR_OUT_OF_RANGE) {
3589                // An attempt to seek past the end of the stream would
3590                // normally cause this ERROR_OUT_OF_RANGE error. Propagating
3591                // this all the way to the MediaPlayer would cause abnormal
3592                // termination. Legacy behaviour appears to be to behave as if
3593                // we had seeked to the end of stream, ending normally.
3594                err = ERROR_END_OF_STREAM;
3595            }
3596            ALOGV("end of stream");
3597            return err;
3598        }
3599
3600        if (mode == ReadOptions::SEEK_CLOSEST) {
3601            targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
3602        }
3603
3604#if 0
3605        uint32_t syncSampleTime;
3606        CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
3607                    syncSampleIndex, NULL, NULL, &syncSampleTime));
3608
3609        ALOGI("seek to time %lld us => sample at time %lld us, "
3610             "sync sample at time %lld us",
3611             seekTimeUs,
3612             sampleTime * 1000000ll / mTimescale,
3613             syncSampleTime * 1000000ll / mTimescale);
3614#endif
3615
3616        mCurrentSampleIndex = syncSampleIndex;
3617        if (mBuffer != NULL) {
3618            mBuffer->release();
3619            mBuffer = NULL;
3620        }
3621
3622        // fall through
3623    }
3624
3625    off64_t offset;
3626    size_t size;
3627    uint32_t cts, stts;
3628    bool isSyncSample;
3629    bool newBuffer = false;
3630    if (mBuffer == NULL) {
3631        newBuffer = true;
3632
3633        status_t err =
3634            mSampleTable->getMetaDataForSample(
3635                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
3636
3637        if (err != OK) {
3638            return err;
3639        }
3640
3641        err = mGroup->acquire_buffer(&mBuffer);
3642
3643        if (err != OK) {
3644            CHECK(mBuffer == NULL);
3645            return err;
3646        }
3647    }
3648
3649    if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) {
3650        if (newBuffer) {
3651            ssize_t num_bytes_read =
3652                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3653
3654            if (num_bytes_read < (ssize_t)size) {
3655                mBuffer->release();
3656                mBuffer = NULL;
3657
3658                return ERROR_IO;
3659            }
3660
3661            CHECK(mBuffer != NULL);
3662            mBuffer->set_range(0, size);
3663            mBuffer->meta_data()->clear();
3664            mBuffer->meta_data()->setInt64(
3665                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3666            mBuffer->meta_data()->setInt64(
3667                    kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
3668
3669            if (targetSampleTimeUs >= 0) {
3670                mBuffer->meta_data()->setInt64(
3671                        kKeyTargetTime, targetSampleTimeUs);
3672            }
3673
3674            if (isSyncSample) {
3675                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3676            }
3677
3678            ++mCurrentSampleIndex;
3679        }
3680
3681        if (!mIsAVC && !mIsHEVC) {
3682            *out = mBuffer;
3683            mBuffer = NULL;
3684
3685            return OK;
3686        }
3687
3688        // Each NAL unit is split up into its constituent fragments and
3689        // each one of them returned in its own buffer.
3690
3691        CHECK(mBuffer->range_length() >= mNALLengthSize);
3692
3693        const uint8_t *src =
3694            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3695
3696        size_t nal_size = parseNALSize(src);
3697        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3698            ALOGE("incomplete NAL unit.");
3699
3700            mBuffer->release();
3701            mBuffer = NULL;
3702
3703            return ERROR_MALFORMED;
3704        }
3705
3706        MediaBuffer *clone = mBuffer->clone();
3707        CHECK(clone != NULL);
3708        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3709
3710        CHECK(mBuffer != NULL);
3711        mBuffer->set_range(
3712                mBuffer->range_offset() + mNALLengthSize + nal_size,
3713                mBuffer->range_length() - mNALLengthSize - nal_size);
3714
3715        if (mBuffer->range_length() == 0) {
3716            mBuffer->release();
3717            mBuffer = NULL;
3718        }
3719
3720        *out = clone;
3721
3722        return OK;
3723    } else {
3724        // Whole NAL units are returned but each fragment is prefixed by
3725        // the start code (0x00 00 00 01).
3726        ssize_t num_bytes_read = 0;
3727        int32_t drm = 0;
3728        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3729        if (usesDRM) {
3730            num_bytes_read =
3731                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3732        } else {
3733            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3734        }
3735
3736        if (num_bytes_read < (ssize_t)size) {
3737            mBuffer->release();
3738            mBuffer = NULL;
3739
3740            return ERROR_IO;
3741        }
3742
3743        if (usesDRM) {
3744            CHECK(mBuffer != NULL);
3745            mBuffer->set_range(0, size);
3746
3747        } else {
3748            uint8_t *dstData = (uint8_t *)mBuffer->data();
3749            size_t srcOffset = 0;
3750            size_t dstOffset = 0;
3751
3752            while (srcOffset < size) {
3753                bool isMalFormed = (srcOffset + mNALLengthSize > size);
3754                size_t nalLength = 0;
3755                if (!isMalFormed) {
3756                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
3757                    srcOffset += mNALLengthSize;
3758                    isMalFormed = srcOffset + nalLength > size;
3759                }
3760
3761                if (isMalFormed) {
3762                    ALOGE("Video is malformed");
3763                    mBuffer->release();
3764                    mBuffer = NULL;
3765                    return ERROR_MALFORMED;
3766                }
3767
3768                if (nalLength == 0) {
3769                    continue;
3770                }
3771
3772                CHECK(dstOffset + 4 <= mBuffer->size());
3773
3774                dstData[dstOffset++] = 0;
3775                dstData[dstOffset++] = 0;
3776                dstData[dstOffset++] = 0;
3777                dstData[dstOffset++] = 1;
3778                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
3779                srcOffset += nalLength;
3780                dstOffset += nalLength;
3781            }
3782            CHECK_EQ(srcOffset, size);
3783            CHECK(mBuffer != NULL);
3784            mBuffer->set_range(0, dstOffset);
3785        }
3786
3787        mBuffer->meta_data()->clear();
3788        mBuffer->meta_data()->setInt64(
3789                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3790        mBuffer->meta_data()->setInt64(
3791                kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
3792
3793        if (targetSampleTimeUs >= 0) {
3794            mBuffer->meta_data()->setInt64(
3795                    kKeyTargetTime, targetSampleTimeUs);
3796        }
3797
3798        if (isSyncSample) {
3799            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3800        }
3801
3802        ++mCurrentSampleIndex;
3803
3804        *out = mBuffer;
3805        mBuffer = NULL;
3806
3807        return OK;
3808    }
3809}
3810
3811status_t MPEG4Source::fragmentedRead(
3812        MediaBuffer **out, const ReadOptions *options) {
3813
3814    ALOGV("MPEG4Source::fragmentedRead");
3815
3816    CHECK(mStarted);
3817
3818    *out = NULL;
3819
3820    int64_t targetSampleTimeUs = -1;
3821
3822    int64_t seekTimeUs;
3823    ReadOptions::SeekMode mode;
3824    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3825
3826        int numSidxEntries = mSegments.size();
3827        if (numSidxEntries != 0) {
3828            int64_t totalTime = 0;
3829            off64_t totalOffset = mFirstMoofOffset;
3830            for (int i = 0; i < numSidxEntries; i++) {
3831                const SidxEntry *se = &mSegments[i];
3832                if (totalTime + se->mDurationUs > seekTimeUs) {
3833                    // The requested time is somewhere in this segment
3834                    if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
3835                        (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
3836                        (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
3837                        // requested next sync, or closest sync and it was closer to the end of
3838                        // this segment
3839                        totalTime += se->mDurationUs;
3840                        totalOffset += se->mSize;
3841                    }
3842                    break;
3843                }
3844                totalTime += se->mDurationUs;
3845                totalOffset += se->mSize;
3846            }
3847            mCurrentMoofOffset = totalOffset;
3848            mCurrentSamples.clear();
3849            mCurrentSampleIndex = 0;
3850            parseChunk(&totalOffset);
3851            mCurrentTime = totalTime * mTimescale / 1000000ll;
3852        } else {
3853            // without sidx boxes, we can only seek to 0
3854            mCurrentMoofOffset = mFirstMoofOffset;
3855            mCurrentSamples.clear();
3856            mCurrentSampleIndex = 0;
3857            off64_t tmp = mCurrentMoofOffset;
3858            parseChunk(&tmp);
3859            mCurrentTime = 0;
3860        }
3861
3862        if (mBuffer != NULL) {
3863            mBuffer->release();
3864            mBuffer = NULL;
3865        }
3866
3867        // fall through
3868    }
3869
3870    off64_t offset = 0;
3871    size_t size = 0;
3872    uint32_t cts = 0;
3873    bool isSyncSample = false;
3874    bool newBuffer = false;
3875    if (mBuffer == NULL) {
3876        newBuffer = true;
3877
3878        if (mCurrentSampleIndex >= mCurrentSamples.size()) {
3879            // move to next fragment if there is one
3880            if (mNextMoofOffset <= mCurrentMoofOffset) {
3881                return ERROR_END_OF_STREAM;
3882            }
3883            off64_t nextMoof = mNextMoofOffset;
3884            mCurrentMoofOffset = nextMoof;
3885            mCurrentSamples.clear();
3886            mCurrentSampleIndex = 0;
3887            parseChunk(&nextMoof);
3888            if (mCurrentSampleIndex >= mCurrentSamples.size()) {
3889                return ERROR_END_OF_STREAM;
3890            }
3891        }
3892
3893        const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
3894        offset = smpl->offset;
3895        size = smpl->size;
3896        cts = mCurrentTime;
3897        mCurrentTime += smpl->duration;
3898        isSyncSample = (mCurrentSampleIndex == 0); // XXX
3899
3900        status_t err = mGroup->acquire_buffer(&mBuffer);
3901
3902        if (err != OK) {
3903            CHECK(mBuffer == NULL);
3904            ALOGV("acquire_buffer returned %d", err);
3905            return err;
3906        }
3907    }
3908
3909    const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
3910    const sp<MetaData> bufmeta = mBuffer->meta_data();
3911    bufmeta->clear();
3912    if (smpl->encryptedsizes.size()) {
3913        // store clear/encrypted lengths in metadata
3914        bufmeta->setData(kKeyPlainSizes, 0,
3915                smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
3916        bufmeta->setData(kKeyEncryptedSizes, 0,
3917                smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
3918        bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size?
3919        bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
3920        bufmeta->setInt32(kKeyCryptoMode, mCryptoMode);
3921        bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16);
3922    }
3923
3924    if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) {
3925        if (newBuffer) {
3926            ssize_t num_bytes_read =
3927                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3928
3929            if (num_bytes_read < (ssize_t)size) {
3930                mBuffer->release();
3931                mBuffer = NULL;
3932
3933                ALOGV("i/o error");
3934                return ERROR_IO;
3935            }
3936
3937            CHECK(mBuffer != NULL);
3938            mBuffer->set_range(0, size);
3939            mBuffer->meta_data()->setInt64(
3940                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3941            mBuffer->meta_data()->setInt64(
3942                    kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
3943
3944            if (targetSampleTimeUs >= 0) {
3945                mBuffer->meta_data()->setInt64(
3946                        kKeyTargetTime, targetSampleTimeUs);
3947            }
3948
3949            if (isSyncSample) {
3950                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3951            }
3952
3953            ++mCurrentSampleIndex;
3954        }
3955
3956        if (!mIsAVC && !mIsHEVC) {
3957            *out = mBuffer;
3958            mBuffer = NULL;
3959
3960            return OK;
3961        }
3962
3963        // Each NAL unit is split up into its constituent fragments and
3964        // each one of them returned in its own buffer.
3965
3966        CHECK(mBuffer->range_length() >= mNALLengthSize);
3967
3968        const uint8_t *src =
3969            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3970
3971        size_t nal_size = parseNALSize(src);
3972        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3973            ALOGE("incomplete NAL unit.");
3974
3975            mBuffer->release();
3976            mBuffer = NULL;
3977
3978            return ERROR_MALFORMED;
3979        }
3980
3981        MediaBuffer *clone = mBuffer->clone();
3982        CHECK(clone != NULL);
3983        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3984
3985        CHECK(mBuffer != NULL);
3986        mBuffer->set_range(
3987                mBuffer->range_offset() + mNALLengthSize + nal_size,
3988                mBuffer->range_length() - mNALLengthSize - nal_size);
3989
3990        if (mBuffer->range_length() == 0) {
3991            mBuffer->release();
3992            mBuffer = NULL;
3993        }
3994
3995        *out = clone;
3996
3997        return OK;
3998    } else {
3999        ALOGV("whole NAL");
4000        // Whole NAL units are returned but each fragment is prefixed by
4001        // the start code (0x00 00 00 01).
4002        ssize_t num_bytes_read = 0;
4003        int32_t drm = 0;
4004        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
4005        if (usesDRM) {
4006            num_bytes_read =
4007                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
4008        } else {
4009            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
4010        }
4011
4012        if (num_bytes_read < (ssize_t)size) {
4013            mBuffer->release();
4014            mBuffer = NULL;
4015
4016            ALOGV("i/o error");
4017            return ERROR_IO;
4018        }
4019
4020        if (usesDRM) {
4021            CHECK(mBuffer != NULL);
4022            mBuffer->set_range(0, size);
4023
4024        } else {
4025            uint8_t *dstData = (uint8_t *)mBuffer->data();
4026            size_t srcOffset = 0;
4027            size_t dstOffset = 0;
4028
4029            while (srcOffset < size) {
4030                bool isMalFormed = (srcOffset + mNALLengthSize > size);
4031                size_t nalLength = 0;
4032                if (!isMalFormed) {
4033                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
4034                    srcOffset += mNALLengthSize;
4035                    isMalFormed = srcOffset + nalLength > size;
4036                }
4037
4038                if (isMalFormed) {
4039                    ALOGE("Video is malformed");
4040                    mBuffer->release();
4041                    mBuffer = NULL;
4042                    return ERROR_MALFORMED;
4043                }
4044
4045                if (nalLength == 0) {
4046                    continue;
4047                }
4048
4049                CHECK(dstOffset + 4 <= mBuffer->size());
4050
4051                dstData[dstOffset++] = 0;
4052                dstData[dstOffset++] = 0;
4053                dstData[dstOffset++] = 0;
4054                dstData[dstOffset++] = 1;
4055                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
4056                srcOffset += nalLength;
4057                dstOffset += nalLength;
4058            }
4059            CHECK_EQ(srcOffset, size);
4060            CHECK(mBuffer != NULL);
4061            mBuffer->set_range(0, dstOffset);
4062        }
4063
4064        mBuffer->meta_data()->setInt64(
4065                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4066        mBuffer->meta_data()->setInt64(
4067                kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
4068
4069        if (targetSampleTimeUs >= 0) {
4070            mBuffer->meta_data()->setInt64(
4071                    kKeyTargetTime, targetSampleTimeUs);
4072        }
4073
4074        if (isSyncSample) {
4075            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4076        }
4077
4078        ++mCurrentSampleIndex;
4079
4080        *out = mBuffer;
4081        mBuffer = NULL;
4082
4083        return OK;
4084    }
4085}
4086
4087MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
4088        const char *mimePrefix) {
4089    for (Track *track = mFirstTrack; track != NULL; track = track->next) {
4090        const char *mime;
4091        if (track->meta != NULL
4092                && track->meta->findCString(kKeyMIMEType, &mime)
4093                && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
4094            return track;
4095        }
4096    }
4097
4098    return NULL;
4099}
4100
4101static bool LegacySniffMPEG4(
4102        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
4103    uint8_t header[8];
4104
4105    ssize_t n = source->readAt(4, header, sizeof(header));
4106    if (n < (ssize_t)sizeof(header)) {
4107        return false;
4108    }
4109
4110    if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
4111        || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
4112        || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
4113        || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
4114        || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
4115        || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) {
4116        *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4117        *confidence = 0.4;
4118
4119        return true;
4120    }
4121
4122    return false;
4123}
4124
4125static bool isCompatibleBrand(uint32_t fourcc) {
4126    static const uint32_t kCompatibleBrands[] = {
4127        FOURCC('i', 's', 'o', 'm'),
4128        FOURCC('i', 's', 'o', '2'),
4129        FOURCC('a', 'v', 'c', '1'),
4130        FOURCC('h', 'v', 'c', '1'),
4131        FOURCC('h', 'e', 'v', '1'),
4132        FOURCC('3', 'g', 'p', '4'),
4133        FOURCC('m', 'p', '4', '1'),
4134        FOURCC('m', 'p', '4', '2'),
4135
4136        // Won't promise that the following file types can be played.
4137        // Just give these file types a chance.
4138        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
4139        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
4140
4141        FOURCC('3', 'g', '2', 'a'),  // 3GPP2
4142        FOURCC('3', 'g', '2', 'b'),
4143    };
4144
4145    for (size_t i = 0;
4146         i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
4147         ++i) {
4148        if (kCompatibleBrands[i] == fourcc) {
4149            return true;
4150        }
4151    }
4152
4153    return false;
4154}
4155
4156// Attempt to actually parse the 'ftyp' atom and determine if a suitable
4157// compatible brand is present.
4158// Also try to identify where this file's metadata ends
4159// (end of the 'moov' atom) and report it to the caller as part of
4160// the metadata.
4161static bool BetterSniffMPEG4(
4162        const sp<DataSource> &source, String8 *mimeType, float *confidence,
4163        sp<AMessage> *meta) {
4164    // We scan up to 128 bytes to identify this file as an MP4.
4165    static const off64_t kMaxScanOffset = 128ll;
4166
4167    off64_t offset = 0ll;
4168    bool foundGoodFileType = false;
4169    off64_t moovAtomEndOffset = -1ll;
4170    bool done = false;
4171
4172    while (!done && offset < kMaxScanOffset) {
4173        uint32_t hdr[2];
4174        if (source->readAt(offset, hdr, 8) < 8) {
4175            return false;
4176        }
4177
4178        uint64_t chunkSize = ntohl(hdr[0]);
4179        uint32_t chunkType = ntohl(hdr[1]);
4180        off64_t chunkDataOffset = offset + 8;
4181
4182        if (chunkSize == 1) {
4183            if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
4184                return false;
4185            }
4186
4187            chunkSize = ntoh64(chunkSize);
4188            chunkDataOffset += 8;
4189
4190            if (chunkSize < 16) {
4191                // The smallest valid chunk is 16 bytes long in this case.
4192                return false;
4193            }
4194        } else if (chunkSize < 8) {
4195            // The smallest valid chunk is 8 bytes long.
4196            return false;
4197        }
4198
4199        off64_t chunkDataSize = offset + chunkSize - chunkDataOffset;
4200
4201        char chunkstring[5];
4202        MakeFourCCString(chunkType, chunkstring);
4203        ALOGV("saw chunk type %s, size %lld @ %lld", chunkstring, chunkSize, offset);
4204        switch (chunkType) {
4205            case FOURCC('f', 't', 'y', 'p'):
4206            {
4207                if (chunkDataSize < 8) {
4208                    return false;
4209                }
4210
4211                uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
4212                for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
4213                    if (i == 1) {
4214                        // Skip this index, it refers to the minorVersion,
4215                        // not a brand.
4216                        continue;
4217                    }
4218
4219                    uint32_t brand;
4220                    if (source->readAt(
4221                                chunkDataOffset + 4 * i, &brand, 4) < 4) {
4222                        return false;
4223                    }
4224
4225                    brand = ntohl(brand);
4226
4227                    if (isCompatibleBrand(brand)) {
4228                        foundGoodFileType = true;
4229                        break;
4230                    }
4231                }
4232
4233                if (!foundGoodFileType) {
4234                    return false;
4235                }
4236
4237                break;
4238            }
4239
4240            case FOURCC('m', 'o', 'o', 'v'):
4241            {
4242                moovAtomEndOffset = offset + chunkSize;
4243
4244                done = true;
4245                break;
4246            }
4247
4248            default:
4249                break;
4250        }
4251
4252        offset += chunkSize;
4253    }
4254
4255    if (!foundGoodFileType) {
4256        return false;
4257    }
4258
4259    *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4260    *confidence = 0.4f;
4261
4262    if (moovAtomEndOffset >= 0) {
4263        *meta = new AMessage;
4264        (*meta)->setInt64("meta-data-size", moovAtomEndOffset);
4265
4266        ALOGV("found metadata size: %lld", moovAtomEndOffset);
4267    }
4268
4269    return true;
4270}
4271
4272bool SniffMPEG4(
4273        const sp<DataSource> &source, String8 *mimeType, float *confidence,
4274        sp<AMessage> *meta) {
4275    if (BetterSniffMPEG4(source, mimeType, confidence, meta)) {
4276        return true;
4277    }
4278
4279    if (LegacySniffMPEG4(source, mimeType, confidence)) {
4280        ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
4281        return true;
4282    }
4283
4284    return false;
4285}
4286
4287}  // namespace android
4288