MPEG4Extractor.cpp revision 62df539321b3079f5ff11bb6aeaaab75ef307d40
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MPEG4Extractor"
19#include <utils/Log.h>
20
21#include "include/MPEG4Extractor.h"
22#include "include/SampleTable.h"
23#include "include/ESDS.h"
24
25#include <ctype.h>
26#include <stdint.h>
27#include <stdlib.h>
28#include <string.h>
29
30#include <media/stagefright/foundation/ABitReader.h>
31#include <media/stagefright/foundation/ABuffer.h>
32#include <media/stagefright/foundation/ADebug.h>
33#include <media/stagefright/foundation/AMessage.h>
34#include <media/stagefright/MediaBuffer.h>
35#include <media/stagefright/MediaBufferGroup.h>
36#include <media/stagefright/MediaDefs.h>
37#include <media/stagefright/MediaSource.h>
38#include <media/stagefright/MetaData.h>
39#include <utils/String8.h>
40
41#include <byteswap.h>
42#include "include/ID3.h"
43
44namespace android {
45
// MediaSource subclass declared locally in this file. Judging by its members it
// produces the samples of one track, using either a SampleTable or
// movie-fragment state (moof offsets, per-fragment sample list).
// NOTE(review): the method definitions do not appear next to this declaration;
// confirm the details against them.
46class MPEG4Source : public MediaSource {
47public:
48    // Caller retains ownership of both "dataSource" and "sampleTable".
    // "sidx" is taken by non-const reference; presumably it is bound to the
    // reference member mSegments and must therefore outlive this object —
    // TODO confirm in the constructor definition.
49    MPEG4Source(const sp<MetaData> &format,
50                const sp<DataSource> &dataSource,
51                int32_t timeScale,
52                const sp<SampleTable> &sampleTable,
53                Vector<SidxEntry> &sidx,
54                off64_t firstMoofOffset);
55
56    virtual status_t start(MetaData *params = NULL);
57    virtual status_t stop();
58
59    virtual sp<MetaData> getFormat();
60
    // read() and fragmentedRead() share the same signature; presumably read()
    // dispatches to fragmentedRead() for fragmented files — TODO confirm.
61    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
62    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);
63
64protected:
    // Protected destructor: instances are not meant to be deleted directly by
    // clients (presumably they are reference counted through sp<> — confirm).
65    virtual ~MPEG4Source();
66
67private:
68    Mutex mLock;
69
    // Values handed to the constructor (format, data source, timescale,
    // sample table) plus the current read position.
70    sp<MetaData> mFormat;
71    sp<DataSource> mDataSource;
72    int32_t mTimescale;
73    sp<SampleTable> mSampleTable;
74    uint32_t mCurrentSampleIndex;
    // State that, by its naming, tracks the position within fragmented
    // ('moof'/'sidx') content — NOTE(review): confirm usage in the definitions.
75    uint32_t mCurrentFragmentIndex;
76    Vector<SidxEntry> &mSegments;
77    off64_t mFirstMoofOffset;
78    off64_t mCurrentMoofOffset;
79    off64_t mNextMoofOffset;
80    uint32_t mCurrentTime;
81    int32_t mLastParsedTrackId;
82    int32_t mTrackId;
83
84    int32_t mCryptoMode;    // passed in from extractor
85    int32_t mDefaultIVSize; // passed in from extractor
86    uint8_t mCryptoKey[16]; // passed in from extractor
    // Sample auxiliary information state; presumably filled in by
    // parseSampleAuxiliaryInformationSizes()/...Offsets() declared below.
    // The two raw pointers are paired with explicit count/alloc-size fields.
87    uint32_t mCurrentAuxInfoType;
88    uint32_t mCurrentAuxInfoTypeParameter;
89    int32_t mCurrentDefaultSampleInfoSize;
90    uint32_t mCurrentSampleInfoCount;
91    uint32_t mCurrentSampleInfoAllocSize;
92    uint8_t* mCurrentSampleInfoSizes;
93    uint32_t mCurrentSampleInfoOffsetCount;
94    uint32_t mCurrentSampleInfoOffsetsAllocSize;
95    uint64_t* mCurrentSampleInfoOffsets;
96
    // Codec flags and the size in bytes of the NAL length prefix
    // (see parseNALSize() below).
97    bool mIsAVC;
98    bool mIsHEVC;
99    size_t mNALLengthSize;
100
101    bool mStarted;
102
103    MediaBufferGroup *mGroup;
104
105    MediaBuffer *mBuffer;
106
107    bool mWantsNALFragments;
108
109    uint8_t *mSrcBuffer;
110
    // Private parsing helpers; the off64_t arguments are byte positions/sizes
    // within the data source.
111    size_t parseNALSize(const uint8_t *data) const;
112    status_t parseChunk(off64_t *offset);
113    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
114    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
115    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
116    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
117
    // Holds the fields of a track fragment header.
118    struct TrackFragmentHeaderInfo {
        // Bit masks tested against mFlags. The names and values match the
        // tf_flags of the 'tfhd' box in ISO/IEC 14496-12.
119        enum Flags {
120            kBaseDataOffsetPresent         = 0x01,
121            kSampleDescriptionIndexPresent = 0x02,
122            kDefaultSampleDurationPresent  = 0x08,
123            kDefaultSampleSizePresent      = 0x10,
124            kDefaultSampleFlagsPresent     = 0x20,
125            kDurationIsEmpty               = 0x10000,
126        };
127
128        uint32_t mTrackID;
129        uint32_t mFlags;
130        uint64_t mBaseDataOffset;
131        uint32_t mSampleDescriptionIndex;
132        uint32_t mDefaultSampleDuration;
133        uint32_t mDefaultSampleSize;
134        uint32_t mDefaultSampleFlags;
135
136        uint64_t mDataOffset;
137    };
138    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;
139
    // One sample record: position and size in the data source, duration, a
    // 16-byte IV and the per-subsample clear/encrypted byte counts.
140    struct Sample {
141        off64_t offset;
142        size_t size;
143        uint32_t duration;
144        uint8_t iv[16];
145        Vector<size_t> clearsizes;
146        Vector<size_t> encryptedsizes;
147    };
148    Vector<Sample> mCurrentSamples;
149
    // Copy constructor and copy assignment are declared private, which keeps
    // outside code from copying instances.
150    MPEG4Source(const MPEG4Source &);
151    MPEG4Source &operator=(const MPEG4Source &);
152};
153
154// This custom data source wraps an existing one and satisfies requests
155// falling entirely within a cached range from the cache while forwarding
156// all remaining requests to the wrapped datasource.
157// This is used to cache the full sampletable metadata for a single track,
158// possibly wrapping multiple times to cover all tracks, i.e.
159// Each MPEG4DataSource caches the sampletable metadata for a single track.
160
161struct MPEG4DataSource : public DataSource {
    // Wraps "source"; the cache is empty until setCachedRange() succeeds.
162    MPEG4DataSource(const sp<DataSource> &source);
163
    // initCheck(), getSize() and flags() forward to the wrapped source.
    // readAt() copies from the cache when the requested range lies entirely
    // inside it and forwards to the wrapped source otherwise.
164    virtual status_t initCheck() const;
165    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
166    virtual status_t getSize(off64_t *size);
167    virtual uint32_t flags();
168
    // Drops any existing cache, then allocates "size" bytes and fills them
    // from the wrapped source starting at "offset". Returns OK on success,
    // -ENOMEM if the allocation fails, ERROR_IO on a short read.
169    status_t setCachedRange(off64_t offset, size_t size);
170
171protected:
172    virtual ~MPEG4DataSource();
173
174private:
    // Held by readAt() and setCachedRange() while they touch the cache fields.
175    Mutex mLock;
176
177    sp<DataSource> mSource;
    // The cache holds mCachedSize bytes that start at file offset
    // mCachedOffset; mCache is a malloc()ed buffer, or NULL when empty.
178    off64_t mCachedOffset;
179    size_t mCachedSize;
180    uint8_t *mCache;
181
    // Frees mCache and resets the cached range to empty.
182    void clearCache();
183
    // Copy constructor and copy assignment are declared private, which keeps
    // outside code from copying instances.
184    MPEG4DataSource(const MPEG4DataSource &);
185    MPEG4DataSource &operator=(const MPEG4DataSource &);
186};
187
188MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
189    : mSource(source),
190      mCachedOffset(0),
191      mCachedSize(0),
192      mCache(NULL) {
193}
194
195MPEG4DataSource::~MPEG4DataSource() {
196    clearCache();
197}
198
199void MPEG4DataSource::clearCache() {
200    if (mCache) {
201        free(mCache);
202        mCache = NULL;
203    }
204
205    mCachedOffset = 0;
206    mCachedSize = 0;
207}
208
209status_t MPEG4DataSource::initCheck() const {
210    return mSource->initCheck();
211}
212
213ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
214    Mutex::Autolock autoLock(mLock);
215
216    if (offset >= mCachedOffset
217            && offset + size <= mCachedOffset + mCachedSize) {
218        memcpy(data, &mCache[offset - mCachedOffset], size);
219        return size;
220    }
221
222    return mSource->readAt(offset, data, size);
223}
224
225status_t MPEG4DataSource::getSize(off64_t *size) {
226    return mSource->getSize(size);
227}
228
229uint32_t MPEG4DataSource::flags() {
230    return mSource->flags();
231}
232
233status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
234    Mutex::Autolock autoLock(mLock);
235
236    clearCache();
237
238    mCache = (uint8_t *)malloc(size);
239
240    if (mCache == NULL) {
241        return -ENOMEM;
242    }
243
244    mCachedOffset = offset;
245    mCachedSize = size;
246
247    ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
248
249    if (err < (ssize_t)size) {
250        clearCache();
251
252        return ERROR_IO;
253    }
254
255    return OK;
256}
257
258////////////////////////////////////////////////////////////////////////////////
259
// Prints "size" bytes starting at "_data" to stdout, 16 bytes per row.
// Each row shows the row offset in hex, the bytes in hex (with an extra space
// after the eighth byte; missing bytes of a short last row are padded with
// blanks) and the same bytes as characters, with '.' for non-printable ones.
static void hexdump(const void *_data, size_t size) {
    static const size_t kBytesPerRow = 16;
    const uint8_t *bytes = (const uint8_t *)_data;

    for (size_t rowStart = 0; rowStart < size; rowStart += kBytesPerRow) {
        printf("0x%04zx  ", rowStart);

        const size_t remaining = size - rowStart;
        const size_t rowLen = remaining < kBytesPerRow ? remaining : kBytesPerRow;

        // Hex columns, always 16 wide so the text column lines up.
        for (size_t col = 0; col < kBytesPerRow; ++col) {
            if (col == 8) {
                printf(" ");
            }

            if (col < rowLen) {
                printf("%02x ", bytes[rowStart + col]);
            } else {
                printf("   ");
            }
        }

        printf(" ");

        // Character column, only for the bytes that exist.
        for (size_t col = 0; col < rowLen; ++col) {
            const uint8_t value = bytes[rowStart + col];
            if (isprint(value)) {
                printf("%c", value);
            } else {
                printf(".");
            }
        }

        printf("\n");
    }
}
298
299static const char *FourCC2MIME(uint32_t fourcc) {
300    switch (fourcc) {
301        case FOURCC('m', 'p', '4', 'a'):
302            return MEDIA_MIMETYPE_AUDIO_AAC;
303
304        case FOURCC('s', 'a', 'm', 'r'):
305            return MEDIA_MIMETYPE_AUDIO_AMR_NB;
306
307        case FOURCC('s', 'a', 'w', 'b'):
308            return MEDIA_MIMETYPE_AUDIO_AMR_WB;
309
310        case FOURCC('m', 'p', '4', 'v'):
311            return MEDIA_MIMETYPE_VIDEO_MPEG4;
312
313        case FOURCC('s', '2', '6', '3'):
314        case FOURCC('h', '2', '6', '3'):
315        case FOURCC('H', '2', '6', '3'):
316            return MEDIA_MIMETYPE_VIDEO_H263;
317
318        case FOURCC('a', 'v', 'c', '1'):
319            return MEDIA_MIMETYPE_VIDEO_AVC;
320
321        case FOURCC('h', 'v', 'c', '1'):
322        case FOURCC('h', 'e', 'v', '1'):
323            return MEDIA_MIMETYPE_VIDEO_HEVC;
324        default:
325            CHECK(!"should not be here.");
326            return NULL;
327    }
328}
329
330static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
331    if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
332        // AMR NB audio is always mono, 8kHz
333        *channels = 1;
334        *rate = 8000;
335        return true;
336    } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
337        // AMR WB audio is always mono, 16kHz
338        *channels = 1;
339        *rate = 16000;
340        return true;
341    }
342    return false;
343}
344
345MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
346    : mSidxDuration(0),
347      mMoofOffset(0),
348      mDataSource(source),
349      mInitCheck(NO_INIT),
350      mHasVideo(false),
351      mHeaderTimescale(0),
352      mFirstTrack(NULL),
353      mLastTrack(NULL),
354      mFileMetaData(new MetaData),
355      mFirstSINF(NULL),
356      mIsDrm(false) {
357}
358
359MPEG4Extractor::~MPEG4Extractor() {
360    Track *track = mFirstTrack;
361    while (track) {
362        Track *next = track->next;
363
364        delete track;
365        track = next;
366    }
367    mFirstTrack = mLastTrack = NULL;
368
369    SINF *sinf = mFirstSINF;
370    while (sinf) {
371        SINF *next = sinf->next;
372        delete sinf->IPMPData;
373        delete sinf;
374        sinf = next;
375    }
376    mFirstSINF = NULL;
377
378    for (size_t i = 0; i < mPssh.size(); i++) {
379        delete [] mPssh[i].data;
380    }
381}
382
383uint32_t MPEG4Extractor::flags() const {
384    return CAN_PAUSE |
385            ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
386                    (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
387}
388
389sp<MetaData> MPEG4Extractor::getMetaData() {
390    status_t err;
391    if ((err = readMetaData()) != OK) {
392        return new MetaData;
393    }
394
395    return mFileMetaData;
396}
397
398size_t MPEG4Extractor::countTracks() {
399    status_t err;
400    if ((err = readMetaData()) != OK) {
401        ALOGV("MPEG4Extractor::countTracks: no tracks");
402        return 0;
403    }
404
405    size_t n = 0;
406    Track *track = mFirstTrack;
407    while (track) {
408        ++n;
409        track = track->next;
410    }
411
412    ALOGV("MPEG4Extractor::countTracks: %d tracks", n);
413    return n;
414}
415
416sp<MetaData> MPEG4Extractor::getTrackMetaData(
417        size_t index, uint32_t flags) {
418    status_t err;
419    if ((err = readMetaData()) != OK) {
420        return NULL;
421    }
422
423    Track *track = mFirstTrack;
424    while (index > 0) {
425        if (track == NULL) {
426            return NULL;
427        }
428
429        track = track->next;
430        --index;
431    }
432
433    if (track == NULL) {
434        return NULL;
435    }
436
437    if ((flags & kIncludeExtensiveMetaData)
438            && !track->includes_expensive_metadata) {
439        track->includes_expensive_metadata = true;
440
441        const char *mime;
442        CHECK(track->meta->findCString(kKeyMIMEType, &mime));
443        if (!strncasecmp("video/", mime, 6)) {
444            if (mMoofOffset > 0) {
445                int64_t duration;
446                if (track->meta->findInt64(kKeyDuration, &duration)) {
447                    // nothing fancy, just pick a frame near 1/4th of the duration
448                    track->meta->setInt64(
449                            kKeyThumbnailTime, duration / 4);
450                }
451            } else {
452                uint32_t sampleIndex;
453                uint32_t sampleTime;
454                if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK
455                        && track->sampleTable->getMetaDataForSample(
456                            sampleIndex, NULL /* offset */, NULL /* size */,
457                            &sampleTime) == OK) {
458                    track->meta->setInt64(
459                            kKeyThumbnailTime,
460                            ((int64_t)sampleTime * 1000000) / track->timescale);
461                }
462            }
463        }
464    }
465
466    return track->meta;
467}
468
// Writes the four bytes of "x", most significant first, into s[0..3] and
// terminates the string at s[4]. "s" must have room for at least 5 chars.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int i = 0; i < 4; ++i) {
        const int shift = 8 * (3 - i);
        s[i] = (char)((x >> shift) & 0xff);
    }
    s[4] = '\0';
}
476
477status_t MPEG4Extractor::readMetaData() {
478    if (mInitCheck != NO_INIT) {
479        return mInitCheck;
480    }
481
482    off64_t offset = 0;
483    status_t err;
484    while (true) {
485        off64_t orig_offset = offset;
486        err = parseChunk(&offset, 0);
487
488        if (offset <= orig_offset) {
489            // only continue parsing if the offset was advanced,
490            // otherwise we might end up in an infinite loop
491            ALOGE("did not advance: 0x%lld->0x%lld", orig_offset, offset);
492            err = ERROR_MALFORMED;
493            break;
494        } else if (err == OK) {
495            continue;
496        } else if (err != UNKNOWN_ERROR) {
497            break;
498        }
499        uint32_t hdr[2];
500        if (mDataSource->readAt(offset, hdr, 8) < 8) {
501            break;
502        }
503        uint32_t chunk_type = ntohl(hdr[1]);
504        if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
505            // store the offset of the first segment
506            mMoofOffset = offset;
507        } else if (chunk_type != FOURCC('m', 'd', 'a', 't')) {
508            // keep parsing until we get to the data
509            continue;
510        }
511        break;
512    }
513
514    if (mInitCheck == OK) {
515        if (mHasVideo) {
516            mFileMetaData->setCString(
517                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
518        } else {
519            mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
520        }
521    } else {
522        mInitCheck = err;
523    }
524
525    CHECK_NE(err, (status_t)NO_INIT);
526
527    // copy pssh data into file metadata
528    int psshsize = 0;
529    for (size_t i = 0; i < mPssh.size(); i++) {
530        psshsize += 20 + mPssh[i].datalen;
531    }
532    if (psshsize) {
533        char *buf = (char*)malloc(psshsize);
534        char *ptr = buf;
535        for (size_t i = 0; i < mPssh.size(); i++) {
536            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
537            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
538            ptr += (20 + mPssh[i].datalen);
539        }
540        mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize);
541        free(buf);
542    }
543    return mInitCheck;
544}
545
546char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
547    if (mFirstSINF == NULL) {
548        return NULL;
549    }
550
551    SINF *sinf = mFirstSINF;
552    while (sinf && (trackID != sinf->trackID)) {
553        sinf = sinf->next;
554    }
555
556    if (sinf == NULL) {
557        return NULL;
558    }
559
560    *len = sinf->len;
561    return sinf->IPMPData;
562}
563
564// Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
565static int32_t readSize(off64_t offset,
566        const sp<DataSource> DataSource, uint8_t *numOfBytes) {
567    uint32_t size = 0;
568    uint8_t data;
569    bool moreData = true;
570    *numOfBytes = 0;
571
572    while (moreData) {
573        if (DataSource->readAt(offset, &data, 1) < 1) {
574            return -1;
575        }
576        offset ++;
577        moreData = (data >= 128) ? true : false;
578        size = (size << 7) | (data & 0x7f); // Take last 7 bits
579        (*numOfBytes) ++;
580    }
581
582    return size;
583}
584
585status_t MPEG4Extractor::parseDrmSINF(
586        off64_t * /* offset */, off64_t data_offset) {
587    uint8_t updateIdTag;
588    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
589        return ERROR_IO;
590    }
591    data_offset ++;
592
593    if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
594        return ERROR_MALFORMED;
595    }
596
597    uint8_t numOfBytes;
598    int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
599    if (size < 0) {
600        return ERROR_IO;
601    }
602    int32_t classSize = size;
603    data_offset += numOfBytes;
604
605    while(size >= 11 ) {
606        uint8_t descriptorTag;
607        if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
608            return ERROR_IO;
609        }
610        data_offset ++;
611
612        if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
613            return ERROR_MALFORMED;
614        }
615
616        uint8_t buffer[8];
617        //ObjectDescriptorID and ObjectDescriptor url flag
618        if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
619            return ERROR_IO;
620        }
621        data_offset += 2;
622
623        if ((buffer[1] >> 5) & 0x0001) { //url flag is set
624            return ERROR_MALFORMED;
625        }
626
627        if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
628            return ERROR_IO;
629        }
630        data_offset += 8;
631
632        if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
633                || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
634            return ERROR_MALFORMED;
635        }
636
637        SINF *sinf = new SINF;
638        sinf->trackID = U16_AT(&buffer[3]);
639        sinf->IPMPDescriptorID = buffer[7];
640        sinf->next = mFirstSINF;
641        mFirstSINF = sinf;
642
643        size -= (8 + 2 + 1);
644    }
645
646    if (size != 0) {
647        return ERROR_MALFORMED;
648    }
649
650    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
651        return ERROR_IO;
652    }
653    data_offset ++;
654
655    if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
656        return ERROR_MALFORMED;
657    }
658
659    size = readSize(data_offset, mDataSource, &numOfBytes);
660    if (size < 0) {
661        return ERROR_IO;
662    }
663    classSize = size;
664    data_offset += numOfBytes;
665
666    while (size > 0) {
667        uint8_t tag;
668        int32_t dataLen;
669        if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
670            return ERROR_IO;
671        }
672        data_offset ++;
673
674        if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
675            uint8_t id;
676            dataLen = readSize(data_offset, mDataSource, &numOfBytes);
677            if (dataLen < 0) {
678                return ERROR_IO;
679            } else if (dataLen < 4) {
680                return ERROR_MALFORMED;
681            }
682            data_offset += numOfBytes;
683
684            if (mDataSource->readAt(data_offset, &id, 1) < 1) {
685                return ERROR_IO;
686            }
687            data_offset ++;
688
689            SINF *sinf = mFirstSINF;
690            while (sinf && (sinf->IPMPDescriptorID != id)) {
691                sinf = sinf->next;
692            }
693            if (sinf == NULL) {
694                return ERROR_MALFORMED;
695            }
696            sinf->len = dataLen - 3;
697            sinf->IPMPData = new char[sinf->len];
698            data_offset += 2;
699
700            if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) {
701                return ERROR_IO;
702            }
703            data_offset += sinf->len;
704
705            size -= (dataLen + numOfBytes + 1);
706        }
707    }
708
709    if (size != 0) {
710        return ERROR_MALFORMED;
711    }
712
713    return UNKNOWN_ERROR;  // Return a dummy error.
714}
715
716struct PathAdder {
717    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
718        : mPath(path) {
719        mPath->push(chunkType);
720    }
721
722    ~PathAdder() {
723        mPath->pop();
724    }
725
726private:
727    Vector<uint32_t> *mPath;
728
729    PathAdder(const PathAdder &);
730    PathAdder &operator=(const PathAdder &);
731};
732
733static bool underMetaDataPath(const Vector<uint32_t> &path) {
734    return path.size() >= 5
735        && path[0] == FOURCC('m', 'o', 'o', 'v')
736        && path[1] == FOURCC('u', 'd', 't', 'a')
737        && path[2] == FOURCC('m', 'e', 't', 'a')
738        && path[3] == FOURCC('i', 'l', 's', 't');
739}
740
741// Given a time in seconds since Jan 1 1904, produce a human-readable string.
742static void convertTimeToDate(int64_t time_1904, String8 *s) {
743    time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600);
744
745    char tmp[32];
746    strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970));
747
748    s->setTo(tmp);
749}
750
751status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
752    ALOGV("entering parseChunk %lld/%d", *offset, depth);
753    uint32_t hdr[2];
754    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
755        return ERROR_IO;
756    }
757    uint64_t chunk_size = ntohl(hdr[0]);
758    uint32_t chunk_type = ntohl(hdr[1]);
759    off64_t data_offset = *offset + 8;
760
761    if (chunk_size == 1) {
762        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
763            return ERROR_IO;
764        }
765        chunk_size = ntoh64(chunk_size);
766        data_offset += 8;
767
768        if (chunk_size < 16) {
769            // The smallest valid chunk is 16 bytes long in this case.
770            return ERROR_MALFORMED;
771        }
772    } else if (chunk_size == 0) {
773        if (depth == 0) {
774            // atom extends to end of file
775            off64_t sourceSize;
776            if (mDataSource->getSize(&sourceSize) == OK) {
777                chunk_size = (sourceSize - *offset);
778            } else {
779                // XXX could we just pick a "sufficiently large" value here?
780                ALOGE("atom size is 0, and data source has no size");
781                return ERROR_MALFORMED;
782            }
783        } else {
784            // not allowed for non-toplevel atoms, skip it
785            *offset += 4;
786            return OK;
787        }
788    } else if (chunk_size < 8) {
789        // The smallest valid chunk is 8 bytes long.
790        ALOGE("invalid chunk size: %d", int(chunk_size));
791        return ERROR_MALFORMED;
792    }
793
794    char chunk[5];
795    MakeFourCCString(chunk_type, chunk);
796    ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth);
797
798#if 0
799    static const char kWhitespace[] = "                                        ";
800    const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
801    printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size);
802
803    char buffer[256];
804    size_t n = chunk_size;
805    if (n > sizeof(buffer)) {
806        n = sizeof(buffer);
807    }
808    if (mDataSource->readAt(*offset, buffer, n)
809            < (ssize_t)n) {
810        return ERROR_IO;
811    }
812
813    hexdump(buffer, n);
814#endif
815
816    PathAdder autoAdder(&mPath, chunk_type);
817
818    off64_t chunk_data_size = *offset + chunk_size - data_offset;
819
820    if (chunk_type != FOURCC('c', 'p', 'r', 't')
821            && chunk_type != FOURCC('c', 'o', 'v', 'r')
822            && mPath.size() == 5 && underMetaDataPath(mPath)) {
823        off64_t stop_offset = *offset + chunk_size;
824        *offset = data_offset;
825        while (*offset < stop_offset) {
826            status_t err = parseChunk(offset, depth + 1);
827            if (err != OK) {
828                return err;
829            }
830        }
831
832        if (*offset != stop_offset) {
833            return ERROR_MALFORMED;
834        }
835
836        return OK;
837    }
838
839    switch(chunk_type) {
840        case FOURCC('m', 'o', 'o', 'v'):
841        case FOURCC('t', 'r', 'a', 'k'):
842        case FOURCC('m', 'd', 'i', 'a'):
843        case FOURCC('m', 'i', 'n', 'f'):
844        case FOURCC('d', 'i', 'n', 'f'):
845        case FOURCC('s', 't', 'b', 'l'):
846        case FOURCC('m', 'v', 'e', 'x'):
847        case FOURCC('m', 'o', 'o', 'f'):
848        case FOURCC('t', 'r', 'a', 'f'):
849        case FOURCC('m', 'f', 'r', 'a'):
850        case FOURCC('u', 'd', 't', 'a'):
851        case FOURCC('i', 'l', 's', 't'):
852        case FOURCC('s', 'i', 'n', 'f'):
853        case FOURCC('s', 'c', 'h', 'i'):
854        case FOURCC('e', 'd', 't', 's'):
855        {
856            if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
857                ALOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size);
858
859                if (mDataSource->flags()
860                        & (DataSource::kWantsPrefetching
861                            | DataSource::kIsCachingDataSource)) {
862                    sp<MPEG4DataSource> cachedSource =
863                        new MPEG4DataSource(mDataSource);
864
865                    if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
866                        mDataSource = cachedSource;
867                    }
868                }
869
870                mLastTrack->sampleTable = new SampleTable(mDataSource);
871            }
872
873            bool isTrack = false;
874            if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
875                isTrack = true;
876
877                Track *track = new Track;
878                track->next = NULL;
879                if (mLastTrack) {
880                    mLastTrack->next = track;
881                } else {
882                    mFirstTrack = track;
883                }
884                mLastTrack = track;
885
886                track->meta = new MetaData;
887                track->includes_expensive_metadata = false;
888                track->skipTrack = false;
889                track->timescale = 0;
890                track->meta->setCString(kKeyMIMEType, "application/octet-stream");
891            }
892
893            off64_t stop_offset = *offset + chunk_size;
894            *offset = data_offset;
895            while (*offset < stop_offset) {
896                status_t err = parseChunk(offset, depth + 1);
897                if (err != OK) {
898                    return err;
899                }
900            }
901
902            if (*offset != stop_offset) {
903                return ERROR_MALFORMED;
904            }
905
906            if (isTrack) {
907                if (mLastTrack->skipTrack) {
908                    Track *cur = mFirstTrack;
909
910                    if (cur == mLastTrack) {
911                        delete cur;
912                        mFirstTrack = mLastTrack = NULL;
913                    } else {
914                        while (cur && cur->next != mLastTrack) {
915                            cur = cur->next;
916                        }
917                        cur->next = NULL;
918                        delete mLastTrack;
919                        mLastTrack = cur;
920                    }
921
922                    return OK;
923                }
924
925                status_t err = verifyTrack(mLastTrack);
926
927                if (err != OK) {
928                    return err;
929                }
930            } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
931                mInitCheck = OK;
932
933                if (!mIsDrm) {
934                    return UNKNOWN_ERROR;  // Return a dummy error.
935                } else {
936                    return OK;
937                }
938            }
939            break;
940        }
941
942        case FOURCC('e', 'l', 's', 't'):
943        {
944            *offset += chunk_size;
945
946            // See 14496-12 8.6.6
947            uint8_t version;
948            if (mDataSource->readAt(data_offset, &version, 1) < 1) {
949                return ERROR_IO;
950            }
951
952            uint32_t entry_count;
953            if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
954                return ERROR_IO;
955            }
956
957            if (entry_count != 1) {
958                // we only support a single entry at the moment, for gapless playback
959                ALOGW("ignoring edit list with %d entries", entry_count);
960            } else if (mHeaderTimescale == 0) {
961                ALOGW("ignoring edit list because timescale is 0");
962            } else {
963                off64_t entriesoffset = data_offset + 8;
964                uint64_t segment_duration;
965                int64_t media_time;
966
967                if (version == 1) {
968                    if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
969                            !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
970                        return ERROR_IO;
971                    }
972                } else if (version == 0) {
973                    uint32_t sd;
974                    int32_t mt;
975                    if (!mDataSource->getUInt32(entriesoffset, &sd) ||
976                            !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
977                        return ERROR_IO;
978                    }
979                    segment_duration = sd;
980                    media_time = mt;
981                } else {
982                    return ERROR_IO;
983                }
984
985                uint64_t halfscale = mHeaderTimescale / 2;
986                segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale;
987                media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale;
988
989                int64_t duration;
990                int32_t samplerate;
991                if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
992                        mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {
993
994                    int64_t delay = (media_time  * samplerate + 500000) / 1000000;
995                    mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
996
997                    int64_t paddingus = duration - (segment_duration + media_time);
998                    if (paddingus < 0) {
999                        // track duration from media header (which is what kKeyDuration is) might
1000                        // be slightly shorter than the segment duration, which would make the
1001                        // padding negative. Clamp to zero.
1002                        paddingus = 0;
1003                    }
1004                    int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
1005                    mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
1006                }
1007            }
1008            break;
1009        }
1010
1011        case FOURCC('f', 'r', 'm', 'a'):
1012        {
1013            *offset += chunk_size;
1014
1015            uint32_t original_fourcc;
1016            if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1017                return ERROR_IO;
1018            }
1019            original_fourcc = ntohl(original_fourcc);
1020            ALOGV("read original format: %d", original_fourcc);
1021            mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
1022            uint32_t num_channels = 0;
1023            uint32_t sample_rate = 0;
1024            if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1025                mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1026                mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1027            }
1028            break;
1029        }
1030
1031        case FOURCC('t', 'e', 'n', 'c'):
1032        {
1033            *offset += chunk_size;
1034
1035            if (chunk_size < 32) {
1036                return ERROR_MALFORMED;
1037            }
1038
1039            // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1040            // default IV size, 16 bytes default KeyID
1041            // (ISO 23001-7)
1042            char buf[4];
1043            memset(buf, 0, 4);
1044            if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1045                return ERROR_IO;
1046            }
1047            uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1048            if (defaultAlgorithmId > 1) {
1049                // only 0 (clear) and 1 (AES-128) are valid
1050                return ERROR_MALFORMED;
1051            }
1052
1053            memset(buf, 0, 4);
1054            if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1055                return ERROR_IO;
1056            }
1057            uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1058
1059            if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
1060                    (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
1061                // only unencrypted data must have 0 IV size
1062                return ERROR_MALFORMED;
1063            } else if (defaultIVSize != 0 &&
1064                    defaultIVSize != 8 &&
1065                    defaultIVSize != 16) {
1066                // only supported sizes are 0, 8 and 16
1067                return ERROR_MALFORMED;
1068            }
1069
1070            uint8_t defaultKeyId[16];
1071
1072            if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1073                return ERROR_IO;
1074            }
1075
1076            mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
1077            mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1078            mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1079            break;
1080        }
1081
1082        case FOURCC('t', 'k', 'h', 'd'):
1083        {
1084            *offset += chunk_size;
1085
1086            status_t err;
1087            if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1088                return err;
1089            }
1090
1091            break;
1092        }
1093
1094        case FOURCC('p', 's', 's', 'h'):
1095        {
1096            *offset += chunk_size;
1097
1098            PsshInfo pssh;
1099
1100            if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1101                return ERROR_IO;
1102            }
1103
1104            uint32_t psshdatalen = 0;
1105            if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1106                return ERROR_IO;
1107            }
1108            pssh.datalen = ntohl(psshdatalen);
1109            ALOGV("pssh data size: %d", pssh.datalen);
1110            if (pssh.datalen + 20 > chunk_size) {
1111                // pssh data length exceeds size of containing box
1112                return ERROR_MALFORMED;
1113            }
1114
1115            pssh.data = new uint8_t[pssh.datalen];
1116            ALOGV("allocated pssh @ %p", pssh.data);
1117            ssize_t requested = (ssize_t) pssh.datalen;
1118            if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1119                return ERROR_IO;
1120            }
1121            mPssh.push_back(pssh);
1122
1123            break;
1124        }
1125
1126        case FOURCC('m', 'd', 'h', 'd'):
1127        {
1128            *offset += chunk_size;
1129
1130            if (chunk_data_size < 4) {
1131                return ERROR_MALFORMED;
1132            }
1133
1134            uint8_t version;
1135            if (mDataSource->readAt(
1136                        data_offset, &version, sizeof(version))
1137                    < (ssize_t)sizeof(version)) {
1138                return ERROR_IO;
1139            }
1140
1141            off64_t timescale_offset;
1142
1143            if (version == 1) {
1144                timescale_offset = data_offset + 4 + 16;
1145            } else if (version == 0) {
1146                timescale_offset = data_offset + 4 + 8;
1147            } else {
1148                return ERROR_IO;
1149            }
1150
1151            uint32_t timescale;
1152            if (mDataSource->readAt(
1153                        timescale_offset, &timescale, sizeof(timescale))
1154                    < (ssize_t)sizeof(timescale)) {
1155                return ERROR_IO;
1156            }
1157
1158            mLastTrack->timescale = ntohl(timescale);
1159
1160            int64_t duration = 0;
1161            if (version == 1) {
1162                if (mDataSource->readAt(
1163                            timescale_offset + 4, &duration, sizeof(duration))
1164                        < (ssize_t)sizeof(duration)) {
1165                    return ERROR_IO;
1166                }
1167                duration = ntoh64(duration);
1168            } else {
1169                uint32_t duration32;
1170                if (mDataSource->readAt(
1171                            timescale_offset + 4, &duration32, sizeof(duration32))
1172                        < (ssize_t)sizeof(duration32)) {
1173                    return ERROR_IO;
1174                }
1175                // ffmpeg sets duration to -1, which is incorrect.
1176                if (duration32 != 0xffffffff) {
1177                    duration = ntohl(duration32);
1178                }
1179            }
1180            mLastTrack->meta->setInt64(
1181                    kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1182
1183            uint8_t lang[2];
1184            off64_t lang_offset;
1185            if (version == 1) {
1186                lang_offset = timescale_offset + 4 + 8;
1187            } else if (version == 0) {
1188                lang_offset = timescale_offset + 4 + 4;
1189            } else {
1190                return ERROR_IO;
1191            }
1192
1193            if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1194                    < (ssize_t)sizeof(lang)) {
1195                return ERROR_IO;
1196            }
1197
1198            // To get the ISO-639-2/T three character language code
1199            // 1 bit pad followed by 3 5-bits characters. Each character
1200            // is packed as the difference between its ASCII value and 0x60.
1201            char lang_code[4];
1202            lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1203            lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1204            lang_code[2] = (lang[1] & 0x1f) + 0x60;
1205            lang_code[3] = '\0';
1206
1207            mLastTrack->meta->setCString(
1208                    kKeyMediaLanguage, lang_code);
1209
1210            break;
1211        }
1212
1213        case FOURCC('s', 't', 's', 'd'):
1214        {
1215            if (chunk_data_size < 8) {
1216                return ERROR_MALFORMED;
1217            }
1218
1219            uint8_t buffer[8];
1220            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1221                return ERROR_MALFORMED;
1222            }
1223
1224            if (mDataSource->readAt(
1225                        data_offset, buffer, 8) < 8) {
1226                return ERROR_IO;
1227            }
1228
1229            if (U32_AT(buffer) != 0) {
1230                // Should be version 0, flags 0.
1231                return ERROR_MALFORMED;
1232            }
1233
1234            uint32_t entry_count = U32_AT(&buffer[4]);
1235
1236            if (entry_count > 1) {
1237                // For 3GPP timed text, there could be multiple tx3g boxes containing
1238                // multiple text display formats. These formats will be used to
1239                // display the timed text.
1240                // For encrypted files, there may also be more than one entry.
1241                const char *mime;
1242                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1243                if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1244                        strcasecmp(mime, "application/octet-stream")) {
1245                    // For now we only support a single type of media per track.
1246                    mLastTrack->skipTrack = true;
1247                    *offset += chunk_size;
1248                    break;
1249                }
1250            }
1251            off64_t stop_offset = *offset + chunk_size;
1252            *offset = data_offset + 8;
1253            for (uint32_t i = 0; i < entry_count; ++i) {
1254                status_t err = parseChunk(offset, depth + 1);
1255                if (err != OK) {
1256                    return err;
1257                }
1258            }
1259
1260            if (*offset != stop_offset) {
1261                return ERROR_MALFORMED;
1262            }
1263            break;
1264        }
1265
1266        case FOURCC('m', 'p', '4', 'a'):
1267        case FOURCC('e', 'n', 'c', 'a'):
1268        case FOURCC('s', 'a', 'm', 'r'):
1269        case FOURCC('s', 'a', 'w', 'b'):
1270        {
1271            uint8_t buffer[8 + 20];
1272            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1273                // Basic AudioSampleEntry size.
1274                return ERROR_MALFORMED;
1275            }
1276
1277            if (mDataSource->readAt(
1278                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1279                return ERROR_IO;
1280            }
1281
1282            uint16_t data_ref_index = U16_AT(&buffer[6]);
1283            uint32_t num_channels = U16_AT(&buffer[16]);
1284
1285            uint16_t sample_size = U16_AT(&buffer[18]);
1286            uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1287
1288            if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1289                // if the chunk type is enca, we'll get the type from the sinf/frma box later
1290                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1291                AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1292            }
1293            ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1294                   chunk, num_channels, sample_size, sample_rate);
1295            mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1296            mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1297
1298            off64_t stop_offset = *offset + chunk_size;
1299            *offset = data_offset + sizeof(buffer);
1300            while (*offset < stop_offset) {
1301                status_t err = parseChunk(offset, depth + 1);
1302                if (err != OK) {
1303                    return err;
1304                }
1305            }
1306
1307            if (*offset != stop_offset) {
1308                return ERROR_MALFORMED;
1309            }
1310            break;
1311        }
1312
1313        case FOURCC('m', 'p', '4', 'v'):
1314        case FOURCC('e', 'n', 'c', 'v'):
1315        case FOURCC('s', '2', '6', '3'):
1316        case FOURCC('H', '2', '6', '3'):
1317        case FOURCC('h', '2', '6', '3'):
1318        case FOURCC('a', 'v', 'c', '1'):
1319        case FOURCC('h', 'v', 'c', '1'):
1320        case FOURCC('h', 'e', 'v', '1'):
1321        {
1322            mHasVideo = true;
1323
1324            uint8_t buffer[78];
1325            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1326                // Basic VideoSampleEntry size.
1327                return ERROR_MALFORMED;
1328            }
1329
1330            if (mDataSource->readAt(
1331                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1332                return ERROR_IO;
1333            }
1334
1335            uint16_t data_ref_index = U16_AT(&buffer[6]);
1336            uint16_t width = U16_AT(&buffer[6 + 18]);
1337            uint16_t height = U16_AT(&buffer[6 + 20]);
1338
1339            // The video sample is not standard-compliant if it has invalid dimension.
1340            // Use some default width and height value, and
1341            // let the decoder figure out the actual width and height (and thus
1342            // be prepared for INFO_FORMAT_CHANGED event).
1343            if (width == 0)  width  = 352;
1344            if (height == 0) height = 288;
1345
1346            // printf("*** coding='%s' width=%d height=%d\n",
1347            //        chunk, width, height);
1348
1349            if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1350                // if the chunk type is encv, we'll get the type from the sinf/frma box later
1351                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1352            }
1353            mLastTrack->meta->setInt32(kKeyWidth, width);
1354            mLastTrack->meta->setInt32(kKeyHeight, height);
1355
1356            off64_t stop_offset = *offset + chunk_size;
1357            *offset = data_offset + sizeof(buffer);
1358            while (*offset < stop_offset) {
1359                status_t err = parseChunk(offset, depth + 1);
1360                if (err != OK) {
1361                    return err;
1362                }
1363            }
1364
1365            if (*offset != stop_offset) {
1366                return ERROR_MALFORMED;
1367            }
1368            break;
1369        }
1370
1371        case FOURCC('s', 't', 'c', 'o'):
1372        case FOURCC('c', 'o', '6', '4'):
1373        {
1374            status_t err =
1375                mLastTrack->sampleTable->setChunkOffsetParams(
1376                        chunk_type, data_offset, chunk_data_size);
1377
1378            *offset += chunk_size;
1379
1380            if (err != OK) {
1381                return err;
1382            }
1383
1384            break;
1385        }
1386
1387        case FOURCC('s', 't', 's', 'c'):
1388        {
1389            status_t err =
1390                mLastTrack->sampleTable->setSampleToChunkParams(
1391                        data_offset, chunk_data_size);
1392
1393            *offset += chunk_size;
1394
1395            if (err != OK) {
1396                return err;
1397            }
1398
1399            break;
1400        }
1401
1402        case FOURCC('s', 't', 's', 'z'):
1403        case FOURCC('s', 't', 'z', '2'):
1404        {
1405            status_t err =
1406                mLastTrack->sampleTable->setSampleSizeParams(
1407                        chunk_type, data_offset, chunk_data_size);
1408
1409            *offset += chunk_size;
1410
1411            if (err != OK) {
1412                return err;
1413            }
1414
1415            size_t max_size;
1416            err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1417
1418            if (err != OK) {
1419                return err;
1420            }
1421
1422            if (max_size != 0) {
1423                // Assume that a given buffer only contains at most 10 chunks,
1424                // each chunk originally prefixed with a 2 byte length will
1425                // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1426                // and thus will grow by 2 bytes per chunk.
1427                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1428            } else {
1429                // No size was specified. Pick a conservatively large size.
1430                int32_t width, height;
1431                if (!mLastTrack->meta->findInt32(kKeyWidth, &width) ||
1432                    !mLastTrack->meta->findInt32(kKeyHeight, &height)) {
1433                    ALOGE("No width or height, assuming worst case 1080p");
1434                    width = 1920;
1435                    height = 1080;
1436                }
1437
1438                const char *mime;
1439                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1440                if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
1441                    // AVC requires compression ratio of at least 2, and uses
1442                    // macroblocks
1443                    max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1444                } else {
1445                    // For all other formats there is no minimum compression
1446                    // ratio. Use compression ratio of 1.
1447                    max_size = width * height * 3 / 2;
1448                }
1449                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size);
1450            }
1451
1452            // NOTE: setting another piece of metadata invalidates any pointers (such as the
1453            // mimetype) previously obtained, so don't cache them.
1454            const char *mime;
1455            CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1456            // Calculate average frame rate.
1457            if (!strncasecmp("video/", mime, 6)) {
1458                size_t nSamples = mLastTrack->sampleTable->countSamples();
1459                int64_t durationUs;
1460                if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1461                    if (durationUs > 0) {
1462                        int32_t frameRate = (nSamples * 1000000LL +
1463                                    (durationUs >> 1)) / durationUs;
1464                        mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1465                    }
1466                }
1467            }
1468
1469            break;
1470        }
1471
1472        case FOURCC('s', 't', 't', 's'):
1473        {
1474            *offset += chunk_size;
1475
1476            status_t err =
1477                mLastTrack->sampleTable->setTimeToSampleParams(
1478                        data_offset, chunk_data_size);
1479
1480            if (err != OK) {
1481                return err;
1482            }
1483
1484            break;
1485        }
1486
1487        case FOURCC('c', 't', 't', 's'):
1488        {
1489            *offset += chunk_size;
1490
1491            status_t err =
1492                mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1493                        data_offset, chunk_data_size);
1494
1495            if (err != OK) {
1496                return err;
1497            }
1498
1499            break;
1500        }
1501
1502        case FOURCC('s', 't', 's', 's'):
1503        {
1504            *offset += chunk_size;
1505
1506            status_t err =
1507                mLastTrack->sampleTable->setSyncSampleParams(
1508                        data_offset, chunk_data_size);
1509
1510            if (err != OK) {
1511                return err;
1512            }
1513
1514            break;
1515        }
1516
1517        // @xyz
1518        case FOURCC('\xA9', 'x', 'y', 'z'):
1519        {
1520            *offset += chunk_size;
1521
1522            // Best case the total data length inside "@xyz" box
1523            // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/",
1524            // where "\x00\x04" is the text string length with value = 4,
1525            // "\x15\xc7" is the language code = en, and "0+0" is a
1526            // location (string) value with longitude = 0 and latitude = 0.
1527            if (chunk_data_size < 8) {
1528                return ERROR_MALFORMED;
1529            }
1530
1531            // Worst case the location string length would be 18,
1532            // for instance +90.0000-180.0000, without the trailing "/" and
1533            // the string length + language code.
1534            char buffer[18];
1535
1536            // Subtracting 5 from the data size is because the text string length +
1537            // language code takes 4 bytes, and the trailing slash "/" takes 1 byte.
1538            off64_t location_length = chunk_data_size - 5;
1539            if (location_length >= (off64_t) sizeof(buffer)) {
1540                return ERROR_MALFORMED;
1541            }
1542
1543            if (mDataSource->readAt(
1544                        data_offset + 4, buffer, location_length) < location_length) {
1545                return ERROR_IO;
1546            }
1547
1548            buffer[location_length] = '\0';
1549            mFileMetaData->setCString(kKeyLocation, buffer);
1550            break;
1551        }
1552
1553        case FOURCC('e', 's', 'd', 's'):
1554        {
1555            *offset += chunk_size;
1556
1557            if (chunk_data_size < 4) {
1558                return ERROR_MALFORMED;
1559            }
1560
1561            uint8_t buffer[256];
1562            if (chunk_data_size > (off64_t)sizeof(buffer)) {
1563                return ERROR_BUFFER_TOO_SMALL;
1564            }
1565
1566            if (mDataSource->readAt(
1567                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
1568                return ERROR_IO;
1569            }
1570
1571            if (U32_AT(buffer) != 0) {
1572                // Should be version 0, flags 0.
1573                return ERROR_MALFORMED;
1574            }
1575
1576            mLastTrack->meta->setData(
1577                    kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1578
1579            if (mPath.size() >= 2
1580                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1581                // Information from the ESDS must be relied on for proper
1582                // setup of sample rate and channel count for MPEG4 Audio.
1583                // The generic header appears to only contain generic
1584                // information...
1585
1586                status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1587                        &buffer[4], chunk_data_size - 4);
1588
1589                if (err != OK) {
1590                    return err;
1591                }
1592            }
1593
1594            break;
1595        }
1596
1597        case FOURCC('a', 'v', 'c', 'C'):
1598        {
1599            *offset += chunk_size;
1600
1601            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1602
1603            if (mDataSource->readAt(
1604                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1605                return ERROR_IO;
1606            }
1607
1608            mLastTrack->meta->setData(
1609                    kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
1610
1611            break;
1612        }
1613        case FOURCC('h', 'v', 'c', 'C'):
1614        {
1615            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1616
1617            if (mDataSource->readAt(
1618                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1619                return ERROR_IO;
1620            }
1621
1622            mLastTrack->meta->setData(
1623                    kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size);
1624
1625            *offset += chunk_size;
1626            break;
1627        }
1628
1629        case FOURCC('d', '2', '6', '3'):
1630        {
1631            *offset += chunk_size;
1632            /*
1633             * d263 contains a fixed 7 bytes part:
1634             *   vendor - 4 bytes
1635             *   version - 1 byte
1636             *   level - 1 byte
1637             *   profile - 1 byte
1638             * optionally, "d263" box itself may contain a 16-byte
1639             * bit rate box (bitr)
1640             *   average bit rate - 4 bytes
1641             *   max bit rate - 4 bytes
1642             */
1643            char buffer[23];
1644            if (chunk_data_size != 7 &&
1645                chunk_data_size != 23) {
1646                ALOGE("Incorrect D263 box size %lld", chunk_data_size);
1647                return ERROR_MALFORMED;
1648            }
1649
1650            if (mDataSource->readAt(
1651                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1652                return ERROR_IO;
1653            }
1654
1655            mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1656
1657            break;
1658        }
1659
1660        case FOURCC('m', 'e', 't', 'a'):
1661        {
1662            uint8_t buffer[4];
1663            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1664                *offset += chunk_size;
1665                return ERROR_MALFORMED;
1666            }
1667
1668            if (mDataSource->readAt(
1669                        data_offset, buffer, 4) < 4) {
1670                *offset += chunk_size;
1671                return ERROR_IO;
1672            }
1673
1674            if (U32_AT(buffer) != 0) {
1675                // Should be version 0, flags 0.
1676
1677                // If it's not, let's assume this is one of those
1678                // apparently malformed chunks that don't have flags
1679                // and completely different semantics than what's
1680                // in the MPEG4 specs and skip it.
1681                *offset += chunk_size;
1682                return OK;
1683            }
1684
1685            off64_t stop_offset = *offset + chunk_size;
1686            *offset = data_offset + sizeof(buffer);
1687            while (*offset < stop_offset) {
1688                status_t err = parseChunk(offset, depth + 1);
1689                if (err != OK) {
1690                    return err;
1691                }
1692            }
1693
1694            if (*offset != stop_offset) {
1695                return ERROR_MALFORMED;
1696            }
1697            break;
1698        }
1699
1700        case FOURCC('m', 'e', 'a', 'n'):
1701        case FOURCC('n', 'a', 'm', 'e'):
1702        case FOURCC('d', 'a', 't', 'a'):
1703        {
1704            *offset += chunk_size;
1705
1706            if (mPath.size() == 6 && underMetaDataPath(mPath)) {
1707                status_t err = parseITunesMetaData(data_offset, chunk_data_size);
1708
1709                if (err != OK) {
1710                    return err;
1711                }
1712            }
1713
1714            break;
1715        }
1716
1717        case FOURCC('m', 'v', 'h', 'd'):
1718        {
1719            *offset += chunk_size;
1720
1721            if (chunk_data_size < 24) {
1722                return ERROR_MALFORMED;
1723            }
1724
1725            uint8_t header[24];
1726            if (mDataSource->readAt(
1727                        data_offset, header, sizeof(header))
1728                    < (ssize_t)sizeof(header)) {
1729                return ERROR_IO;
1730            }
1731
1732            uint64_t creationTime;
1733            if (header[0] == 1) {
1734                creationTime = U64_AT(&header[4]);
1735                mHeaderTimescale = U32_AT(&header[20]);
1736            } else if (header[0] != 0) {
1737                return ERROR_MALFORMED;
1738            } else {
1739                creationTime = U32_AT(&header[4]);
1740                mHeaderTimescale = U32_AT(&header[12]);
1741            }
1742
1743            String8 s;
1744            convertTimeToDate(creationTime, &s);
1745
1746            mFileMetaData->setCString(kKeyDate, s.string());
1747
1748            break;
1749        }
1750
1751        case FOURCC('m', 'd', 'a', 't'):
1752        {
1753            ALOGV("mdat chunk, drm: %d", mIsDrm);
1754            if (!mIsDrm) {
1755                *offset += chunk_size;
1756                break;
1757            }
1758
1759            if (chunk_size < 8) {
1760                return ERROR_MALFORMED;
1761            }
1762
1763            return parseDrmSINF(offset, data_offset);
1764        }
1765
1766        case FOURCC('h', 'd', 'l', 'r'):
1767        {
1768            *offset += chunk_size;
1769
1770            uint32_t buffer;
1771            if (mDataSource->readAt(
1772                        data_offset + 8, &buffer, 4) < 4) {
1773                return ERROR_IO;
1774            }
1775
1776            uint32_t type = ntohl(buffer);
1777            // For the 3GPP file format, the handler-type within the 'hdlr' box
1778            // shall be 'text'. We also want to support 'sbtl' handler type
1779            // for a practical reason as various MPEG4 containers use it.
1780            if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
1781                mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
1782            }
1783
1784            break;
1785        }
1786
1787        case FOURCC('t', 'x', '3', 'g'):
1788        {
1789            uint32_t type;
1790            const void *data;
1791            size_t size = 0;
1792            if (!mLastTrack->meta->findData(
1793                    kKeyTextFormatData, &type, &data, &size)) {
1794                size = 0;
1795            }
1796
1797            uint8_t *buffer = new uint8_t[size + chunk_size];
1798
1799            if (size > 0) {
1800                memcpy(buffer, data, size);
1801            }
1802
1803            if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
1804                    < chunk_size) {
1805                delete[] buffer;
1806                buffer = NULL;
1807
1808                // advance read pointer so we don't end up reading this again
1809                *offset += chunk_size;
1810                return ERROR_IO;
1811            }
1812
1813            mLastTrack->meta->setData(
1814                    kKeyTextFormatData, 0, buffer, size + chunk_size);
1815
1816            delete[] buffer;
1817
1818            *offset += chunk_size;
1819            break;
1820        }
1821
1822        case FOURCC('c', 'o', 'v', 'r'):
1823        {
1824            *offset += chunk_size;
1825
1826            if (mFileMetaData != NULL) {
1827                ALOGV("chunk_data_size = %lld and data_offset = %lld",
1828                        chunk_data_size, data_offset);
1829                sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1);
1830                if (mDataSource->readAt(
1831                    data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) {
1832                    return ERROR_IO;
1833                }
1834                const int kSkipBytesOfDataBox = 16;
1835                mFileMetaData->setData(
1836                    kKeyAlbumArt, MetaData::TYPE_NONE,
1837                    buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
1838            }
1839
1840            break;
1841        }
1842
1843        case FOURCC('t', 'i', 't', 'l'):
1844        case FOURCC('p', 'e', 'r', 'f'):
1845        case FOURCC('a', 'u', 't', 'h'):
1846        case FOURCC('g', 'n', 'r', 'e'):
1847        case FOURCC('a', 'l', 'b', 'm'):
1848        case FOURCC('y', 'r', 'r', 'c'):
1849        {
1850            *offset += chunk_size;
1851
1852            status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
1853
1854            if (err != OK) {
1855                return err;
1856            }
1857
1858            break;
1859        }
1860
1861        case FOURCC('I', 'D', '3', '2'):
1862        {
1863            *offset += chunk_size;
1864
1865            if (chunk_data_size < 6) {
1866                return ERROR_MALFORMED;
1867            }
1868
1869            parseID3v2MetaData(data_offset + 6);
1870
1871            break;
1872        }
1873
1874        case FOURCC('-', '-', '-', '-'):
1875        {
1876            mLastCommentMean.clear();
1877            mLastCommentName.clear();
1878            mLastCommentData.clear();
1879            *offset += chunk_size;
1880            break;
1881        }
1882
1883        case FOURCC('s', 'i', 'd', 'x'):
1884        {
1885            parseSegmentIndex(data_offset, chunk_data_size);
1886            *offset += chunk_size;
1887            return UNKNOWN_ERROR; // stop parsing after sidx
1888        }
1889
1890        default:
1891        {
1892            *offset += chunk_size;
1893            break;
1894        }
1895    }
1896
1897    return OK;
1898}
1899
1900status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
1901  ALOGV("MPEG4Extractor::parseSegmentIndex");
1902
1903    if (size < 12) {
1904      return -EINVAL;
1905    }
1906
1907    uint32_t flags;
1908    if (!mDataSource->getUInt32(offset, &flags)) {
1909        return ERROR_MALFORMED;
1910    }
1911
1912    uint32_t version = flags >> 24;
1913    flags &= 0xffffff;
1914
1915    ALOGV("sidx version %d", version);
1916
1917    uint32_t referenceId;
1918    if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
1919        return ERROR_MALFORMED;
1920    }
1921
1922    uint32_t timeScale;
1923    if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
1924        return ERROR_MALFORMED;
1925    }
1926    ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
1927
1928    uint64_t earliestPresentationTime;
1929    uint64_t firstOffset;
1930
1931    offset += 12;
1932    size -= 12;
1933
1934    if (version == 0) {
1935        if (size < 8) {
1936            return -EINVAL;
1937        }
1938        uint32_t tmp;
1939        if (!mDataSource->getUInt32(offset, &tmp)) {
1940            return ERROR_MALFORMED;
1941        }
1942        earliestPresentationTime = tmp;
1943        if (!mDataSource->getUInt32(offset + 4, &tmp)) {
1944            return ERROR_MALFORMED;
1945        }
1946        firstOffset = tmp;
1947        offset += 8;
1948        size -= 8;
1949    } else {
1950        if (size < 16) {
1951            return -EINVAL;
1952        }
1953        if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
1954            return ERROR_MALFORMED;
1955        }
1956        if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
1957            return ERROR_MALFORMED;
1958        }
1959        offset += 16;
1960        size -= 16;
1961    }
1962    ALOGV("sidx pres/off: %Ld/%Ld", earliestPresentationTime, firstOffset);
1963
1964    if (size < 4) {
1965        return -EINVAL;
1966    }
1967
1968    uint16_t referenceCount;
1969    if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
1970        return ERROR_MALFORMED;
1971    }
1972    offset += 4;
1973    size -= 4;
1974    ALOGV("refcount: %d", referenceCount);
1975
1976    if (size < referenceCount * 12) {
1977        return -EINVAL;
1978    }
1979
1980    uint64_t total_duration = 0;
1981    for (unsigned int i = 0; i < referenceCount; i++) {
1982        uint32_t d1, d2, d3;
1983
1984        if (!mDataSource->getUInt32(offset, &d1) ||     // size
1985            !mDataSource->getUInt32(offset + 4, &d2) || // duration
1986            !mDataSource->getUInt32(offset + 8, &d3)) { // flags
1987            return ERROR_MALFORMED;
1988        }
1989
1990        if (d1 & 0x80000000) {
1991            ALOGW("sub-sidx boxes not supported yet");
1992        }
1993        bool sap = d3 & 0x80000000;
1994        uint32_t saptype = (d3 >> 28) & 7;
1995        if (!sap || (saptype != 1 && saptype != 2)) {
1996            // type 1 and 2 are sync samples
1997            ALOGW("not a stream access point, or unsupported type: %08x", d3);
1998        }
1999        total_duration += d2;
2000        offset += 12;
2001        ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
2002        SidxEntry se;
2003        se.mSize = d1 & 0x7fffffff;
2004        se.mDurationUs = 1000000LL * d2 / timeScale;
2005        mSidxEntries.add(se);
2006    }
2007
2008    mSidxDuration = total_duration * 1000000 / timeScale;
2009    ALOGV("duration: %lld", mSidxDuration);
2010
2011    int64_t metaDuration;
2012    if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
2013        mLastTrack->meta->setInt64(kKeyDuration, mSidxDuration);
2014    }
2015    return OK;
2016}
2017
2018
2019
2020status_t MPEG4Extractor::parseTrackHeader(
2021        off64_t data_offset, off64_t data_size) {
2022    if (data_size < 4) {
2023        return ERROR_MALFORMED;
2024    }
2025
2026    uint8_t version;
2027    if (mDataSource->readAt(data_offset, &version, 1) < 1) {
2028        return ERROR_IO;
2029    }
2030
2031    size_t dynSize = (version == 1) ? 36 : 24;
2032
2033    uint8_t buffer[36 + 60];
2034
2035    if (data_size != (off64_t)dynSize + 60) {
2036        return ERROR_MALFORMED;
2037    }
2038
2039    if (mDataSource->readAt(
2040                data_offset, buffer, data_size) < (ssize_t)data_size) {
2041        return ERROR_IO;
2042    }
2043
2044    uint64_t ctime, mtime, duration;
2045    int32_t id;
2046
2047    if (version == 1) {
2048        ctime = U64_AT(&buffer[4]);
2049        mtime = U64_AT(&buffer[12]);
2050        id = U32_AT(&buffer[20]);
2051        duration = U64_AT(&buffer[28]);
2052    } else if (version == 0) {
2053        ctime = U32_AT(&buffer[4]);
2054        mtime = U32_AT(&buffer[8]);
2055        id = U32_AT(&buffer[12]);
2056        duration = U32_AT(&buffer[20]);
2057    } else {
2058        return ERROR_UNSUPPORTED;
2059    }
2060
2061    mLastTrack->meta->setInt32(kKeyTrackID, id);
2062
2063    size_t matrixOffset = dynSize + 16;
2064    int32_t a00 = U32_AT(&buffer[matrixOffset]);
2065    int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
2066    int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
2067    int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
2068    int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
2069    int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
2070
2071#if 0
2072    ALOGI("x' = %.2f * x + %.2f * y + %.2f",
2073         a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
2074    ALOGI("y' = %.2f * x + %.2f * y + %.2f",
2075         a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
2076#endif
2077
2078    uint32_t rotationDegrees;
2079
2080    static const int32_t kFixedOne = 0x10000;
2081    if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
2082        // Identity, no rotation
2083        rotationDegrees = 0;
2084    } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2085        rotationDegrees = 90;
2086    } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2087        rotationDegrees = 270;
2088    } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2089        rotationDegrees = 180;
2090    } else {
2091        ALOGW("We only support 0,90,180,270 degree rotation matrices");
2092        rotationDegrees = 0;
2093    }
2094
2095    if (rotationDegrees != 0) {
2096        mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
2097    }
2098
2099    // Handle presentation display size, which could be different
2100    // from the image size indicated by kKeyWidth and kKeyHeight.
2101    uint32_t width = U32_AT(&buffer[dynSize + 52]);
2102    uint32_t height = U32_AT(&buffer[dynSize + 56]);
2103    mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
2104    mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
2105
2106    return OK;
2107}
2108
2109status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
2110    if (size < 4) {
2111        return ERROR_MALFORMED;
2112    }
2113
2114    uint8_t *buffer = new uint8_t[size + 1];
2115    if (mDataSource->readAt(
2116                offset, buffer, size) != (ssize_t)size) {
2117        delete[] buffer;
2118        buffer = NULL;
2119
2120        return ERROR_IO;
2121    }
2122
2123    uint32_t flags = U32_AT(buffer);
2124
2125    uint32_t metadataKey = 0;
2126    char chunk[5];
2127    MakeFourCCString(mPath[4], chunk);
2128    ALOGV("meta: %s @ %lld", chunk, offset);
2129    switch (mPath[4]) {
2130        case FOURCC(0xa9, 'a', 'l', 'b'):
2131        {
2132            metadataKey = kKeyAlbum;
2133            break;
2134        }
2135        case FOURCC(0xa9, 'A', 'R', 'T'):
2136        {
2137            metadataKey = kKeyArtist;
2138            break;
2139        }
2140        case FOURCC('a', 'A', 'R', 'T'):
2141        {
2142            metadataKey = kKeyAlbumArtist;
2143            break;
2144        }
2145        case FOURCC(0xa9, 'd', 'a', 'y'):
2146        {
2147            metadataKey = kKeyYear;
2148            break;
2149        }
2150        case FOURCC(0xa9, 'n', 'a', 'm'):
2151        {
2152            metadataKey = kKeyTitle;
2153            break;
2154        }
2155        case FOURCC(0xa9, 'w', 'r', 't'):
2156        {
2157            metadataKey = kKeyWriter;
2158            break;
2159        }
2160        case FOURCC('c', 'o', 'v', 'r'):
2161        {
2162            metadataKey = kKeyAlbumArt;
2163            break;
2164        }
2165        case FOURCC('g', 'n', 'r', 'e'):
2166        {
2167            metadataKey = kKeyGenre;
2168            break;
2169        }
2170        case FOURCC(0xa9, 'g', 'e', 'n'):
2171        {
2172            metadataKey = kKeyGenre;
2173            break;
2174        }
2175        case FOURCC('c', 'p', 'i', 'l'):
2176        {
2177            if (size == 9 && flags == 21) {
2178                char tmp[16];
2179                sprintf(tmp, "%d",
2180                        (int)buffer[size - 1]);
2181
2182                mFileMetaData->setCString(kKeyCompilation, tmp);
2183            }
2184            break;
2185        }
2186        case FOURCC('t', 'r', 'k', 'n'):
2187        {
2188            if (size == 16 && flags == 0) {
2189                char tmp[16];
2190                uint16_t* pTrack = (uint16_t*)&buffer[10];
2191                uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2192                sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2193
2194                mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2195            }
2196            break;
2197        }
2198        case FOURCC('d', 'i', 's', 'k'):
2199        {
2200            if ((size == 14 || size == 16) && flags == 0) {
2201                char tmp[16];
2202                uint16_t* pDisc = (uint16_t*)&buffer[10];
2203                uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2204                sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2205
2206                mFileMetaData->setCString(kKeyDiscNumber, tmp);
2207            }
2208            break;
2209        }
2210        case FOURCC('-', '-', '-', '-'):
2211        {
2212            buffer[size] = '\0';
2213            switch (mPath[5]) {
2214                case FOURCC('m', 'e', 'a', 'n'):
2215                    mLastCommentMean.setTo((const char *)buffer + 4);
2216                    break;
2217                case FOURCC('n', 'a', 'm', 'e'):
2218                    mLastCommentName.setTo((const char *)buffer + 4);
2219                    break;
2220                case FOURCC('d', 'a', 't', 'a'):
2221                    mLastCommentData.setTo((const char *)buffer + 8);
2222                    break;
2223            }
2224
2225            // Once we have a set of mean/name/data info, go ahead and process
2226            // it to see if its something we are interested in.  Whether or not
2227            // were are interested in the specific tag, make sure to clear out
2228            // the set so we can be ready to process another tuple should one
2229            // show up later in the file.
2230            if ((mLastCommentMean.length() != 0) &&
2231                (mLastCommentName.length() != 0) &&
2232                (mLastCommentData.length() != 0)) {
2233
2234                if (mLastCommentMean == "com.apple.iTunes"
2235                        && mLastCommentName == "iTunSMPB") {
2236                    int32_t delay, padding;
2237                    if (sscanf(mLastCommentData,
2238                               " %*x %x %x %*x", &delay, &padding) == 2) {
2239                        mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2240                        mLastTrack->meta->setInt32(kKeyEncoderPadding, padding);
2241                    }
2242                }
2243
2244                mLastCommentMean.clear();
2245                mLastCommentName.clear();
2246                mLastCommentData.clear();
2247            }
2248            break;
2249        }
2250
2251        default:
2252            break;
2253    }
2254
2255    if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) {
2256        if (metadataKey == kKeyAlbumArt) {
2257            mFileMetaData->setData(
2258                    kKeyAlbumArt, MetaData::TYPE_NONE,
2259                    buffer + 8, size - 8);
2260        } else if (metadataKey == kKeyGenre) {
2261            if (flags == 0) {
2262                // uint8_t genre code, iTunes genre codes are
2263                // the standard id3 codes, except they start
2264                // at 1 instead of 0 (e.g. Pop is 14, not 13)
2265                // We use standard id3 numbering, so subtract 1.
2266                int genrecode = (int)buffer[size - 1];
2267                genrecode--;
2268                if (genrecode < 0) {
2269                    genrecode = 255; // reserved for 'unknown genre'
2270                }
2271                char genre[10];
2272                sprintf(genre, "%d", genrecode);
2273
2274                mFileMetaData->setCString(metadataKey, genre);
2275            } else if (flags == 1) {
2276                // custom genre string
2277                buffer[size] = '\0';
2278
2279                mFileMetaData->setCString(
2280                        metadataKey, (const char *)buffer + 8);
2281            }
2282        } else {
2283            buffer[size] = '\0';
2284
2285            mFileMetaData->setCString(
2286                    metadataKey, (const char *)buffer + 8);
2287        }
2288    }
2289
2290    delete[] buffer;
2291    buffer = NULL;
2292
2293    return OK;
2294}
2295
2296status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
2297    if (size < 4) {
2298        return ERROR_MALFORMED;
2299    }
2300
2301    uint8_t *buffer = new uint8_t[size];
2302    if (mDataSource->readAt(
2303                offset, buffer, size) != (ssize_t)size) {
2304        delete[] buffer;
2305        buffer = NULL;
2306
2307        return ERROR_IO;
2308    }
2309
2310    uint32_t metadataKey = 0;
2311    switch (mPath[depth]) {
2312        case FOURCC('t', 'i', 't', 'l'):
2313        {
2314            metadataKey = kKeyTitle;
2315            break;
2316        }
2317        case FOURCC('p', 'e', 'r', 'f'):
2318        {
2319            metadataKey = kKeyArtist;
2320            break;
2321        }
2322        case FOURCC('a', 'u', 't', 'h'):
2323        {
2324            metadataKey = kKeyWriter;
2325            break;
2326        }
2327        case FOURCC('g', 'n', 'r', 'e'):
2328        {
2329            metadataKey = kKeyGenre;
2330            break;
2331        }
2332        case FOURCC('a', 'l', 'b', 'm'):
2333        {
2334            if (buffer[size - 1] != '\0') {
2335              char tmp[4];
2336              sprintf(tmp, "%u", buffer[size - 1]);
2337
2338              mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2339            }
2340
2341            metadataKey = kKeyAlbum;
2342            break;
2343        }
2344        case FOURCC('y', 'r', 'r', 'c'):
2345        {
2346            char tmp[5];
2347            uint16_t year = U16_AT(&buffer[4]);
2348
2349            if (year < 10000) {
2350                sprintf(tmp, "%u", year);
2351
2352                mFileMetaData->setCString(kKeyYear, tmp);
2353            }
2354            break;
2355        }
2356
2357        default:
2358            break;
2359    }
2360
2361    if (metadataKey > 0) {
2362        bool isUTF8 = true; // Common case
2363        char16_t *framedata = NULL;
2364        int len16 = 0; // Number of UTF-16 characters
2365
2366        // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
2367        if (size - 6 >= 4) {
2368            len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
2369            framedata = (char16_t *)(buffer + 6);
2370            if (0xfffe == *framedata) {
2371                // endianness marker (BOM) doesn't match host endianness
2372                for (int i = 0; i < len16; i++) {
2373                    framedata[i] = bswap_16(framedata[i]);
2374                }
2375                // BOM is now swapped to 0xfeff, we will execute next block too
2376            }
2377
2378            if (0xfeff == *framedata) {
2379                // Remove the BOM
2380                framedata++;
2381                len16--;
2382                isUTF8 = false;
2383            }
2384            // else normal non-zero-length UTF-8 string
2385            // we can't handle UTF-16 without BOM as there is no other
2386            // indication of encoding.
2387        }
2388
2389        if (isUTF8) {
2390            mFileMetaData->setCString(metadataKey, (const char *)buffer + 6);
2391        } else {
2392            // Convert from UTF-16 string to UTF-8 string.
2393            String8 tmpUTF8str(framedata, len16);
2394            mFileMetaData->setCString(metadataKey, tmpUTF8str.string());
2395        }
2396    }
2397
2398    delete[] buffer;
2399    buffer = NULL;
2400
2401    return OK;
2402}
2403
2404void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
2405    ID3 id3(mDataSource, true /* ignorev1 */, offset);
2406
2407    if (id3.isValid()) {
2408        struct Map {
2409            int key;
2410            const char *tag1;
2411            const char *tag2;
2412        };
2413        static const Map kMap[] = {
2414            { kKeyAlbum, "TALB", "TAL" },
2415            { kKeyArtist, "TPE1", "TP1" },
2416            { kKeyAlbumArtist, "TPE2", "TP2" },
2417            { kKeyComposer, "TCOM", "TCM" },
2418            { kKeyGenre, "TCON", "TCO" },
2419            { kKeyTitle, "TIT2", "TT2" },
2420            { kKeyYear, "TYE", "TYER" },
2421            { kKeyAuthor, "TXT", "TEXT" },
2422            { kKeyCDTrackNumber, "TRK", "TRCK" },
2423            { kKeyDiscNumber, "TPA", "TPOS" },
2424            { kKeyCompilation, "TCP", "TCMP" },
2425        };
2426        static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
2427
2428        for (size_t i = 0; i < kNumMapEntries; ++i) {
2429            if (!mFileMetaData->hasData(kMap[i].key)) {
2430                ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
2431                if (it->done()) {
2432                    delete it;
2433                    it = new ID3::Iterator(id3, kMap[i].tag2);
2434                }
2435
2436                if (it->done()) {
2437                    delete it;
2438                    continue;
2439                }
2440
2441                String8 s;
2442                it->getString(&s);
2443                delete it;
2444
2445                mFileMetaData->setCString(kMap[i].key, s);
2446            }
2447        }
2448
2449        size_t dataSize;
2450        String8 mime;
2451        const void *data = id3.getAlbumArt(&dataSize, &mime);
2452
2453        if (data) {
2454            mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
2455            mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string());
2456        }
2457    }
2458}
2459
2460sp<MediaSource> MPEG4Extractor::getTrack(size_t index) {
2461    status_t err;
2462    if ((err = readMetaData()) != OK) {
2463        return NULL;
2464    }
2465
2466    Track *track = mFirstTrack;
2467    while (index > 0) {
2468        if (track == NULL) {
2469            return NULL;
2470        }
2471
2472        track = track->next;
2473        --index;
2474    }
2475
2476    if (track == NULL) {
2477        return NULL;
2478    }
2479
2480    ALOGV("getTrack called, pssh: %d", mPssh.size());
2481
2482    return new MPEG4Source(
2483            track->meta, mDataSource, track->timescale, track->sampleTable,
2484            mSidxEntries, mMoofOffset);
2485}
2486
2487// static
2488status_t MPEG4Extractor::verifyTrack(Track *track) {
2489    const char *mime;
2490    CHECK(track->meta->findCString(kKeyMIMEType, &mime));
2491
2492    uint32_t type;
2493    const void *data;
2494    size_t size;
2495    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
2496        if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
2497                || type != kTypeAVCC) {
2498            return ERROR_MALFORMED;
2499        }
2500    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
2501        if (!track->meta->findData(kKeyHVCC, &type, &data, &size)
2502                    || type != kTypeHVCC) {
2503            return ERROR_MALFORMED;
2504        }
2505    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
2506            || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
2507        if (!track->meta->findData(kKeyESDS, &type, &data, &size)
2508                || type != kTypeESDS) {
2509            return ERROR_MALFORMED;
2510        }
2511    }
2512
2513    if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
2514        // Make sure we have all the metadata we need.
2515        ALOGE("stbl atom missing/invalid.");
2516        return ERROR_MALFORMED;
2517    }
2518
2519    return OK;
2520}
2521
// Audio object type (AOT) codes referenced by this file. Only the values
// the extractor actually uses are defined.
//
// Codes that are deliberately left undefined, listed for reference:
//   -1 AOT_NONE              0 AOT_NULL_OBJECT     1 AOT_AAC_MAIN (Main profile)
//    3 AOT_AAC_SSR           4 AOT_AAC_LTP         6 AOT_AAC_SCAL
//    7 AOT_TWIN_VQ           8 AOT_CELP            9 AOT_HVXC
//   10 AOT_RSVD_10 (reserved)                     11 AOT_RSVD_11 (reserved)
//   12 AOT_TTSI (TTSI Object)
//   13 AOT_MAIN_SYNTH (Main Synthetic object)
//   14 AOT_WAV_TAB_SYNTH (Wavetable Synthesis object)
//   15 AOT_GEN_MIDI (General MIDI object)
//   16 AOT_ALG_SYNTH_AUD_FX (Algorithmic Synthesis and Audio FX object)
//   18 AOT_RSVD_18 (reserved)
//   19 AOT_ER_AAC_LTP (Error Resilient(ER) AAC LTP object)
//   21 AOT_ER_TWIN_VQ (Error Resilient(ER) TwinVQ object)
//   24 AOT_ER_CELP (Error Resilient(ER) CELP object)
//   25 AOT_ER_HVXC (Error Resilient(ER) HVXC object)
//   26 AOT_ER_HILN (Error Resilient(ER) HILN object)
//   27 AOT_ER_PARA (Error Resilient(ER) Parametric object)
//   28 AOT_RSVD_28 (might become SSC)
//   30 AOT_MPEGS (MPEG Surround)
//   32 AOT_MP3ONMP4_L1 (MPEG-Layer1 in mp4)
//   33 AOT_MP3ONMP4_L2 (MPEG-Layer2 in mp4)
//   34 AOT_MP3ONMP4_L3 (MPEG-Layer3 in mp4)
//   35 AOT_RSVD_35 (might become DST)
//   36 AOT_RSVD_36 (might become ALS)
//   37 AOT_AAC_SLS (AAC + SLS)
//   38 AOT_SLS (SLS)
//   39 AOT_ER_AAC_ELD (AAC Enhanced Low Delay)
//   42 AOT_USAC (USAC)
//   43 AOT_SAOC (SAOC)
//   44 AOT_LD_MPEGS (Low Delay MPEG Surround)
//   50 AOT_RSVD50 (Interim AOT for Rsvd50)
enum AUDIO_OBJECT_TYPE {
    // Low Complexity object
    AOT_AAC_LC = 2,

    AOT_SBR = 5,

    // Error Resilient(ER) AAC Low Complexity
    AOT_ER_AAC_LC = 17,

    // Error Resilient(ER) AAC Scalable object
    AOT_ER_AAC_SCAL = 20,

    // Error Resilient(ER) BSAC object
    AOT_ER_BSAC = 22,

    // Error Resilient(ER) AAC LowDelay object
    AOT_ER_AAC_LD = 23,

    // PS, Parametric Stereo (includes SBR)
    AOT_PS = 29,

    // Signal AOT uses more than 5 bits
    AOT_ESCAPE = 31
};
2573
2574status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
2575        const void *esds_data, size_t esds_size) {
2576    ESDS esds(esds_data, esds_size);
2577
2578    uint8_t objectTypeIndication;
2579    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
2580        return ERROR_MALFORMED;
2581    }
2582
2583    if (objectTypeIndication == 0xe1) {
2584        // This isn't MPEG4 audio at all, it's QCELP 14k...
2585        mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
2586        return OK;
2587    }
2588
2589    if (objectTypeIndication  == 0x6b) {
2590        // The media subtype is MP3 audio
2591        // Our software MP3 audio decoder may not be able to handle
2592        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
2593        ALOGE("MP3 track in MP4/3GPP file is not supported");
2594        return ERROR_UNSUPPORTED;
2595    }
2596
2597    const uint8_t *csd;
2598    size_t csd_size;
2599    if (esds.getCodecSpecificInfo(
2600                (const void **)&csd, &csd_size) != OK) {
2601        return ERROR_MALFORMED;
2602    }
2603
2604#if 0
2605    printf("ESD of size %d\n", csd_size);
2606    hexdump(csd, csd_size);
2607#endif
2608
2609    if (csd_size == 0) {
2610        // There's no further information, i.e. no codec specific data
2611        // Let's assume that the information provided in the mpeg4 headers
2612        // is accurate and hope for the best.
2613
2614        return OK;
2615    }
2616
2617    if (csd_size < 2) {
2618        return ERROR_MALFORMED;
2619    }
2620
2621    static uint32_t kSamplingRate[] = {
2622        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
2623        16000, 12000, 11025, 8000, 7350
2624    };
2625
2626    ABitReader br(csd, csd_size);
2627    uint32_t objectType = br.getBits(5);
2628
2629    if (objectType == 31) {  // AAC-ELD => additional 6 bits
2630        objectType = 32 + br.getBits(6);
2631    }
2632
2633    //keep AOT type
2634    mLastTrack->meta->setInt32(kKeyAACAOT, objectType);
2635
2636    uint32_t freqIndex = br.getBits(4);
2637
2638    int32_t sampleRate = 0;
2639    int32_t numChannels = 0;
2640    if (freqIndex == 15) {
2641        if (csd_size < 5) {
2642            return ERROR_MALFORMED;
2643        }
2644        sampleRate = br.getBits(24);
2645        numChannels = br.getBits(4);
2646    } else {
2647        numChannels = br.getBits(4);
2648
2649        if (freqIndex == 13 || freqIndex == 14) {
2650            return ERROR_MALFORMED;
2651        }
2652
2653        sampleRate = kSamplingRate[freqIndex];
2654    }
2655
2656    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
2657        uint32_t extFreqIndex = br.getBits(4);
2658        int32_t extSampleRate;
2659        if (extFreqIndex == 15) {
2660            if (csd_size < 8) {
2661                return ERROR_MALFORMED;
2662            }
2663            extSampleRate = br.getBits(24);
2664        } else {
2665            if (extFreqIndex == 13 || extFreqIndex == 14) {
2666                return ERROR_MALFORMED;
2667            }
2668            extSampleRate = kSamplingRate[extFreqIndex];
2669        }
2670        //TODO: save the extension sampling rate value in meta data =>
2671        //      mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
2672    }
2673
2674    switch (numChannels) {
2675        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
2676        case 0:
2677        case 1:// FC
2678        case 2:// FL FR
2679        case 3:// FC, FL FR
2680        case 4:// FC, FL FR, RC
2681        case 5:// FC, FL FR, SL SR
2682        case 6:// FC, FL FR, SL SR, LFE
2683            //numChannels already contains the right value
2684            break;
2685        case 11:// FC, FL FR, SL SR, RC, LFE
2686            numChannels = 7;
2687            break;
2688        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
2689        case 12:// FC, FL  FR,  SL SR, RL RR, LFE
2690        case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
2691            numChannels = 8;
2692            break;
2693        default:
2694            return ERROR_UNSUPPORTED;
2695    }
2696
2697    {
2698        if (objectType == AOT_SBR || objectType == AOT_PS) {
2699            const int32_t extensionSamplingFrequency = br.getBits(4);
2700            objectType = br.getBits(5);
2701
2702            if (objectType == AOT_ESCAPE) {
2703                objectType = 32 + br.getBits(6);
2704            }
2705        }
2706        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
2707                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
2708                objectType == AOT_ER_BSAC) {
2709            const int32_t frameLengthFlag = br.getBits(1);
2710
2711            const int32_t dependsOnCoreCoder = br.getBits(1);
2712
2713            if (dependsOnCoreCoder ) {
2714                const int32_t coreCoderDelay = br.getBits(14);
2715            }
2716
2717            const int32_t extensionFlag = br.getBits(1);
2718
2719            if (numChannels == 0 ) {
2720                int32_t channelsEffectiveNum = 0;
2721                int32_t channelsNum = 0;
2722                const int32_t ElementInstanceTag = br.getBits(4);
2723                const int32_t Profile = br.getBits(2);
2724                const int32_t SamplingFrequencyIndex = br.getBits(4);
2725                const int32_t NumFrontChannelElements = br.getBits(4);
2726                const int32_t NumSideChannelElements = br.getBits(4);
2727                const int32_t NumBackChannelElements = br.getBits(4);
2728                const int32_t NumLfeChannelElements = br.getBits(2);
2729                const int32_t NumAssocDataElements = br.getBits(3);
2730                const int32_t NumValidCcElements = br.getBits(4);
2731
2732                const int32_t MonoMixdownPresent = br.getBits(1);
2733                if (MonoMixdownPresent != 0) {
2734                    const int32_t MonoMixdownElementNumber = br.getBits(4);
2735                }
2736
2737                const int32_t StereoMixdownPresent = br.getBits(1);
2738                if (StereoMixdownPresent != 0) {
2739                    const int32_t StereoMixdownElementNumber = br.getBits(4);
2740                }
2741
2742                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
2743                if (MatrixMixdownIndexPresent != 0) {
2744                    const int32_t MatrixMixdownIndex = br.getBits(2);
2745                    const int32_t PseudoSurroundEnable = br.getBits(1);
2746                }
2747
2748                int i;
2749                for (i=0; i < NumFrontChannelElements; i++) {
2750                    const int32_t FrontElementIsCpe = br.getBits(1);
2751                    const int32_t FrontElementTagSelect = br.getBits(4);
2752                    channelsNum += FrontElementIsCpe ? 2 : 1;
2753                }
2754
2755                for (i=0; i < NumSideChannelElements; i++) {
2756                    const int32_t SideElementIsCpe = br.getBits(1);
2757                    const int32_t SideElementTagSelect = br.getBits(4);
2758                    channelsNum += SideElementIsCpe ? 2 : 1;
2759                }
2760
2761                for (i=0; i < NumBackChannelElements; i++) {
2762                    const int32_t BackElementIsCpe = br.getBits(1);
2763                    const int32_t BackElementTagSelect = br.getBits(4);
2764                    channelsNum += BackElementIsCpe ? 2 : 1;
2765                }
2766                channelsEffectiveNum = channelsNum;
2767
2768                for (i=0; i < NumLfeChannelElements; i++) {
2769                    const int32_t LfeElementTagSelect = br.getBits(4);
2770                    channelsNum += 1;
2771                }
2772                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
2773                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
2774                numChannels = channelsNum;
2775            }
2776        }
2777    }
2778
2779    if (numChannels == 0) {
2780        return ERROR_UNSUPPORTED;
2781    }
2782
2783    int32_t prevSampleRate;
2784    CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));
2785
2786    if (prevSampleRate != sampleRate) {
2787        ALOGV("mpeg4 audio sample rate different from previous setting. "
2788             "was: %d, now: %d", prevSampleRate, sampleRate);
2789    }
2790
2791    mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);
2792
2793    int32_t prevChannelCount;
2794    CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));
2795
2796    if (prevChannelCount != numChannels) {
2797        ALOGV("mpeg4 audio channel count different from previous setting. "
2798             "was: %d, now: %d", prevChannelCount, numChannels);
2799    }
2800
2801    mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);
2802
2803    return OK;
2804}
2805
2806////////////////////////////////////////////////////////////////////////////////
2807
2808MPEG4Source::MPEG4Source(
2809        const sp<MetaData> &format,
2810        const sp<DataSource> &dataSource,
2811        int32_t timeScale,
2812        const sp<SampleTable> &sampleTable,
2813        Vector<SidxEntry> &sidx,
2814        off64_t firstMoofOffset)
2815    : mFormat(format),
2816      mDataSource(dataSource),
2817      mTimescale(timeScale),
2818      mSampleTable(sampleTable),
2819      mCurrentSampleIndex(0),
2820      mCurrentFragmentIndex(0),
2821      mSegments(sidx),
2822      mFirstMoofOffset(firstMoofOffset),
2823      mCurrentMoofOffset(firstMoofOffset),
2824      mCurrentTime(0),
2825      mCurrentSampleInfoAllocSize(0),
2826      mCurrentSampleInfoSizes(NULL),
2827      mCurrentSampleInfoOffsetsAllocSize(0),
2828      mCurrentSampleInfoOffsets(NULL),
2829      mIsAVC(false),
2830      mIsHEVC(false),
2831      mNALLengthSize(0),
2832      mStarted(false),
2833      mGroup(NULL),
2834      mBuffer(NULL),
2835      mWantsNALFragments(false),
2836      mSrcBuffer(NULL) {
2837
2838    mFormat->findInt32(kKeyCryptoMode, &mCryptoMode);
2839    mDefaultIVSize = 0;
2840    mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
2841    uint32_t keytype;
2842    const void *key;
2843    size_t keysize;
2844    if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
2845        CHECK(keysize <= 16);
2846        memset(mCryptoKey, 0, 16);
2847        memcpy(mCryptoKey, key, keysize);
2848    }
2849
2850    const char *mime;
2851    bool success = mFormat->findCString(kKeyMIMEType, &mime);
2852    CHECK(success);
2853
2854    mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
2855    mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC);
2856
2857    if (mIsAVC) {
2858        uint32_t type;
2859        const void *data;
2860        size_t size;
2861        CHECK(format->findData(kKeyAVCC, &type, &data, &size));
2862
2863        const uint8_t *ptr = (const uint8_t *)data;
2864
2865        CHECK(size >= 7);
2866        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
2867
2868        // The number of bytes used to encode the length of a NAL unit.
2869        mNALLengthSize = 1 + (ptr[4] & 3);
2870    } else if (mIsHEVC) {
2871        uint32_t type;
2872        const void *data;
2873        size_t size;
2874        CHECK(format->findData(kKeyHVCC, &type, &data, &size));
2875
2876        const uint8_t *ptr = (const uint8_t *)data;
2877
2878        CHECK(size >= 7);
2879        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
2880
2881        mNALLengthSize = 1 + (ptr[14 + 7] & 3);
2882    }
2883
2884    CHECK(format->findInt32(kKeyTrackID, &mTrackId));
2885
2886    if (mFirstMoofOffset != 0) {
2887        off64_t offset = mFirstMoofOffset;
2888        parseChunk(&offset);
2889    }
2890}
2891
2892MPEG4Source::~MPEG4Source() {
2893    if (mStarted) {
2894        stop();
2895    }
2896    free(mCurrentSampleInfoSizes);
2897    free(mCurrentSampleInfoOffsets);
2898}
2899
2900status_t MPEG4Source::start(MetaData *params) {
2901    Mutex::Autolock autoLock(mLock);
2902
2903    CHECK(!mStarted);
2904
2905    int32_t val;
2906    if (params && params->findInt32(kKeyWantsNALFragments, &val)
2907        && val != 0) {
2908        mWantsNALFragments = true;
2909    } else {
2910        mWantsNALFragments = false;
2911    }
2912
2913    mGroup = new MediaBufferGroup;
2914
2915    int32_t max_size;
2916    CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size));
2917
2918    mGroup->add_buffer(new MediaBuffer(max_size));
2919
2920    mSrcBuffer = new uint8_t[max_size];
2921
2922    mStarted = true;
2923
2924    return OK;
2925}
2926
2927status_t MPEG4Source::stop() {
2928    Mutex::Autolock autoLock(mLock);
2929
2930    CHECK(mStarted);
2931
2932    if (mBuffer != NULL) {
2933        mBuffer->release();
2934        mBuffer = NULL;
2935    }
2936
2937    delete[] mSrcBuffer;
2938    mSrcBuffer = NULL;
2939
2940    delete mGroup;
2941    mGroup = NULL;
2942
2943    mStarted = false;
2944    mCurrentSampleIndex = 0;
2945
2946    return OK;
2947}
2948
2949status_t MPEG4Source::parseChunk(off64_t *offset) {
2950    uint32_t hdr[2];
2951    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
2952        return ERROR_IO;
2953    }
2954    uint64_t chunk_size = ntohl(hdr[0]);
2955    uint32_t chunk_type = ntohl(hdr[1]);
2956    off64_t data_offset = *offset + 8;
2957
2958    if (chunk_size == 1) {
2959        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
2960            return ERROR_IO;
2961        }
2962        chunk_size = ntoh64(chunk_size);
2963        data_offset += 8;
2964
2965        if (chunk_size < 16) {
2966            // The smallest valid chunk is 16 bytes long in this case.
2967            return ERROR_MALFORMED;
2968        }
2969    } else if (chunk_size < 8) {
2970        // The smallest valid chunk is 8 bytes long.
2971        return ERROR_MALFORMED;
2972    }
2973
2974    char chunk[5];
2975    MakeFourCCString(chunk_type, chunk);
2976    ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset);
2977
2978    off64_t chunk_data_size = *offset + chunk_size - data_offset;
2979
2980    switch(chunk_type) {
2981
2982        case FOURCC('t', 'r', 'a', 'f'):
2983        case FOURCC('m', 'o', 'o', 'f'): {
2984            off64_t stop_offset = *offset + chunk_size;
2985            *offset = data_offset;
2986            while (*offset < stop_offset) {
2987                status_t err = parseChunk(offset);
2988                if (err != OK) {
2989                    return err;
2990                }
2991            }
2992            if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
2993                // *offset points to the box following this moof. Find the next moof from there.
2994
2995                while (true) {
2996                    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
2997                        return ERROR_END_OF_STREAM;
2998                    }
2999                    chunk_size = ntohl(hdr[0]);
3000                    chunk_type = ntohl(hdr[1]);
3001                    if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3002                        mNextMoofOffset = *offset;
3003                        break;
3004                    }
3005                    *offset += chunk_size;
3006                }
3007            }
3008            break;
3009        }
3010
3011        case FOURCC('t', 'f', 'h', 'd'): {
3012                status_t err;
3013                if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
3014                    return err;
3015                }
3016                *offset += chunk_size;
3017                break;
3018        }
3019
3020        case FOURCC('t', 'r', 'u', 'n'): {
3021                status_t err;
3022                if (mLastParsedTrackId == mTrackId) {
3023                    if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
3024                        return err;
3025                    }
3026                }
3027
3028                *offset += chunk_size;
3029                break;
3030        }
3031
3032        case FOURCC('s', 'a', 'i', 'z'): {
3033            status_t err;
3034            if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
3035                return err;
3036            }
3037            *offset += chunk_size;
3038            break;
3039        }
3040        case FOURCC('s', 'a', 'i', 'o'): {
3041            status_t err;
3042            if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
3043                return err;
3044            }
3045            *offset += chunk_size;
3046            break;
3047        }
3048
3049        case FOURCC('m', 'd', 'a', 't'): {
3050            // parse DRM info if present
3051            ALOGV("MPEG4Source::parseChunk mdat");
3052            // if saiz/saoi was previously observed, do something with the sampleinfos
3053            *offset += chunk_size;
3054            break;
3055        }
3056
3057        default: {
3058            *offset += chunk_size;
3059            break;
3060        }
3061    }
3062    return OK;
3063}
3064
3065status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
3066        off64_t offset, off64_t /* size */) {
3067    ALOGV("parseSampleAuxiliaryInformationSizes");
3068    // 14496-12 8.7.12
3069    uint8_t version;
3070    if (mDataSource->readAt(
3071            offset, &version, sizeof(version))
3072            < (ssize_t)sizeof(version)) {
3073        return ERROR_IO;
3074    }
3075
3076    if (version != 0) {
3077        return ERROR_UNSUPPORTED;
3078    }
3079    offset++;
3080
3081    uint32_t flags;
3082    if (!mDataSource->getUInt24(offset, &flags)) {
3083        return ERROR_IO;
3084    }
3085    offset += 3;
3086
3087    if (flags & 1) {
3088        uint32_t tmp;
3089        if (!mDataSource->getUInt32(offset, &tmp)) {
3090            return ERROR_MALFORMED;
3091        }
3092        mCurrentAuxInfoType = tmp;
3093        offset += 4;
3094        if (!mDataSource->getUInt32(offset, &tmp)) {
3095            return ERROR_MALFORMED;
3096        }
3097        mCurrentAuxInfoTypeParameter = tmp;
3098        offset += 4;
3099    }
3100
3101    uint8_t defsize;
3102    if (mDataSource->readAt(offset, &defsize, 1) != 1) {
3103        return ERROR_MALFORMED;
3104    }
3105    mCurrentDefaultSampleInfoSize = defsize;
3106    offset++;
3107
3108    uint32_t smplcnt;
3109    if (!mDataSource->getUInt32(offset, &smplcnt)) {
3110        return ERROR_MALFORMED;
3111    }
3112    mCurrentSampleInfoCount = smplcnt;
3113    offset += 4;
3114
3115    if (mCurrentDefaultSampleInfoSize != 0) {
3116        ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
3117        return OK;
3118    }
3119    if (smplcnt > mCurrentSampleInfoAllocSize) {
3120        mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
3121        mCurrentSampleInfoAllocSize = smplcnt;
3122    }
3123
3124    mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
3125    return OK;
3126}
3127
3128status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
3129        off64_t offset, off64_t /* size */) {
3130    ALOGV("parseSampleAuxiliaryInformationOffsets");
3131    // 14496-12 8.7.13
3132    uint8_t version;
3133    if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
3134        return ERROR_IO;
3135    }
3136    offset++;
3137
3138    uint32_t flags;
3139    if (!mDataSource->getUInt24(offset, &flags)) {
3140        return ERROR_IO;
3141    }
3142    offset += 3;
3143
3144    uint32_t entrycount;
3145    if (!mDataSource->getUInt32(offset, &entrycount)) {
3146        return ERROR_IO;
3147    }
3148    offset += 4;
3149
3150    if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
3151        mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8);
3152        mCurrentSampleInfoOffsetsAllocSize = entrycount;
3153    }
3154    mCurrentSampleInfoOffsetCount = entrycount;
3155
3156    for (size_t i = 0; i < entrycount; i++) {
3157        if (version == 0) {
3158            uint32_t tmp;
3159            if (!mDataSource->getUInt32(offset, &tmp)) {
3160                return ERROR_IO;
3161            }
3162            mCurrentSampleInfoOffsets[i] = tmp;
3163            offset += 4;
3164        } else {
3165            uint64_t tmp;
3166            if (!mDataSource->getUInt64(offset, &tmp)) {
3167                return ERROR_IO;
3168            }
3169            mCurrentSampleInfoOffsets[i] = tmp;
3170            offset += 8;
3171        }
3172    }
3173
3174    // parse clear/encrypted data
3175
3176    off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
3177
3178    drmoffset += mCurrentMoofOffset;
3179    int ivlength;
3180    CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength));
3181
3182    // read CencSampleAuxiliaryDataFormats
3183    for (size_t i = 0; i < mCurrentSampleInfoCount; i++) {
3184        Sample *smpl = &mCurrentSamples.editItemAt(i);
3185
3186        memset(smpl->iv, 0, 16);
3187        if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) {
3188            return ERROR_IO;
3189        }
3190
3191        drmoffset += ivlength;
3192
3193        int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
3194        if (smplinfosize == 0) {
3195            smplinfosize = mCurrentSampleInfoSizes[i];
3196        }
3197        if (smplinfosize > ivlength) {
3198            uint16_t numsubsamples;
3199            if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) {
3200                return ERROR_IO;
3201            }
3202            drmoffset += 2;
3203            for (size_t j = 0; j < numsubsamples; j++) {
3204                uint16_t numclear;
3205                uint32_t numencrypted;
3206                if (!mDataSource->getUInt16(drmoffset, &numclear)) {
3207                    return ERROR_IO;
3208                }
3209                drmoffset += 2;
3210                if (!mDataSource->getUInt32(drmoffset, &numencrypted)) {
3211                    return ERROR_IO;
3212                }
3213                drmoffset += 4;
3214                smpl->clearsizes.add(numclear);
3215                smpl->encryptedsizes.add(numencrypted);
3216            }
3217        } else {
3218            smpl->clearsizes.add(0);
3219            smpl->encryptedsizes.add(smpl->size);
3220        }
3221    }
3222
3223
3224    return OK;
3225}
3226
3227status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
3228
3229    if (size < 8) {
3230        return -EINVAL;
3231    }
3232
3233    uint32_t flags;
3234    if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
3235        return ERROR_MALFORMED;
3236    }
3237
3238    if (flags & 0xff000000) {
3239        return -EINVAL;
3240    }
3241
3242    if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
3243        return ERROR_MALFORMED;
3244    }
3245
3246    if (mLastParsedTrackId != mTrackId) {
3247        // this is not the right track, skip it
3248        return OK;
3249    }
3250
3251    mTrackFragmentHeaderInfo.mFlags = flags;
3252    mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
3253    offset += 8;
3254    size -= 8;
3255
3256    ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
3257
3258    if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
3259        if (size < 8) {
3260            return -EINVAL;
3261        }
3262
3263        if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
3264            return ERROR_MALFORMED;
3265        }
3266        offset += 8;
3267        size -= 8;
3268    }
3269
3270    if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
3271        if (size < 4) {
3272            return -EINVAL;
3273        }
3274
3275        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
3276            return ERROR_MALFORMED;
3277        }
3278        offset += 4;
3279        size -= 4;
3280    }
3281
3282    if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3283        if (size < 4) {
3284            return -EINVAL;
3285        }
3286
3287        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
3288            return ERROR_MALFORMED;
3289        }
3290        offset += 4;
3291        size -= 4;
3292    }
3293
3294    if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3295        if (size < 4) {
3296            return -EINVAL;
3297        }
3298
3299        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
3300            return ERROR_MALFORMED;
3301        }
3302        offset += 4;
3303        size -= 4;
3304    }
3305
3306    if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3307        if (size < 4) {
3308            return -EINVAL;
3309        }
3310
3311        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
3312            return ERROR_MALFORMED;
3313        }
3314        offset += 4;
3315        size -= 4;
3316    }
3317
3318    if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
3319        mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
3320    }
3321
3322    mTrackFragmentHeaderInfo.mDataOffset = 0;
3323    return OK;
3324}
3325
3326status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
3327
3328    ALOGV("MPEG4Extractor::parseTrackFragmentRun");
3329    if (size < 8) {
3330        return -EINVAL;
3331    }
3332
3333    enum {
3334        kDataOffsetPresent                  = 0x01,
3335        kFirstSampleFlagsPresent            = 0x04,
3336        kSampleDurationPresent              = 0x100,
3337        kSampleSizePresent                  = 0x200,
3338        kSampleFlagsPresent                 = 0x400,
3339        kSampleCompositionTimeOffsetPresent = 0x800,
3340    };
3341
3342    uint32_t flags;
3343    if (!mDataSource->getUInt32(offset, &flags)) {
3344        return ERROR_MALFORMED;
3345    }
3346    ALOGV("fragment run flags: %08x", flags);
3347
3348    if (flags & 0xff000000) {
3349        return -EINVAL;
3350    }
3351
3352    if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
3353        // These two shall not be used together.
3354        return -EINVAL;
3355    }
3356
3357    uint32_t sampleCount;
3358    if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
3359        return ERROR_MALFORMED;
3360    }
3361    offset += 8;
3362    size -= 8;
3363
3364    uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
3365
3366    uint32_t firstSampleFlags = 0;
3367
3368    if (flags & kDataOffsetPresent) {
3369        if (size < 4) {
3370            return -EINVAL;
3371        }
3372
3373        int32_t dataOffsetDelta;
3374        if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
3375            return ERROR_MALFORMED;
3376        }
3377
3378        dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
3379
3380        offset += 4;
3381        size -= 4;
3382    }
3383
3384    if (flags & kFirstSampleFlagsPresent) {
3385        if (size < 4) {
3386            return -EINVAL;
3387        }
3388
3389        if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
3390            return ERROR_MALFORMED;
3391        }
3392        offset += 4;
3393        size -= 4;
3394    }
3395
3396    uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
3397             sampleCtsOffset = 0;
3398
3399    size_t bytesPerSample = 0;
3400    if (flags & kSampleDurationPresent) {
3401        bytesPerSample += 4;
3402    } else if (mTrackFragmentHeaderInfo.mFlags
3403            & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3404        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
3405    } else {
3406        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
3407    }
3408
3409    if (flags & kSampleSizePresent) {
3410        bytesPerSample += 4;
3411    } else if (mTrackFragmentHeaderInfo.mFlags
3412            & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3413        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
3414    } else {
3415        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
3416    }
3417
3418    if (flags & kSampleFlagsPresent) {
3419        bytesPerSample += 4;
3420    } else if (mTrackFragmentHeaderInfo.mFlags
3421            & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3422        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
3423    } else {
3424        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
3425    }
3426
3427    if (flags & kSampleCompositionTimeOffsetPresent) {
3428        bytesPerSample += 4;
3429    } else {
3430        sampleCtsOffset = 0;
3431    }
3432
3433    if (size < sampleCount * bytesPerSample) {
3434        return -EINVAL;
3435    }
3436
3437    Sample tmp;
3438    for (uint32_t i = 0; i < sampleCount; ++i) {
3439        if (flags & kSampleDurationPresent) {
3440            if (!mDataSource->getUInt32(offset, &sampleDuration)) {
3441                return ERROR_MALFORMED;
3442            }
3443            offset += 4;
3444        }
3445
3446        if (flags & kSampleSizePresent) {
3447            if (!mDataSource->getUInt32(offset, &sampleSize)) {
3448                return ERROR_MALFORMED;
3449            }
3450            offset += 4;
3451        }
3452
3453        if (flags & kSampleFlagsPresent) {
3454            if (!mDataSource->getUInt32(offset, &sampleFlags)) {
3455                return ERROR_MALFORMED;
3456            }
3457            offset += 4;
3458        }
3459
3460        if (flags & kSampleCompositionTimeOffsetPresent) {
3461            if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
3462                return ERROR_MALFORMED;
3463            }
3464            offset += 4;
3465        }
3466
3467        ALOGV("adding sample %d at offset 0x%08llx, size %u, duration %u, "
3468              " flags 0x%08x", i + 1,
3469                dataOffset, sampleSize, sampleDuration,
3470                (flags & kFirstSampleFlagsPresent) && i == 0
3471                    ? firstSampleFlags : sampleFlags);
3472        tmp.offset = dataOffset;
3473        tmp.size = sampleSize;
3474        tmp.duration = sampleDuration;
3475        mCurrentSamples.add(tmp);
3476
3477        dataOffset += sampleSize;
3478    }
3479
3480    mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
3481
3482    return OK;
3483}
3484
3485sp<MetaData> MPEG4Source::getFormat() {
3486    Mutex::Autolock autoLock(mLock);
3487
3488    return mFormat;
3489}
3490
3491size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
3492    switch (mNALLengthSize) {
3493        case 1:
3494            return *data;
3495        case 2:
3496            return U16_AT(data);
3497        case 3:
3498            return ((size_t)data[0] << 16) | U16_AT(&data[1]);
3499        case 4:
3500            return U32_AT(data);
3501    }
3502
3503    // This cannot happen, mNALLengthSize springs to life by adding 1 to
3504    // a 2-bit integer.
3505    CHECK(!"Should not be here.");
3506
3507    return 0;
3508}
3509
3510status_t MPEG4Source::read(
3511        MediaBuffer **out, const ReadOptions *options) {
3512    Mutex::Autolock autoLock(mLock);
3513
3514    CHECK(mStarted);
3515
3516    if (mFirstMoofOffset > 0) {
3517        return fragmentedRead(out, options);
3518    }
3519
3520    *out = NULL;
3521
3522    int64_t targetSampleTimeUs = -1;
3523
3524    int64_t seekTimeUs;
3525    ReadOptions::SeekMode mode;
3526    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3527        uint32_t findFlags = 0;
3528        switch (mode) {
3529            case ReadOptions::SEEK_PREVIOUS_SYNC:
3530                findFlags = SampleTable::kFlagBefore;
3531                break;
3532            case ReadOptions::SEEK_NEXT_SYNC:
3533                findFlags = SampleTable::kFlagAfter;
3534                break;
3535            case ReadOptions::SEEK_CLOSEST_SYNC:
3536            case ReadOptions::SEEK_CLOSEST:
3537                findFlags = SampleTable::kFlagClosest;
3538                break;
3539            default:
3540                CHECK(!"Should not be here.");
3541                break;
3542        }
3543
3544        uint32_t sampleIndex;
3545        status_t err = mSampleTable->findSampleAtTime(
3546                seekTimeUs * mTimescale / 1000000,
3547                &sampleIndex, findFlags);
3548
3549        if (mode == ReadOptions::SEEK_CLOSEST) {
3550            // We found the closest sample already, now we want the sync
3551            // sample preceding it (or the sample itself of course), even
3552            // if the subsequent sync sample is closer.
3553            findFlags = SampleTable::kFlagBefore;
3554        }
3555
3556        uint32_t syncSampleIndex;
3557        if (err == OK) {
3558            err = mSampleTable->findSyncSampleNear(
3559                    sampleIndex, &syncSampleIndex, findFlags);
3560        }
3561
3562        uint32_t sampleTime;
3563        if (err == OK) {
3564            err = mSampleTable->getMetaDataForSample(
3565                    sampleIndex, NULL, NULL, &sampleTime);
3566        }
3567
3568        if (err != OK) {
3569            if (err == ERROR_OUT_OF_RANGE) {
3570                // An attempt to seek past the end of the stream would
3571                // normally cause this ERROR_OUT_OF_RANGE error. Propagating
3572                // this all the way to the MediaPlayer would cause abnormal
3573                // termination. Legacy behaviour appears to be to behave as if
3574                // we had seeked to the end of stream, ending normally.
3575                err = ERROR_END_OF_STREAM;
3576            }
3577            ALOGV("end of stream");
3578            return err;
3579        }
3580
3581        if (mode == ReadOptions::SEEK_CLOSEST) {
3582            targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
3583        }
3584
3585#if 0
3586        uint32_t syncSampleTime;
3587        CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
3588                    syncSampleIndex, NULL, NULL, &syncSampleTime));
3589
3590        ALOGI("seek to time %lld us => sample at time %lld us, "
3591             "sync sample at time %lld us",
3592             seekTimeUs,
3593             sampleTime * 1000000ll / mTimescale,
3594             syncSampleTime * 1000000ll / mTimescale);
3595#endif
3596
3597        mCurrentSampleIndex = syncSampleIndex;
3598        if (mBuffer != NULL) {
3599            mBuffer->release();
3600            mBuffer = NULL;
3601        }
3602
3603        // fall through
3604    }
3605
3606    off64_t offset;
3607    size_t size;
3608    uint32_t cts, stts;
3609    bool isSyncSample;
3610    bool newBuffer = false;
3611    if (mBuffer == NULL) {
3612        newBuffer = true;
3613
3614        status_t err =
3615            mSampleTable->getMetaDataForSample(
3616                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
3617
3618        if (err != OK) {
3619            return err;
3620        }
3621
3622        err = mGroup->acquire_buffer(&mBuffer);
3623
3624        if (err != OK) {
3625            CHECK(mBuffer == NULL);
3626            return err;
3627        }
3628    }
3629
3630    if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) {
3631        if (newBuffer) {
3632            ssize_t num_bytes_read =
3633                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3634
3635            if (num_bytes_read < (ssize_t)size) {
3636                mBuffer->release();
3637                mBuffer = NULL;
3638
3639                return ERROR_IO;
3640            }
3641
3642            CHECK(mBuffer != NULL);
3643            mBuffer->set_range(0, size);
3644            mBuffer->meta_data()->clear();
3645            mBuffer->meta_data()->setInt64(
3646                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3647            mBuffer->meta_data()->setInt64(
3648                    kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
3649
3650            if (targetSampleTimeUs >= 0) {
3651                mBuffer->meta_data()->setInt64(
3652                        kKeyTargetTime, targetSampleTimeUs);
3653            }
3654
3655            if (isSyncSample) {
3656                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3657            }
3658
3659            ++mCurrentSampleIndex;
3660        }
3661
3662        if (!mIsAVC && !mIsHEVC) {
3663            *out = mBuffer;
3664            mBuffer = NULL;
3665
3666            return OK;
3667        }
3668
3669        // Each NAL unit is split up into its constituent fragments and
3670        // each one of them returned in its own buffer.
3671
3672        CHECK(mBuffer->range_length() >= mNALLengthSize);
3673
3674        const uint8_t *src =
3675            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3676
3677        size_t nal_size = parseNALSize(src);
3678        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3679            ALOGE("incomplete NAL unit.");
3680
3681            mBuffer->release();
3682            mBuffer = NULL;
3683
3684            return ERROR_MALFORMED;
3685        }
3686
3687        MediaBuffer *clone = mBuffer->clone();
3688        CHECK(clone != NULL);
3689        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3690
3691        CHECK(mBuffer != NULL);
3692        mBuffer->set_range(
3693                mBuffer->range_offset() + mNALLengthSize + nal_size,
3694                mBuffer->range_length() - mNALLengthSize - nal_size);
3695
3696        if (mBuffer->range_length() == 0) {
3697            mBuffer->release();
3698            mBuffer = NULL;
3699        }
3700
3701        *out = clone;
3702
3703        return OK;
3704    } else {
3705        // Whole NAL units are returned but each fragment is prefixed by
3706        // the start code (0x00 00 00 01).
3707        ssize_t num_bytes_read = 0;
3708        int32_t drm = 0;
3709        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3710        if (usesDRM) {
3711            num_bytes_read =
3712                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3713        } else {
3714            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3715        }
3716
3717        if (num_bytes_read < (ssize_t)size) {
3718            mBuffer->release();
3719            mBuffer = NULL;
3720
3721            return ERROR_IO;
3722        }
3723
3724        if (usesDRM) {
3725            CHECK(mBuffer != NULL);
3726            mBuffer->set_range(0, size);
3727
3728        } else {
3729            uint8_t *dstData = (uint8_t *)mBuffer->data();
3730            size_t srcOffset = 0;
3731            size_t dstOffset = 0;
3732
3733            while (srcOffset < size) {
3734                bool isMalFormed = (srcOffset + mNALLengthSize > size);
3735                size_t nalLength = 0;
3736                if (!isMalFormed) {
3737                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
3738                    srcOffset += mNALLengthSize;
3739                    isMalFormed = srcOffset + nalLength > size;
3740                }
3741
3742                if (isMalFormed) {
3743                    ALOGE("Video is malformed");
3744                    mBuffer->release();
3745                    mBuffer = NULL;
3746                    return ERROR_MALFORMED;
3747                }
3748
3749                if (nalLength == 0) {
3750                    continue;
3751                }
3752
3753                CHECK(dstOffset + 4 <= mBuffer->size());
3754
3755                dstData[dstOffset++] = 0;
3756                dstData[dstOffset++] = 0;
3757                dstData[dstOffset++] = 0;
3758                dstData[dstOffset++] = 1;
3759                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
3760                srcOffset += nalLength;
3761                dstOffset += nalLength;
3762            }
3763            CHECK_EQ(srcOffset, size);
3764            CHECK(mBuffer != NULL);
3765            mBuffer->set_range(0, dstOffset);
3766        }
3767
3768        mBuffer->meta_data()->clear();
3769        mBuffer->meta_data()->setInt64(
3770                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3771        mBuffer->meta_data()->setInt64(
3772                kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
3773
3774        if (targetSampleTimeUs >= 0) {
3775            mBuffer->meta_data()->setInt64(
3776                    kKeyTargetTime, targetSampleTimeUs);
3777        }
3778
3779        if (isSyncSample) {
3780            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3781        }
3782
3783        ++mCurrentSampleIndex;
3784
3785        *out = mBuffer;
3786        mBuffer = NULL;
3787
3788        return OK;
3789    }
3790}
3791
3792status_t MPEG4Source::fragmentedRead(
3793        MediaBuffer **out, const ReadOptions *options) {
3794
3795    ALOGV("MPEG4Source::fragmentedRead");
3796
3797    CHECK(mStarted);
3798
3799    *out = NULL;
3800
3801    int64_t targetSampleTimeUs = -1;
3802
3803    int64_t seekTimeUs;
3804    ReadOptions::SeekMode mode;
3805    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3806
3807        int numSidxEntries = mSegments.size();
3808        if (numSidxEntries != 0) {
3809            int64_t totalTime = 0;
3810            off64_t totalOffset = mFirstMoofOffset;
3811            for (int i = 0; i < numSidxEntries; i++) {
3812                const SidxEntry *se = &mSegments[i];
3813                if (totalTime + se->mDurationUs > seekTimeUs) {
3814                    // The requested time is somewhere in this segment
3815                    if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
3816                        (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
3817                        (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
3818                        // requested next sync, or closest sync and it was closer to the end of
3819                        // this segment
3820                        totalTime += se->mDurationUs;
3821                        totalOffset += se->mSize;
3822                    }
3823                    break;
3824                }
3825                totalTime += se->mDurationUs;
3826                totalOffset += se->mSize;
3827            }
3828            mCurrentMoofOffset = totalOffset;
3829            mCurrentSamples.clear();
3830            mCurrentSampleIndex = 0;
3831            parseChunk(&totalOffset);
3832            mCurrentTime = totalTime * mTimescale / 1000000ll;
3833        } else {
3834            // without sidx boxes, we can only seek to 0
3835            mCurrentMoofOffset = mFirstMoofOffset;
3836            mCurrentSamples.clear();
3837            mCurrentSampleIndex = 0;
3838            off64_t tmp = mCurrentMoofOffset;
3839            parseChunk(&tmp);
3840            mCurrentTime = 0;
3841        }
3842
3843        if (mBuffer != NULL) {
3844            mBuffer->release();
3845            mBuffer = NULL;
3846        }
3847
3848        // fall through
3849    }
3850
3851    off64_t offset = 0;
3852    size_t size = 0;
3853    uint32_t cts = 0;
3854    bool isSyncSample = false;
3855    bool newBuffer = false;
3856    if (mBuffer == NULL) {
3857        newBuffer = true;
3858
3859        if (mCurrentSampleIndex >= mCurrentSamples.size()) {
3860            // move to next fragment if there is one
3861            if (mNextMoofOffset <= mCurrentMoofOffset) {
3862                return ERROR_END_OF_STREAM;
3863            }
3864            off64_t nextMoof = mNextMoofOffset;
3865            mCurrentMoofOffset = nextMoof;
3866            mCurrentSamples.clear();
3867            mCurrentSampleIndex = 0;
3868            parseChunk(&nextMoof);
3869            if (mCurrentSampleIndex >= mCurrentSamples.size()) {
3870                return ERROR_END_OF_STREAM;
3871            }
3872        }
3873
3874        const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
3875        offset = smpl->offset;
3876        size = smpl->size;
3877        cts = mCurrentTime;
3878        mCurrentTime += smpl->duration;
3879        isSyncSample = (mCurrentSampleIndex == 0); // XXX
3880
3881        status_t err = mGroup->acquire_buffer(&mBuffer);
3882
3883        if (err != OK) {
3884            CHECK(mBuffer == NULL);
3885            ALOGV("acquire_buffer returned %d", err);
3886            return err;
3887        }
3888    }
3889
3890    const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
3891    const sp<MetaData> bufmeta = mBuffer->meta_data();
3892    bufmeta->clear();
3893    if (smpl->encryptedsizes.size()) {
3894        // store clear/encrypted lengths in metadata
3895        bufmeta->setData(kKeyPlainSizes, 0,
3896                smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
3897        bufmeta->setData(kKeyEncryptedSizes, 0,
3898                smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
3899        bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size?
3900        bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
3901        bufmeta->setInt32(kKeyCryptoMode, mCryptoMode);
3902        bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16);
3903    }
3904
3905    if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) {
3906        if (newBuffer) {
3907            ssize_t num_bytes_read =
3908                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3909
3910            if (num_bytes_read < (ssize_t)size) {
3911                mBuffer->release();
3912                mBuffer = NULL;
3913
3914                ALOGV("i/o error");
3915                return ERROR_IO;
3916            }
3917
3918            CHECK(mBuffer != NULL);
3919            mBuffer->set_range(0, size);
3920            mBuffer->meta_data()->setInt64(
3921                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3922            mBuffer->meta_data()->setInt64(
3923                    kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
3924
3925            if (targetSampleTimeUs >= 0) {
3926                mBuffer->meta_data()->setInt64(
3927                        kKeyTargetTime, targetSampleTimeUs);
3928            }
3929
3930            if (isSyncSample) {
3931                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3932            }
3933
3934            ++mCurrentSampleIndex;
3935        }
3936
3937        if (!mIsAVC && !mIsHEVC) {
3938            *out = mBuffer;
3939            mBuffer = NULL;
3940
3941            return OK;
3942        }
3943
3944        // Each NAL unit is split up into its constituent fragments and
3945        // each one of them returned in its own buffer.
3946
3947        CHECK(mBuffer->range_length() >= mNALLengthSize);
3948
3949        const uint8_t *src =
3950            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3951
3952        size_t nal_size = parseNALSize(src);
3953        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3954            ALOGE("incomplete NAL unit.");
3955
3956            mBuffer->release();
3957            mBuffer = NULL;
3958
3959            return ERROR_MALFORMED;
3960        }
3961
3962        MediaBuffer *clone = mBuffer->clone();
3963        CHECK(clone != NULL);
3964        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3965
3966        CHECK(mBuffer != NULL);
3967        mBuffer->set_range(
3968                mBuffer->range_offset() + mNALLengthSize + nal_size,
3969                mBuffer->range_length() - mNALLengthSize - nal_size);
3970
3971        if (mBuffer->range_length() == 0) {
3972            mBuffer->release();
3973            mBuffer = NULL;
3974        }
3975
3976        *out = clone;
3977
3978        return OK;
3979    } else {
3980        ALOGV("whole NAL");
3981        // Whole NAL units are returned but each fragment is prefixed by
3982        // the start code (0x00 00 00 01).
3983        ssize_t num_bytes_read = 0;
3984        int32_t drm = 0;
3985        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3986        if (usesDRM) {
3987            num_bytes_read =
3988                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3989        } else {
3990            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3991        }
3992
3993        if (num_bytes_read < (ssize_t)size) {
3994            mBuffer->release();
3995            mBuffer = NULL;
3996
3997            ALOGV("i/o error");
3998            return ERROR_IO;
3999        }
4000
4001        if (usesDRM) {
4002            CHECK(mBuffer != NULL);
4003            mBuffer->set_range(0, size);
4004
4005        } else {
4006            uint8_t *dstData = (uint8_t *)mBuffer->data();
4007            size_t srcOffset = 0;
4008            size_t dstOffset = 0;
4009
4010            while (srcOffset < size) {
4011                bool isMalFormed = (srcOffset + mNALLengthSize > size);
4012                size_t nalLength = 0;
4013                if (!isMalFormed) {
4014                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
4015                    srcOffset += mNALLengthSize;
4016                    isMalFormed = srcOffset + nalLength > size;
4017                }
4018
4019                if (isMalFormed) {
4020                    ALOGE("Video is malformed");
4021                    mBuffer->release();
4022                    mBuffer = NULL;
4023                    return ERROR_MALFORMED;
4024                }
4025
4026                if (nalLength == 0) {
4027                    continue;
4028                }
4029
4030                CHECK(dstOffset + 4 <= mBuffer->size());
4031
4032                dstData[dstOffset++] = 0;
4033                dstData[dstOffset++] = 0;
4034                dstData[dstOffset++] = 0;
4035                dstData[dstOffset++] = 1;
4036                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
4037                srcOffset += nalLength;
4038                dstOffset += nalLength;
4039            }
4040            CHECK_EQ(srcOffset, size);
4041            CHECK(mBuffer != NULL);
4042            mBuffer->set_range(0, dstOffset);
4043        }
4044
4045        mBuffer->meta_data()->setInt64(
4046                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4047        mBuffer->meta_data()->setInt64(
4048                kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
4049
4050        if (targetSampleTimeUs >= 0) {
4051            mBuffer->meta_data()->setInt64(
4052                    kKeyTargetTime, targetSampleTimeUs);
4053        }
4054
4055        if (isSyncSample) {
4056            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4057        }
4058
4059        ++mCurrentSampleIndex;
4060
4061        *out = mBuffer;
4062        mBuffer = NULL;
4063
4064        return OK;
4065    }
4066}
4067
4068MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
4069        const char *mimePrefix) {
4070    for (Track *track = mFirstTrack; track != NULL; track = track->next) {
4071        const char *mime;
4072        if (track->meta != NULL
4073                && track->meta->findCString(kKeyMIMEType, &mime)
4074                && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
4075            return track;
4076        }
4077    }
4078
4079    return NULL;
4080}
4081
4082static bool LegacySniffMPEG4(
4083        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
4084    uint8_t header[8];
4085
4086    ssize_t n = source->readAt(4, header, sizeof(header));
4087    if (n < (ssize_t)sizeof(header)) {
4088        return false;
4089    }
4090
4091    if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
4092        || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
4093        || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
4094        || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
4095        || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
4096        || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) {
4097        *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4098        *confidence = 0.4;
4099
4100        return true;
4101    }
4102
4103    return false;
4104}
4105
4106static bool isCompatibleBrand(uint32_t fourcc) {
4107    static const uint32_t kCompatibleBrands[] = {
4108        FOURCC('i', 's', 'o', 'm'),
4109        FOURCC('i', 's', 'o', '2'),
4110        FOURCC('a', 'v', 'c', '1'),
4111        FOURCC('h', 'v', 'c', '1'),
4112        FOURCC('h', 'e', 'v', '1'),
4113        FOURCC('3', 'g', 'p', '4'),
4114        FOURCC('m', 'p', '4', '1'),
4115        FOURCC('m', 'p', '4', '2'),
4116
4117        // Won't promise that the following file types can be played.
4118        // Just give these file types a chance.
4119        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
4120        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
4121
4122        FOURCC('3', 'g', '2', 'a'),  // 3GPP2
4123        FOURCC('3', 'g', '2', 'b'),
4124    };
4125
4126    for (size_t i = 0;
4127         i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
4128         ++i) {
4129        if (kCompatibleBrands[i] == fourcc) {
4130            return true;
4131        }
4132    }
4133
4134    return false;
4135}
4136
4137// Attempt to actually parse the 'ftyp' atom and determine if a suitable
4138// compatible brand is present.
4139// Also try to identify where this file's metadata ends
4140// (end of the 'moov' atom) and report it to the caller as part of
4141// the metadata.
4142static bool BetterSniffMPEG4(
4143        const sp<DataSource> &source, String8 *mimeType, float *confidence,
4144        sp<AMessage> *meta) {
4145    // We scan up to 128 bytes to identify this file as an MP4.
4146    static const off64_t kMaxScanOffset = 128ll;
4147
4148    off64_t offset = 0ll;
4149    bool foundGoodFileType = false;
4150    off64_t moovAtomEndOffset = -1ll;
4151    bool done = false;
4152
4153    while (!done && offset < kMaxScanOffset) {
4154        uint32_t hdr[2];
4155        if (source->readAt(offset, hdr, 8) < 8) {
4156            return false;
4157        }
4158
4159        uint64_t chunkSize = ntohl(hdr[0]);
4160        uint32_t chunkType = ntohl(hdr[1]);
4161        off64_t chunkDataOffset = offset + 8;
4162
4163        if (chunkSize == 1) {
4164            if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
4165                return false;
4166            }
4167
4168            chunkSize = ntoh64(chunkSize);
4169            chunkDataOffset += 8;
4170
4171            if (chunkSize < 16) {
4172                // The smallest valid chunk is 16 bytes long in this case.
4173                return false;
4174            }
4175        } else if (chunkSize < 8) {
4176            // The smallest valid chunk is 8 bytes long.
4177            return false;
4178        }
4179
4180        off64_t chunkDataSize = offset + chunkSize - chunkDataOffset;
4181
4182        char chunkstring[5];
4183        MakeFourCCString(chunkType, chunkstring);
4184        ALOGV("saw chunk type %s, size %lld @ %lld", chunkstring, chunkSize, offset);
4185        switch (chunkType) {
4186            case FOURCC('f', 't', 'y', 'p'):
4187            {
4188                if (chunkDataSize < 8) {
4189                    return false;
4190                }
4191
4192                uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
4193                for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
4194                    if (i == 1) {
4195                        // Skip this index, it refers to the minorVersion,
4196                        // not a brand.
4197                        continue;
4198                    }
4199
4200                    uint32_t brand;
4201                    if (source->readAt(
4202                                chunkDataOffset + 4 * i, &brand, 4) < 4) {
4203                        return false;
4204                    }
4205
4206                    brand = ntohl(brand);
4207
4208                    if (isCompatibleBrand(brand)) {
4209                        foundGoodFileType = true;
4210                        break;
4211                    }
4212                }
4213
4214                if (!foundGoodFileType) {
4215                    return false;
4216                }
4217
4218                break;
4219            }
4220
4221            case FOURCC('m', 'o', 'o', 'v'):
4222            {
4223                moovAtomEndOffset = offset + chunkSize;
4224
4225                done = true;
4226                break;
4227            }
4228
4229            default:
4230                break;
4231        }
4232
4233        offset += chunkSize;
4234    }
4235
4236    if (!foundGoodFileType) {
4237        return false;
4238    }
4239
4240    *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4241    *confidence = 0.4f;
4242
4243    if (moovAtomEndOffset >= 0) {
4244        *meta = new AMessage;
4245        (*meta)->setInt64("meta-data-size", moovAtomEndOffset);
4246
4247        ALOGV("found metadata size: %lld", moovAtomEndOffset);
4248    }
4249
4250    return true;
4251}
4252
4253bool SniffMPEG4(
4254        const sp<DataSource> &source, String8 *mimeType, float *confidence,
4255        sp<AMessage> *meta) {
4256    if (BetterSniffMPEG4(source, mimeType, confidence, meta)) {
4257        return true;
4258    }
4259
4260    if (LegacySniffMPEG4(source, mimeType, confidence)) {
4261        ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
4262        return true;
4263    }
4264
4265    return false;
4266}
4267
4268}  // namespace android
4269