1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MPEG4Extractor"
19
20#include <ctype.h>
21#include <inttypes.h>
22#include <stdint.h>
23#include <stdlib.h>
24#include <string.h>
25
26#include <utils/Log.h>
27
28#include "include/MPEG4Extractor.h"
29#include "include/SampleTable.h"
30#include "include/ESDS.h"
31
32#include <media/stagefright/foundation/ABitReader.h>
33#include <media/stagefright/foundation/ABuffer.h>
34#include <media/stagefright/foundation/ADebug.h>
35#include <media/stagefright/foundation/AMessage.h>
36#include <media/stagefright/foundation/AUtils.h>
37#include <media/stagefright/foundation/ColorUtils.h>
38#include <media/stagefright/MediaBuffer.h>
39#include <media/stagefright/MediaBufferGroup.h>
40#include <media/stagefright/MediaDefs.h>
41#include <media/stagefright/MediaSource.h>
42#include <media/stagefright/MetaData.h>
43#include <utils/String8.h>
44
45#include <byteswap.h>
46#include "include/ID3.h"
47
48#ifndef UINT32_MAX
49#define UINT32_MAX       (4294967295U)
50#endif
51
52namespace android {
53
// File-local limits.
enum {
    // Maximum number of bytes of a track's first sample that
    // getTrackMetaData() copies into the metadata as the stream header.
    kMaxTrackHeaderSize = 32,
};
58
// MediaSource implementation handed out for a single track of an MPEG-4 file.
// Only the declaration lives here; the member functions are defined further
// down in this file.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(const sp<MPEG4Extractor> &owner,
                const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset);

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    // NOTE(review): presumably the read path used for fragmented (moof-based)
    // files -- confirm against the definition.
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    // Protected destructor: instances are not meant to be deleted directly by
    // outside code.
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    // keep the MPEG4Extractor around, since we're referencing its data
    sp<MPEG4Extractor> mOwner;
    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Held by reference, so the referenced vector must outlive this object.
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    // State for the sample auxiliary information parsed by
    // parseSampleAuxiliaryInformationSizes()/...Offsets() (presumably the
    // 'saiz'/'saio' boxes -- TODO confirm).
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    // Size in bytes of the length prefix decoded by parseNALSize()
    // (presumably 1..4 -- confirm against the constructor).
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // Values collected by parseTrackFragmentHeader() for the current fragment.
    struct TrackFragmentHeaderInfo {
        // Bit masks to test against mFlags; each "...Present" bit tells
        // whether the correspondingly named field below carries a value.
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample bookkeeping for the samples of the current fragment.
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        // Encryption data for this sample (IV plus clear/encrypted byte runs).
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Declared private to disallow copying.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
172
// A DataSource decorator with a single in-memory cached byte range.
// Reads that fall entirely inside the cached range are served from memory;
// every other read is forwarded to the wrapped data source.
// It is used to cache the complete sample table metadata of one track. To
// cover several tracks, sources are wrapped repeatedly, one MPEG4DataSource
// per track.

struct MPEG4DataSource : public DataSource {
    MPEG4DataSource(const sp<DataSource> &source);

    // These four forward to the wrapped source; readAt() consults the cache
    // first.
    virtual status_t initCheck() const;
    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
    virtual status_t getSize(off64_t *size);
    virtual uint32_t flags();

    // Replaces the cached range with |size| bytes read from the wrapped
    // source starting at |offset|. Returns OK, -ENOMEM or ERROR_IO.
    status_t setCachedRange(off64_t offset, size_t size);

protected:
    virtual ~MPEG4DataSource();

private:
    // Taken by readAt() and setCachedRange() around cache access.
    Mutex mLock;

    sp<DataSource> mSource;
    // Cached range is [mCachedOffset, mCachedOffset + mCachedSize); mCache is
    // a malloc()ed buffer, or NULL when nothing is cached.
    off64_t mCachedOffset;
    size_t mCachedSize;
    uint8_t *mCache;

    // Frees mCache and resets the range; does not take mLock itself.
    void clearCache();

    // Declared private to disallow copying.
    MPEG4DataSource(const MPEG4DataSource &);
    MPEG4DataSource &operator=(const MPEG4DataSource &);
};
206
// Wraps |source|. The cache starts out empty (no buffer, zero-length range),
// so every read is forwarded until setCachedRange() succeeds.
MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
    : mSource(source),
      mCachedOffset(0),
      mCachedSize(0),
      mCache(NULL) {
}
213
// Releases the cache buffer, if any.
MPEG4DataSource::~MPEG4DataSource() {
    clearCache();
}
217
218void MPEG4DataSource::clearCache() {
219    if (mCache) {
220        free(mCache);
221        mCache = NULL;
222    }
223
224    mCachedOffset = 0;
225    mCachedSize = 0;
226}
227
// Reports the initialization status of the wrapped data source.
status_t MPEG4DataSource::initCheck() const {
    return mSource->initCheck();
}
231
232ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
233    Mutex::Autolock autoLock(mLock);
234
235    if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
236        memcpy(data, &mCache[offset - mCachedOffset], size);
237        return size;
238    }
239
240    return mSource->readAt(offset, data, size);
241}
242
// Reports the size of the wrapped data source; the cache does not affect it.
status_t MPEG4DataSource::getSize(off64_t *size) {
    return mSource->getSize(size);
}
246
// Reports the flags of the wrapped data source unchanged.
uint32_t MPEG4DataSource::flags() {
    return mSource->flags();
}
250
251status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
252    Mutex::Autolock autoLock(mLock);
253
254    clearCache();
255
256    mCache = (uint8_t *)malloc(size);
257
258    if (mCache == NULL) {
259        return -ENOMEM;
260    }
261
262    mCachedOffset = offset;
263    mCachedSize = size;
264
265    ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
266
267    if (err < (ssize_t)size) {
268        clearCache();
269
270        return ERROR_IO;
271    }
272
273    return OK;
274}
275
276////////////////////////////////////////////////////////////////////////////////
277
// Debug switch: when true, parseChunk() prints each chunk's name and size and
// hexdumps up to the first 256 bytes of it to stdout.
static const bool kUseHexDump = false;
279
// Prints |size| bytes starting at |_data| to stdout, 16 bytes per row.
// Each row shows the row's offset, the bytes in hex (with an extra space
// between the 8th and 9th column, and padding for a short last row), and
// finally the same bytes as characters with non-printable ones shown as '.'.
static void hexdump(const void *_data, size_t size) {
    static const size_t kBytesPerRow = 16;
    const uint8_t *bytes = (const uint8_t *)_data;

    for (size_t rowStart = 0; rowStart < size; rowStart += kBytesPerRow) {
        const size_t remaining = size - rowStart;
        const size_t rowLen =
                (remaining > kBytesPerRow) ? kBytesPerRow : remaining;

        printf("0x%04zx  ", rowStart);

        // Hex columns; missing columns of a short row are blank-padded so the
        // character column stays aligned.
        for (size_t col = 0; col < kBytesPerRow; ++col) {
            if (col == 8) {
                printf(" ");
            }

            if (col < rowLen) {
                printf("%02x ", bytes[rowStart + col]);
            } else {
                printf("   ");
            }
        }

        printf(" ");

        // Character column.
        for (size_t col = 0; col < rowLen; ++col) {
            const uint8_t value = bytes[rowStart + col];
            printf("%c", isprint(value) ? value : '.');
        }

        printf("\n");
    }
}
318
319static const char *FourCC2MIME(uint32_t fourcc) {
320    switch (fourcc) {
321        case FOURCC('m', 'p', '4', 'a'):
322            return MEDIA_MIMETYPE_AUDIO_AAC;
323
324        case FOURCC('s', 'a', 'm', 'r'):
325            return MEDIA_MIMETYPE_AUDIO_AMR_NB;
326
327        case FOURCC('s', 'a', 'w', 'b'):
328            return MEDIA_MIMETYPE_AUDIO_AMR_WB;
329
330        case FOURCC('m', 'p', '4', 'v'):
331            return MEDIA_MIMETYPE_VIDEO_MPEG4;
332
333        case FOURCC('s', '2', '6', '3'):
334        case FOURCC('h', '2', '6', '3'):
335        case FOURCC('H', '2', '6', '3'):
336            return MEDIA_MIMETYPE_VIDEO_H263;
337
338        case FOURCC('a', 'v', 'c', '1'):
339            return MEDIA_MIMETYPE_VIDEO_AVC;
340
341        case FOURCC('h', 'v', 'c', '1'):
342        case FOURCC('h', 'e', 'v', '1'):
343            return MEDIA_MIMETYPE_VIDEO_HEVC;
344        default:
345            CHECK(!"should not be here.");
346            return NULL;
347    }
348}
349
350static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
351    if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
352        // AMR NB audio is always mono, 8kHz
353        *channels = 1;
354        *rate = 8000;
355        return true;
356    } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
357        // AMR WB audio is always mono, 16kHz
358        *channels = 1;
359        *rate = 16000;
360        return true;
361    }
362    return false;
363}
364
// Creates an extractor reading from |source|. Nothing is parsed here:
// mInitCheck starts as NO_INIT, which readMetaData() treats as "not parsed
// yet", and the track and SINF lists start out empty.
MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
    : mMoofOffset(0),
      mMoofFound(false),
      mMdatFound(false),
      mDataSource(source),
      mInitCheck(NO_INIT),
      mHasVideo(false),
      mHeaderTimescale(0),
      mFirstTrack(NULL),
      mLastTrack(NULL),
      mFileMetaData(new MetaData),
      mFirstSINF(NULL),
      mIsDrm(false) {
}
379
380MPEG4Extractor::~MPEG4Extractor() {
381    Track *track = mFirstTrack;
382    while (track) {
383        Track *next = track->next;
384
385        delete track;
386        track = next;
387    }
388    mFirstTrack = mLastTrack = NULL;
389
390    SINF *sinf = mFirstSINF;
391    while (sinf) {
392        SINF *next = sinf->next;
393        delete[] sinf->IPMPData;
394        delete sinf;
395        sinf = next;
396    }
397    mFirstSINF = NULL;
398
399    for (size_t i = 0; i < mPssh.size(); i++) {
400        delete [] mPssh[i].data;
401    }
402}
403
404uint32_t MPEG4Extractor::flags() const {
405    return CAN_PAUSE |
406            ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
407                    (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
408}
409
410sp<MetaData> MPEG4Extractor::getMetaData() {
411    status_t err;
412    if ((err = readMetaData()) != OK) {
413        return new MetaData;
414    }
415
416    return mFileMetaData;
417}
418
419size_t MPEG4Extractor::countTracks() {
420    status_t err;
421    if ((err = readMetaData()) != OK) {
422        ALOGV("MPEG4Extractor::countTracks: no tracks");
423        return 0;
424    }
425
426    size_t n = 0;
427    Track *track = mFirstTrack;
428    while (track) {
429        ++n;
430        track = track->next;
431    }
432
433    ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
434    return n;
435}
436
437sp<MetaData> MPEG4Extractor::getTrackMetaData(
438        size_t index, uint32_t flags) {
439    status_t err;
440    if ((err = readMetaData()) != OK) {
441        return NULL;
442    }
443
444    Track *track = mFirstTrack;
445    while (index > 0) {
446        if (track == NULL) {
447            return NULL;
448        }
449
450        track = track->next;
451        --index;
452    }
453
454    if (track == NULL) {
455        return NULL;
456    }
457
458    if ((flags & kIncludeExtensiveMetaData)
459            && !track->includes_expensive_metadata) {
460        track->includes_expensive_metadata = true;
461
462        const char *mime;
463        CHECK(track->meta->findCString(kKeyMIMEType, &mime));
464        if (!strncasecmp("video/", mime, 6)) {
465            if (mMoofOffset > 0) {
466                int64_t duration;
467                if (track->meta->findInt64(kKeyDuration, &duration)) {
468                    // nothing fancy, just pick a frame near 1/4th of the duration
469                    track->meta->setInt64(
470                            kKeyThumbnailTime, duration / 4);
471                }
472            } else {
473                uint32_t sampleIndex;
474                uint32_t sampleTime;
475                if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK
476                        && track->sampleTable->getMetaDataForSample(
477                            sampleIndex, NULL /* offset */, NULL /* size */,
478                            &sampleTime) == OK) {
479                    track->meta->setInt64(
480                            kKeyThumbnailTime,
481                            ((int64_t)sampleTime * 1000000) / track->timescale);
482                }
483            }
484
485            // MPEG2 tracks do not provide CSD, so read the stream header
486            if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
487                off64_t offset;
488                size_t size;
489                if (track->sampleTable->getMetaDataForSample(
490                            0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
491                    if (size > kMaxTrackHeaderSize) {
492                        size = kMaxTrackHeaderSize;
493                    }
494                    uint8_t header[kMaxTrackHeaderSize];
495                    if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
496                        track->meta->setData(kKeyStreamHeader, 'mdat', header, size);
497                    }
498                }
499            }
500        }
501    }
502
503    return track->meta;
504}
505
// Writes the four bytes of |x|, most significant first, into |s| followed by
// a terminating NUL. |s| must have room for at least 5 chars.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int pos = 0; pos < 4; ++pos) {
        const int shift = 8 * (3 - pos);
        s[pos] = (x >> shift) & 0xff;
    }
    s[4] = '\0';
}
513
514status_t MPEG4Extractor::readMetaData() {
515    if (mInitCheck != NO_INIT) {
516        return mInitCheck;
517    }
518
519    off64_t offset = 0;
520    status_t err;
521    bool sawMoovOrSidx = false;
522
523    while (!(sawMoovOrSidx && (mMdatFound || mMoofFound))) {
524        off64_t orig_offset = offset;
525        err = parseChunk(&offset, 0);
526
527        if (err != OK && err != UNKNOWN_ERROR) {
528            break;
529        } else if (offset <= orig_offset) {
530            // only continue parsing if the offset was advanced,
531            // otherwise we might end up in an infinite loop
532            ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
533            err = ERROR_MALFORMED;
534            break;
535        } else if (err == UNKNOWN_ERROR) {
536            sawMoovOrSidx = true;
537        }
538    }
539
540    if (mInitCheck == OK) {
541        if (mHasVideo) {
542            mFileMetaData->setCString(
543                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
544        } else {
545            mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
546        }
547    } else {
548        mInitCheck = err;
549    }
550
551    CHECK_NE(err, (status_t)NO_INIT);
552
553    // copy pssh data into file metadata
554    uint64_t psshsize = 0;
555    for (size_t i = 0; i < mPssh.size(); i++) {
556        psshsize += 20 + mPssh[i].datalen;
557    }
558    if (psshsize > 0 && psshsize <= UINT32_MAX) {
559        char *buf = (char*)malloc(psshsize);
560        if (!buf) {
561            ALOGE("b/28471206");
562            return NO_MEMORY;
563        }
564        char *ptr = buf;
565        for (size_t i = 0; i < mPssh.size(); i++) {
566            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
567            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
568            ptr += (20 + mPssh[i].datalen);
569        }
570        mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize);
571        free(buf);
572    }
573    return mInitCheck;
574}
575
576char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
577    if (mFirstSINF == NULL) {
578        return NULL;
579    }
580
581    SINF *sinf = mFirstSINF;
582    while (sinf && (trackID != sinf->trackID)) {
583        sinf = sinf->next;
584    }
585
586    if (sinf == NULL) {
587        return NULL;
588    }
589
590    *len = sinf->len;
591    return sinf->IPMPData;
592}
593
594// Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
595static int32_t readSize(off64_t offset,
596        const sp<DataSource> DataSource, uint8_t *numOfBytes) {
597    uint32_t size = 0;
598    uint8_t data;
599    bool moreData = true;
600    *numOfBytes = 0;
601
602    while (moreData) {
603        if (DataSource->readAt(offset, &data, 1) < 1) {
604            return -1;
605        }
606        offset ++;
607        moreData = (data >= 128) ? true : false;
608        size = (size << 7) | (data & 0x7f); // Take last 7 bits
609        (*numOfBytes) ++;
610    }
611
612    return size;
613}
614
615status_t MPEG4Extractor::parseDrmSINF(
616        off64_t * /* offset */, off64_t data_offset) {
617    uint8_t updateIdTag;
618    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
619        return ERROR_IO;
620    }
621    data_offset ++;
622
623    if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
624        return ERROR_MALFORMED;
625    }
626
627    uint8_t numOfBytes;
628    int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
629    if (size < 0) {
630        return ERROR_IO;
631    }
632    data_offset += numOfBytes;
633
634    while(size >= 11 ) {
635        uint8_t descriptorTag;
636        if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
637            return ERROR_IO;
638        }
639        data_offset ++;
640
641        if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
642            return ERROR_MALFORMED;
643        }
644
645        uint8_t buffer[8];
646        //ObjectDescriptorID and ObjectDescriptor url flag
647        if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
648            return ERROR_IO;
649        }
650        data_offset += 2;
651
652        if ((buffer[1] >> 5) & 0x0001) { //url flag is set
653            return ERROR_MALFORMED;
654        }
655
656        if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
657            return ERROR_IO;
658        }
659        data_offset += 8;
660
661        if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
662                || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
663            return ERROR_MALFORMED;
664        }
665
666        SINF *sinf = new SINF;
667        sinf->trackID = U16_AT(&buffer[3]);
668        sinf->IPMPDescriptorID = buffer[7];
669        sinf->next = mFirstSINF;
670        mFirstSINF = sinf;
671
672        size -= (8 + 2 + 1);
673    }
674
675    if (size != 0) {
676        return ERROR_MALFORMED;
677    }
678
679    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
680        return ERROR_IO;
681    }
682    data_offset ++;
683
684    if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
685        return ERROR_MALFORMED;
686    }
687
688    size = readSize(data_offset, mDataSource, &numOfBytes);
689    if (size < 0) {
690        return ERROR_IO;
691    }
692    data_offset += numOfBytes;
693
694    while (size > 0) {
695        uint8_t tag;
696        int32_t dataLen;
697        if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
698            return ERROR_IO;
699        }
700        data_offset ++;
701
702        if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
703            uint8_t id;
704            dataLen = readSize(data_offset, mDataSource, &numOfBytes);
705            if (dataLen < 0) {
706                return ERROR_IO;
707            } else if (dataLen < 4) {
708                return ERROR_MALFORMED;
709            }
710            data_offset += numOfBytes;
711
712            if (mDataSource->readAt(data_offset, &id, 1) < 1) {
713                return ERROR_IO;
714            }
715            data_offset ++;
716
717            SINF *sinf = mFirstSINF;
718            while (sinf && (sinf->IPMPDescriptorID != id)) {
719                sinf = sinf->next;
720            }
721            if (sinf == NULL) {
722                return ERROR_MALFORMED;
723            }
724            sinf->len = dataLen - 3;
725            sinf->IPMPData = new (std::nothrow) char[sinf->len];
726            if (sinf->IPMPData == NULL) {
727                return ERROR_MALFORMED;
728            }
729            data_offset += 2;
730
731            if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) {
732                return ERROR_IO;
733            }
734            data_offset += sinf->len;
735
736            size -= (dataLen + numOfBytes + 1);
737        }
738    }
739
740    if (size != 0) {
741        return ERROR_MALFORMED;
742    }
743
744    return UNKNOWN_ERROR;  // Return a dummy error.
745}
746
747struct PathAdder {
748    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
749        : mPath(path) {
750        mPath->push(chunkType);
751    }
752
753    ~PathAdder() {
754        mPath->pop();
755    }
756
757private:
758    Vector<uint32_t> *mPath;
759
760    PathAdder(const PathAdder &);
761    PathAdder &operator=(const PathAdder &);
762};
763
764static bool underMetaDataPath(const Vector<uint32_t> &path) {
765    return path.size() >= 5
766        && path[0] == FOURCC('m', 'o', 'o', 'v')
767        && path[1] == FOURCC('u', 'd', 't', 'a')
768        && path[2] == FOURCC('m', 'e', 't', 'a')
769        && path[3] == FOURCC('i', 'l', 's', 't');
770}
771
772static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
773    return path.size() >= 2
774            && path[0] == FOURCC('m', 'o', 'o', 'v')
775            && path[1] == FOURCC('m', 'e', 't', 'a')
776            && (depth == 2
777            || (depth == 3
778                    && (path[2] == FOURCC('h', 'd', 'l', 'r')
779                    ||  path[2] == FOURCC('i', 'l', 's', 't')
780                    ||  path[2] == FOURCC('k', 'e', 'y', 's'))));
781}
782
783// Given a time in seconds since Jan 1 1904, produce a human-readable string.
784static bool convertTimeToDate(int64_t time_1904, String8 *s) {
785    // delta between mpeg4 time and unix epoch time
786    static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
787    if (time_1904 < INT64_MIN + delta) {
788        return false;
789    }
790    time_t time_1970 = time_1904 - delta;
791
792    char tmp[32];
793    struct tm* tm = gmtime(&time_1970);
794    if (tm != NULL &&
795            strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
796        s->setTo(tmp);
797        return true;
798    }
799    return false;
800}
801
802status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
803    ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
804
805    if (*offset < 0) {
806        ALOGE("b/23540914");
807        return ERROR_MALFORMED;
808    }
809    uint32_t hdr[2];
810    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
811        return ERROR_IO;
812    }
813    uint64_t chunk_size = ntohl(hdr[0]);
814    int32_t chunk_type = ntohl(hdr[1]);
815    off64_t data_offset = *offset + 8;
816
817    if (chunk_size == 1) {
818        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
819            return ERROR_IO;
820        }
821        chunk_size = ntoh64(chunk_size);
822        data_offset += 8;
823
824        if (chunk_size < 16) {
825            // The smallest valid chunk is 16 bytes long in this case.
826            return ERROR_MALFORMED;
827        }
828    } else if (chunk_size == 0) {
829        if (depth == 0) {
830            // atom extends to end of file
831            off64_t sourceSize;
832            if (mDataSource->getSize(&sourceSize) == OK) {
833                chunk_size = (sourceSize - *offset);
834            } else {
835                // XXX could we just pick a "sufficiently large" value here?
836                ALOGE("atom size is 0, and data source has no size");
837                return ERROR_MALFORMED;
838            }
839        } else {
840            // not allowed for non-toplevel atoms, skip it
841            *offset += 4;
842            return OK;
843        }
844    } else if (chunk_size < 8) {
845        // The smallest valid chunk is 8 bytes long.
846        ALOGE("invalid chunk size: %" PRIu64, chunk_size);
847        return ERROR_MALFORMED;
848    }
849
850    char chunk[5];
851    MakeFourCCString(chunk_type, chunk);
852    ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
853
854    if (kUseHexDump) {
855        static const char kWhitespace[] = "                                        ";
856        const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
857        printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
858
859        char buffer[256];
860        size_t n = chunk_size;
861        if (n > sizeof(buffer)) {
862            n = sizeof(buffer);
863        }
864        if (mDataSource->readAt(*offset, buffer, n)
865                < (ssize_t)n) {
866            return ERROR_IO;
867        }
868
869        hexdump(buffer, n);
870    }
871
872    PathAdder autoAdder(&mPath, chunk_type);
873
874    // (data_offset - *offset) is either 8 or 16
875    off64_t chunk_data_size = chunk_size - (data_offset - *offset);
876    if (chunk_data_size < 0) {
877        ALOGE("b/23540914");
878        return ERROR_MALFORMED;
879    }
880
881    if (chunk_type != FOURCC('c', 'p', 'r', 't')
882            && chunk_type != FOURCC('c', 'o', 'v', 'r')
883            && mPath.size() == 5 && underMetaDataPath(mPath)) {
884        off64_t stop_offset = *offset + chunk_size;
885        *offset = data_offset;
886        while (*offset < stop_offset) {
887            status_t err = parseChunk(offset, depth + 1);
888            if (err != OK) {
889                return err;
890            }
891        }
892
893        if (*offset != stop_offset) {
894            return ERROR_MALFORMED;
895        }
896
897        return OK;
898    }
899
900    switch(chunk_type) {
901        case FOURCC('m', 'o', 'o', 'v'):
902        case FOURCC('t', 'r', 'a', 'k'):
903        case FOURCC('m', 'd', 'i', 'a'):
904        case FOURCC('m', 'i', 'n', 'f'):
905        case FOURCC('d', 'i', 'n', 'f'):
906        case FOURCC('s', 't', 'b', 'l'):
907        case FOURCC('m', 'v', 'e', 'x'):
908        case FOURCC('m', 'o', 'o', 'f'):
909        case FOURCC('t', 'r', 'a', 'f'):
910        case FOURCC('m', 'f', 'r', 'a'):
911        case FOURCC('u', 'd', 't', 'a'):
912        case FOURCC('i', 'l', 's', 't'):
913        case FOURCC('s', 'i', 'n', 'f'):
914        case FOURCC('s', 'c', 'h', 'i'):
915        case FOURCC('e', 'd', 't', 's'):
916        {
917            if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) {
918                // store the offset of the first segment
919                mMoofFound = true;
920                mMoofOffset = *offset;
921            }
922
923            if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
924                ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
925
926                if (mDataSource->flags()
927                        & (DataSource::kWantsPrefetching
928                            | DataSource::kIsCachingDataSource)) {
929                    sp<MPEG4DataSource> cachedSource =
930                        new MPEG4DataSource(mDataSource);
931
932                    if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
933                        mDataSource = cachedSource;
934                    }
935                }
936
937                if (mLastTrack == NULL)
938                    return ERROR_MALFORMED;
939
940                mLastTrack->sampleTable = new SampleTable(mDataSource);
941            }
942
943            bool isTrack = false;
944            if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
945                isTrack = true;
946
947                Track *track = new Track;
948                track->next = NULL;
949                if (mLastTrack) {
950                    mLastTrack->next = track;
951                } else {
952                    mFirstTrack = track;
953                }
954                mLastTrack = track;
955
956                track->meta = new MetaData;
957                track->includes_expensive_metadata = false;
958                track->skipTrack = false;
959                track->timescale = 0;
960                track->meta->setCString(kKeyMIMEType, "application/octet-stream");
961            }
962
963            off64_t stop_offset = *offset + chunk_size;
964            *offset = data_offset;
965            while (*offset < stop_offset) {
966                status_t err = parseChunk(offset, depth + 1);
967                if (err != OK) {
968                    return err;
969                }
970            }
971
972            if (*offset != stop_offset) {
973                return ERROR_MALFORMED;
974            }
975
976            if (isTrack) {
977                int32_t trackId;
978                // There must be exact one track header per track.
979                if (!mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) {
980                    mLastTrack->skipTrack = true;
981                }
982                if (mLastTrack->skipTrack) {
983                    Track *cur = mFirstTrack;
984
985                    if (cur == mLastTrack) {
986                        delete cur;
987                        mFirstTrack = mLastTrack = NULL;
988                    } else {
989                        while (cur && cur->next != mLastTrack) {
990                            cur = cur->next;
991                        }
992                        cur->next = NULL;
993                        delete mLastTrack;
994                        mLastTrack = cur;
995                    }
996
997                    return OK;
998                }
999
1000                status_t err = verifyTrack(mLastTrack);
1001
1002                if (err != OK) {
1003                    return err;
1004                }
1005            } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
1006                mInitCheck = OK;
1007
1008                if (!mIsDrm) {
1009                    return UNKNOWN_ERROR;  // Return a dummy error.
1010                } else {
1011                    return OK;
1012                }
1013            }
1014            break;
1015        }
1016
1017        case FOURCC('e', 'l', 's', 't'):
1018        {
1019            *offset += chunk_size;
1020
1021            // See 14496-12 8.6.6
1022            uint8_t version;
1023            if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1024                return ERROR_IO;
1025            }
1026
1027            uint32_t entry_count;
1028            if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1029                return ERROR_IO;
1030            }
1031
1032            if (entry_count != 1) {
1033                // we only support a single entry at the moment, for gapless playback
1034                ALOGW("ignoring edit list with %d entries", entry_count);
1035            } else if (mHeaderTimescale == 0) {
1036                ALOGW("ignoring edit list because timescale is 0");
1037            } else {
1038                off64_t entriesoffset = data_offset + 8;
1039                uint64_t segment_duration;
1040                int64_t media_time;
1041
1042                if (version == 1) {
1043                    if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1044                            !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1045                        return ERROR_IO;
1046                    }
1047                } else if (version == 0) {
1048                    uint32_t sd;
1049                    int32_t mt;
1050                    if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1051                            !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1052                        return ERROR_IO;
1053                    }
1054                    segment_duration = sd;
1055                    media_time = mt;
1056                } else {
1057                    return ERROR_IO;
1058                }
1059
1060                uint64_t halfscale = mHeaderTimescale / 2;
1061                segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale;
1062                media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale;
1063
1064                int64_t duration;
1065                int32_t samplerate;
1066                if (!mLastTrack) {
1067                    return ERROR_MALFORMED;
1068                }
1069                if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
1070                        mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {
1071
1072                    int64_t delay = (media_time  * samplerate + 500000) / 1000000;
1073                    mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
1074
1075                    int64_t paddingus = duration - (int64_t)(segment_duration + media_time);
1076                    if (paddingus < 0) {
1077                        // track duration from media header (which is what kKeyDuration is) might
1078                        // be slightly shorter than the segment duration, which would make the
1079                        // padding negative. Clamp to zero.
1080                        paddingus = 0;
1081                    }
1082                    int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
1083                    mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
1084                }
1085            }
1086            break;
1087        }
1088
1089        case FOURCC('f', 'r', 'm', 'a'):
1090        {
1091            *offset += chunk_size;
1092
1093            uint32_t original_fourcc;
1094            if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1095                return ERROR_IO;
1096            }
1097            original_fourcc = ntohl(original_fourcc);
1098            ALOGV("read original format: %d", original_fourcc);
1099
1100            if (mLastTrack == NULL)
1101                return ERROR_MALFORMED;
1102
1103            mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
1104            uint32_t num_channels = 0;
1105            uint32_t sample_rate = 0;
1106            if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1107                mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1108                mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1109            }
1110            break;
1111        }
1112
1113        case FOURCC('t', 'e', 'n', 'c'):
1114        {
1115            *offset += chunk_size;
1116
1117            if (chunk_size < 32) {
1118                return ERROR_MALFORMED;
1119            }
1120
1121            // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1122            // default IV size, 16 bytes default KeyID
1123            // (ISO 23001-7)
1124            char buf[4];
1125            memset(buf, 0, 4);
1126            if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1127                return ERROR_IO;
1128            }
1129            uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1130            if (defaultAlgorithmId > 1) {
1131                // only 0 (clear) and 1 (AES-128) are valid
1132                return ERROR_MALFORMED;
1133            }
1134
1135            memset(buf, 0, 4);
1136            if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1137                return ERROR_IO;
1138            }
1139            uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1140
1141            if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
1142                    (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
1143                // only unencrypted data must have 0 IV size
1144                return ERROR_MALFORMED;
1145            } else if (defaultIVSize != 0 &&
1146                    defaultIVSize != 8 &&
1147                    defaultIVSize != 16) {
1148                // only supported sizes are 0, 8 and 16
1149                return ERROR_MALFORMED;
1150            }
1151
1152            uint8_t defaultKeyId[16];
1153
1154            if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1155                return ERROR_IO;
1156            }
1157
1158            if (mLastTrack == NULL)
1159                return ERROR_MALFORMED;
1160
1161            mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
1162            mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1163            mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1164            break;
1165        }
1166
1167        case FOURCC('t', 'k', 'h', 'd'):
1168        {
1169            *offset += chunk_size;
1170
1171            status_t err;
1172            if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1173                return err;
1174            }
1175
1176            break;
1177        }
1178
1179        case FOURCC('p', 's', 's', 'h'):
1180        {
1181            *offset += chunk_size;
1182
1183            PsshInfo pssh;
1184
1185            if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1186                return ERROR_IO;
1187            }
1188
1189            uint32_t psshdatalen = 0;
1190            if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1191                return ERROR_IO;
1192            }
1193            pssh.datalen = ntohl(psshdatalen);
1194            ALOGV("pssh data size: %d", pssh.datalen);
1195            if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1196                // pssh data length exceeds size of containing box
1197                return ERROR_MALFORMED;
1198            }
1199
1200            pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1201            if (pssh.data == NULL) {
1202                return ERROR_MALFORMED;
1203            }
1204            ALOGV("allocated pssh @ %p", pssh.data);
1205            ssize_t requested = (ssize_t) pssh.datalen;
1206            if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1207                return ERROR_IO;
1208            }
1209            mPssh.push_back(pssh);
1210
1211            break;
1212        }
1213
1214        case FOURCC('m', 'd', 'h', 'd'):
1215        {
1216            *offset += chunk_size;
1217
1218            if (chunk_data_size < 4 || mLastTrack == NULL) {
1219                return ERROR_MALFORMED;
1220            }
1221
1222            uint8_t version;
1223            if (mDataSource->readAt(
1224                        data_offset, &version, sizeof(version))
1225                    < (ssize_t)sizeof(version)) {
1226                return ERROR_IO;
1227            }
1228
1229            off64_t timescale_offset;
1230
1231            if (version == 1) {
1232                timescale_offset = data_offset + 4 + 16;
1233            } else if (version == 0) {
1234                timescale_offset = data_offset + 4 + 8;
1235            } else {
1236                return ERROR_IO;
1237            }
1238
1239            uint32_t timescale;
1240            if (mDataSource->readAt(
1241                        timescale_offset, &timescale, sizeof(timescale))
1242                    < (ssize_t)sizeof(timescale)) {
1243                return ERROR_IO;
1244            }
1245
1246            if (!timescale) {
1247                ALOGE("timescale should not be ZERO.");
1248                return ERROR_MALFORMED;
1249            }
1250
1251            mLastTrack->timescale = ntohl(timescale);
1252
1253            // 14496-12 says all ones means indeterminate, but some files seem to use
1254            // 0 instead. We treat both the same.
1255            int64_t duration = 0;
1256            if (version == 1) {
1257                if (mDataSource->readAt(
1258                            timescale_offset + 4, &duration, sizeof(duration))
1259                        < (ssize_t)sizeof(duration)) {
1260                    return ERROR_IO;
1261                }
1262                if (duration != -1) {
1263                    duration = ntoh64(duration);
1264                }
1265            } else {
1266                uint32_t duration32;
1267                if (mDataSource->readAt(
1268                            timescale_offset + 4, &duration32, sizeof(duration32))
1269                        < (ssize_t)sizeof(duration32)) {
1270                    return ERROR_IO;
1271                }
1272                if (duration32 != 0xffffffff) {
1273                    duration = ntohl(duration32);
1274                }
1275            }
1276            if (duration != 0 && mLastTrack->timescale != 0) {
1277                mLastTrack->meta->setInt64(
1278                        kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1279            }
1280
1281            uint8_t lang[2];
1282            off64_t lang_offset;
1283            if (version == 1) {
1284                lang_offset = timescale_offset + 4 + 8;
1285            } else if (version == 0) {
1286                lang_offset = timescale_offset + 4 + 4;
1287            } else {
1288                return ERROR_IO;
1289            }
1290
1291            if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1292                    < (ssize_t)sizeof(lang)) {
1293                return ERROR_IO;
1294            }
1295
1296            // To get the ISO-639-2/T three character language code
1297            // 1 bit pad followed by 3 5-bits characters. Each character
1298            // is packed as the difference between its ASCII value and 0x60.
1299            char lang_code[4];
1300            lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1301            lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1302            lang_code[2] = (lang[1] & 0x1f) + 0x60;
1303            lang_code[3] = '\0';
1304
1305            mLastTrack->meta->setCString(
1306                    kKeyMediaLanguage, lang_code);
1307
1308            break;
1309        }
1310
1311        case FOURCC('s', 't', 's', 'd'):
1312        {
1313            if (chunk_data_size < 8) {
1314                return ERROR_MALFORMED;
1315            }
1316
1317            uint8_t buffer[8];
1318            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1319                return ERROR_MALFORMED;
1320            }
1321
1322            if (mDataSource->readAt(
1323                        data_offset, buffer, 8) < 8) {
1324                return ERROR_IO;
1325            }
1326
1327            if (U32_AT(buffer) != 0) {
1328                // Should be version 0, flags 0.
1329                return ERROR_MALFORMED;
1330            }
1331
1332            uint32_t entry_count = U32_AT(&buffer[4]);
1333
1334            if (entry_count > 1) {
1335                // For 3GPP timed text, there could be multiple tx3g boxes contain
1336                // multiple text display formats. These formats will be used to
1337                // display the timed text.
1338                // For encrypted files, there may also be more than one entry.
1339                const char *mime;
1340
1341                if (mLastTrack == NULL)
1342                    return ERROR_MALFORMED;
1343
1344                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1345                if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1346                        strcasecmp(mime, "application/octet-stream")) {
1347                    // For now we only support a single type of media per track.
1348                    mLastTrack->skipTrack = true;
1349                    *offset += chunk_size;
1350                    break;
1351                }
1352            }
1353            off64_t stop_offset = *offset + chunk_size;
1354            *offset = data_offset + 8;
1355            for (uint32_t i = 0; i < entry_count; ++i) {
1356                status_t err = parseChunk(offset, depth + 1);
1357                if (err != OK) {
1358                    return err;
1359                }
1360            }
1361
1362            if (*offset != stop_offset) {
1363                return ERROR_MALFORMED;
1364            }
1365            break;
1366        }
1367
1368        case FOURCC('m', 'p', '4', 'a'):
1369        case FOURCC('e', 'n', 'c', 'a'):
1370        case FOURCC('s', 'a', 'm', 'r'):
1371        case FOURCC('s', 'a', 'w', 'b'):
1372        {
1373            uint8_t buffer[8 + 20];
1374            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1375                // Basic AudioSampleEntry size.
1376                return ERROR_MALFORMED;
1377            }
1378
1379            if (mDataSource->readAt(
1380                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1381                return ERROR_IO;
1382            }
1383
1384            uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1385            uint32_t num_channels = U16_AT(&buffer[16]);
1386
1387            uint16_t sample_size = U16_AT(&buffer[18]);
1388            uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1389
1390            if (mLastTrack == NULL)
1391                return ERROR_MALFORMED;
1392
1393            if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1394                // if the chunk type is enca, we'll get the type from the sinf/frma box later
1395                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1396                AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1397            }
1398            ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1399                   chunk, num_channels, sample_size, sample_rate);
1400            mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1401            mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1402
1403            off64_t stop_offset = *offset + chunk_size;
1404            *offset = data_offset + sizeof(buffer);
1405            while (*offset < stop_offset) {
1406                status_t err = parseChunk(offset, depth + 1);
1407                if (err != OK) {
1408                    return err;
1409                }
1410            }
1411
1412            if (*offset != stop_offset) {
1413                return ERROR_MALFORMED;
1414            }
1415            break;
1416        }
1417
1418        case FOURCC('m', 'p', '4', 'v'):
1419        case FOURCC('e', 'n', 'c', 'v'):
1420        case FOURCC('s', '2', '6', '3'):
1421        case FOURCC('H', '2', '6', '3'):
1422        case FOURCC('h', '2', '6', '3'):
1423        case FOURCC('a', 'v', 'c', '1'):
1424        case FOURCC('h', 'v', 'c', '1'):
1425        case FOURCC('h', 'e', 'v', '1'):
1426        {
1427            mHasVideo = true;
1428
1429            uint8_t buffer[78];
1430            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1431                // Basic VideoSampleEntry size.
1432                return ERROR_MALFORMED;
1433            }
1434
1435            if (mDataSource->readAt(
1436                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1437                return ERROR_IO;
1438            }
1439
1440            uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1441            uint16_t width = U16_AT(&buffer[6 + 18]);
1442            uint16_t height = U16_AT(&buffer[6 + 20]);
1443
1444            // The video sample is not standard-compliant if it has invalid dimension.
1445            // Use some default width and height value, and
1446            // let the decoder figure out the actual width and height (and thus
1447            // be prepared for INFO_FOMRAT_CHANGED event).
1448            if (width == 0)  width  = 352;
1449            if (height == 0) height = 288;
1450
1451            // printf("*** coding='%s' width=%d height=%d\n",
1452            //        chunk, width, height);
1453
1454            if (mLastTrack == NULL)
1455                return ERROR_MALFORMED;
1456
1457            if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1458                // if the chunk type is encv, we'll get the type from the sinf/frma box later
1459                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1460            }
1461            mLastTrack->meta->setInt32(kKeyWidth, width);
1462            mLastTrack->meta->setInt32(kKeyHeight, height);
1463
1464            off64_t stop_offset = *offset + chunk_size;
1465            *offset = data_offset + sizeof(buffer);
1466            while (*offset < stop_offset) {
1467                status_t err = parseChunk(offset, depth + 1);
1468                if (err != OK) {
1469                    return err;
1470                }
1471            }
1472
1473            if (*offset != stop_offset) {
1474                return ERROR_MALFORMED;
1475            }
1476            break;
1477        }
1478
1479        case FOURCC('s', 't', 'c', 'o'):
1480        case FOURCC('c', 'o', '6', '4'):
1481        {
1482            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1483                return ERROR_MALFORMED;
1484
1485            status_t err =
1486                mLastTrack->sampleTable->setChunkOffsetParams(
1487                        chunk_type, data_offset, chunk_data_size);
1488
1489            *offset += chunk_size;
1490
1491            if (err != OK) {
1492                return err;
1493            }
1494
1495            break;
1496        }
1497
1498        case FOURCC('s', 't', 's', 'c'):
1499        {
1500            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1501                return ERROR_MALFORMED;
1502
1503            status_t err =
1504                mLastTrack->sampleTable->setSampleToChunkParams(
1505                        data_offset, chunk_data_size);
1506
1507            *offset += chunk_size;
1508
1509            if (err != OK) {
1510                return err;
1511            }
1512
1513            break;
1514        }
1515
1516        case FOURCC('s', 't', 's', 'z'):
1517        case FOURCC('s', 't', 'z', '2'):
1518        {
1519            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1520                return ERROR_MALFORMED;
1521
1522            status_t err =
1523                mLastTrack->sampleTable->setSampleSizeParams(
1524                        chunk_type, data_offset, chunk_data_size);
1525
1526            *offset += chunk_size;
1527
1528            if (err != OK) {
1529                return err;
1530            }
1531
1532            size_t max_size;
1533            err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1534
1535            if (err != OK) {
1536                return err;
1537            }
1538
1539            if (max_size != 0) {
1540                // Assume that a given buffer only contains at most 10 chunks,
1541                // each chunk originally prefixed with a 2 byte length will
1542                // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1543                // and thus will grow by 2 bytes per chunk.
1544                if (max_size > SIZE_MAX - 10 * 2) {
1545                    ALOGE("max sample size too big: %zu", max_size);
1546                    return ERROR_MALFORMED;
1547                }
1548                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1549            } else {
1550                // No size was specified. Pick a conservatively large size.
1551                uint32_t width, height;
1552                if (!mLastTrack->meta->findInt32(kKeyWidth, (int32_t*)&width) ||
1553                    !mLastTrack->meta->findInt32(kKeyHeight,(int32_t*) &height)) {
1554                    ALOGE("No width or height, assuming worst case 1080p");
1555                    width = 1920;
1556                    height = 1080;
1557                } else {
1558                    // A resolution was specified, check that it's not too big. The values below
1559                    // were chosen so that the calculations below don't cause overflows, they're
1560                    // not indicating that resolutions up to 32kx32k are actually supported.
1561                    if (width > 32768 || height > 32768) {
1562                        ALOGE("can't support %u x %u video", width, height);
1563                        return ERROR_MALFORMED;
1564                    }
1565                }
1566
1567                const char *mime;
1568                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1569                if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
1570                        || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
1571                    // AVC & HEVC requires compression ratio of at least 2, and uses
1572                    // macroblocks
1573                    max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1574                } else {
1575                    // For all other formats there is no minimum compression
1576                    // ratio. Use compression ratio of 1.
1577                    max_size = width * height * 3 / 2;
1578                }
1579                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size);
1580            }
1581
1582            // NOTE: setting another piece of metadata invalidates any pointers (such as the
1583            // mimetype) previously obtained, so don't cache them.
1584            const char *mime;
1585            CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1586            // Calculate average frame rate.
1587            if (!strncasecmp("video/", mime, 6)) {
1588                size_t nSamples = mLastTrack->sampleTable->countSamples();
1589                if (nSamples == 0) {
1590                    int32_t trackId;
1591                    if (mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) {
1592                        for (size_t i = 0; i < mTrex.size(); i++) {
1593                            Trex *t = &mTrex.editItemAt(i);
1594                            if (t->track_ID == (uint32_t) trackId) {
1595                                if (t->default_sample_duration > 0) {
1596                                    int32_t frameRate =
1597                                            mLastTrack->timescale / t->default_sample_duration;
1598                                    mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1599                                }
1600                                break;
1601                            }
1602                        }
1603                    }
1604                } else {
1605                    int64_t durationUs;
1606                    if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1607                        if (durationUs > 0) {
1608                            int32_t frameRate = (nSamples * 1000000LL +
1609                                        (durationUs >> 1)) / durationUs;
1610                            mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1611                        }
1612                    }
1613                }
1614            }
1615
1616            break;
1617        }
1618
1619        case FOURCC('s', 't', 't', 's'):
1620        {
1621            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1622                return ERROR_MALFORMED;
1623
1624            *offset += chunk_size;
1625
1626            status_t err =
1627                mLastTrack->sampleTable->setTimeToSampleParams(
1628                        data_offset, chunk_data_size);
1629
1630            if (err != OK) {
1631                return err;
1632            }
1633
1634            break;
1635        }
1636
1637        case FOURCC('c', 't', 't', 's'):
1638        {
1639            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1640                return ERROR_MALFORMED;
1641
1642            *offset += chunk_size;
1643
1644            status_t err =
1645                mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1646                        data_offset, chunk_data_size);
1647
1648            if (err != OK) {
1649                return err;
1650            }
1651
1652            break;
1653        }
1654
1655        case FOURCC('s', 't', 's', 's'):
1656        {
1657            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1658                return ERROR_MALFORMED;
1659
1660            *offset += chunk_size;
1661
1662            status_t err =
1663                mLastTrack->sampleTable->setSyncSampleParams(
1664                        data_offset, chunk_data_size);
1665
1666            if (err != OK) {
1667                return err;
1668            }
1669
1670            break;
1671        }
1672
1673        // \xA9xyz
1674        case FOURCC(0xA9, 'x', 'y', 'z'):
1675        {
1676            *offset += chunk_size;
1677
1678            // Best case the total data length inside "\xA9xyz" box
1679            // would be 8, for instance "\xA9xyz" + "\x00\x04\x15\xc7" + "0+0/",
1680            // where "\x00\x04" is the text string length with value = 4,
1681            // "\0x15\xc7" is the language code = en, and "0+0" is a
1682            // location (string) value with longitude = 0 and latitude = 0.
1683            if (chunk_data_size < 8) {
1684                return ERROR_MALFORMED;
1685            }
1686
1687            // Worst case the location string length would be 18,
1688            // for instance +90.0000-180.0000, without the trailing "/" and
1689            // the string length + language code.
1690            char buffer[18];
1691
1692            // Substracting 5 from the data size is because the text string length +
1693            // language code takes 4 bytes, and the trailing slash "/" takes 1 byte.
1694            off64_t location_length = chunk_data_size - 5;
1695            if (location_length >= (off64_t) sizeof(buffer)) {
1696                return ERROR_MALFORMED;
1697            }
1698
1699            if (mDataSource->readAt(
1700                        data_offset + 4, buffer, location_length) < location_length) {
1701                return ERROR_IO;
1702            }
1703
1704            buffer[location_length] = '\0';
1705            mFileMetaData->setCString(kKeyLocation, buffer);
1706            break;
1707        }
1708
1709        case FOURCC('e', 's', 'd', 's'):
1710        {
1711            *offset += chunk_size;
1712
1713            if (chunk_data_size < 4) {
1714                return ERROR_MALFORMED;
1715            }
1716
1717            uint8_t buffer[256];
1718            if (chunk_data_size > (off64_t)sizeof(buffer)) {
1719                return ERROR_BUFFER_TOO_SMALL;
1720            }
1721
1722            if (mDataSource->readAt(
1723                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
1724                return ERROR_IO;
1725            }
1726
1727            if (U32_AT(buffer) != 0) {
1728                // Should be version 0, flags 0.
1729                return ERROR_MALFORMED;
1730            }
1731
1732            if (mLastTrack == NULL)
1733                return ERROR_MALFORMED;
1734
1735            mLastTrack->meta->setData(
1736                    kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1737
1738            if (mPath.size() >= 2
1739                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1740                // Information from the ESDS must be relied on for proper
1741                // setup of sample rate and channel count for MPEG4 Audio.
1742                // The generic header appears to only contain generic
1743                // information...
1744
1745                status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1746                        &buffer[4], chunk_data_size - 4);
1747
1748                if (err != OK) {
1749                    return err;
1750                }
1751            }
1752            if (mPath.size() >= 2
1753                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) {
1754                // Check if the video is MPEG2
1755                ESDS esds(&buffer[4], chunk_data_size - 4);
1756
1757                uint8_t objectTypeIndication;
1758                if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
1759                    if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
1760                        mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
1761                    }
1762                }
1763            }
1764            break;
1765        }
1766
1767        case FOURCC('b', 't', 'r', 't'):
1768        {
1769            *offset += chunk_size;
1770
1771            uint8_t buffer[12];
1772            if (chunk_data_size != sizeof(buffer)) {
1773                return ERROR_MALFORMED;
1774            }
1775
1776            if (mDataSource->readAt(
1777                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1778                return ERROR_IO;
1779            }
1780
1781            uint32_t maxBitrate = U32_AT(&buffer[4]);
1782            uint32_t avgBitrate = U32_AT(&buffer[8]);
1783            if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
1784                mLastTrack->meta->setInt32(kKeyMaxBitRate, (int32_t)maxBitrate);
1785            }
1786            if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
1787                mLastTrack->meta->setInt32(kKeyBitRate, (int32_t)avgBitrate);
1788            }
1789            break;
1790        }
1791
1792        case FOURCC('a', 'v', 'c', 'C'):
1793        {
1794            *offset += chunk_size;
1795
1796            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1797
1798            if (buffer->data() == NULL) {
1799                ALOGE("b/28471206");
1800                return NO_MEMORY;
1801            }
1802
1803            if (mDataSource->readAt(
1804                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1805                return ERROR_IO;
1806            }
1807
1808            if (mLastTrack == NULL)
1809                return ERROR_MALFORMED;
1810
1811            mLastTrack->meta->setData(
1812                    kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
1813
1814            break;
1815        }
1816        case FOURCC('h', 'v', 'c', 'C'):
1817        {
1818            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1819
1820            if (buffer->data() == NULL) {
1821                ALOGE("b/28471206");
1822                return NO_MEMORY;
1823            }
1824
1825            if (mDataSource->readAt(
1826                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1827                return ERROR_IO;
1828            }
1829
1830            if (mLastTrack == NULL)
1831                return ERROR_MALFORMED;
1832
1833            mLastTrack->meta->setData(
1834                    kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size);
1835
1836            *offset += chunk_size;
1837            break;
1838        }
1839
1840        case FOURCC('d', '2', '6', '3'):
1841        {
1842            *offset += chunk_size;
1843            /*
1844             * d263 contains a fixed 7 bytes part:
1845             *   vendor - 4 bytes
1846             *   version - 1 byte
1847             *   level - 1 byte
1848             *   profile - 1 byte
1849             * optionally, "d263" box itself may contain a 16-byte
1850             * bit rate box (bitr)
1851             *   average bit rate - 4 bytes
1852             *   max bit rate - 4 bytes
1853             */
1854            char buffer[23];
1855            if (chunk_data_size != 7 &&
1856                chunk_data_size != 23) {
1857                ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
1858                return ERROR_MALFORMED;
1859            }
1860
1861            if (mDataSource->readAt(
1862                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1863                return ERROR_IO;
1864            }
1865
1866            if (mLastTrack == NULL)
1867                return ERROR_MALFORMED;
1868
1869            mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1870
1871            break;
1872        }
1873
1874        case FOURCC('m', 'e', 't', 'a'):
1875        {
1876            off64_t stop_offset = *offset + chunk_size;
1877            *offset = data_offset;
1878            bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
1879            if (!isParsingMetaKeys) {
1880                uint8_t buffer[4];
1881                if (chunk_data_size < (off64_t)sizeof(buffer)) {
1882                    *offset = stop_offset;
1883                    return ERROR_MALFORMED;
1884                }
1885
1886                if (mDataSource->readAt(
1887                            data_offset, buffer, 4) < 4) {
1888                    *offset = stop_offset;
1889                    return ERROR_IO;
1890                }
1891
1892                if (U32_AT(buffer) != 0) {
1893                    // Should be version 0, flags 0.
1894
1895                    // If it's not, let's assume this is one of those
1896                    // apparently malformed chunks that don't have flags
1897                    // and completely different semantics than what's
1898                    // in the MPEG4 specs and skip it.
1899                    *offset = stop_offset;
1900                    return OK;
1901                }
1902                *offset +=  sizeof(buffer);
1903            }
1904
1905            while (*offset < stop_offset) {
1906                status_t err = parseChunk(offset, depth + 1);
1907                if (err != OK) {
1908                    return err;
1909                }
1910            }
1911
1912            if (*offset != stop_offset) {
1913                return ERROR_MALFORMED;
1914            }
1915            break;
1916        }
1917
1918        case FOURCC('m', 'e', 'a', 'n'):
1919        case FOURCC('n', 'a', 'm', 'e'):
1920        case FOURCC('d', 'a', 't', 'a'):
1921        {
1922            *offset += chunk_size;
1923
1924            if (mPath.size() == 6 && underMetaDataPath(mPath)) {
1925                status_t err = parseITunesMetaData(data_offset, chunk_data_size);
1926
1927                if (err != OK) {
1928                    return err;
1929                }
1930            }
1931
1932            break;
1933        }
1934
1935        case FOURCC('m', 'v', 'h', 'd'):
1936        {
1937            *offset += chunk_size;
1938
1939            if (chunk_data_size < 32) {
1940                return ERROR_MALFORMED;
1941            }
1942
1943            uint8_t header[32];
1944            if (mDataSource->readAt(
1945                        data_offset, header, sizeof(header))
1946                    < (ssize_t)sizeof(header)) {
1947                return ERROR_IO;
1948            }
1949
1950            uint64_t creationTime;
1951            uint64_t duration = 0;
1952            if (header[0] == 1) {
1953                creationTime = U64_AT(&header[4]);
1954                mHeaderTimescale = U32_AT(&header[20]);
1955                duration = U64_AT(&header[24]);
1956                if (duration == 0xffffffffffffffff) {
1957                    duration = 0;
1958                }
1959            } else if (header[0] != 0) {
1960                return ERROR_MALFORMED;
1961            } else {
1962                creationTime = U32_AT(&header[4]);
1963                mHeaderTimescale = U32_AT(&header[12]);
1964                uint32_t d32 = U32_AT(&header[16]);
1965                if (d32 == 0xffffffff) {
1966                    d32 = 0;
1967                }
1968                duration = d32;
1969            }
1970            if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
1971                mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
1972            }
1973
1974            String8 s;
1975            if (convertTimeToDate(creationTime, &s)) {
1976                mFileMetaData->setCString(kKeyDate, s.string());
1977            }
1978
1979
1980            break;
1981        }
1982
1983        case FOURCC('m', 'e', 'h', 'd'):
1984        {
1985            *offset += chunk_size;
1986
1987            if (chunk_data_size < 8) {
1988                return ERROR_MALFORMED;
1989            }
1990
1991            uint8_t flags[4];
1992            if (mDataSource->readAt(
1993                        data_offset, flags, sizeof(flags))
1994                    < (ssize_t)sizeof(flags)) {
1995                return ERROR_IO;
1996            }
1997
1998            uint64_t duration = 0;
1999            if (flags[0] == 1) {
2000                // 64 bit
2001                if (chunk_data_size < 12) {
2002                    return ERROR_MALFORMED;
2003                }
2004                mDataSource->getUInt64(data_offset + 4, &duration);
2005                if (duration == 0xffffffffffffffff) {
2006                    duration = 0;
2007                }
2008            } else if (flags[0] == 0) {
2009                // 32 bit
2010                uint32_t d32;
2011                mDataSource->getUInt32(data_offset + 4, &d32);
2012                if (d32 == 0xffffffff) {
2013                    d32 = 0;
2014                }
2015                duration = d32;
2016            } else {
2017                return ERROR_MALFORMED;
2018            }
2019
2020            if (duration != 0 && mHeaderTimescale != 0) {
2021                mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
2022            }
2023
2024            break;
2025        }
2026
2027        case FOURCC('m', 'd', 'a', 't'):
2028        {
2029            ALOGV("mdat chunk, drm: %d", mIsDrm);
2030
2031            mMdatFound = true;
2032
2033            if (!mIsDrm) {
2034                *offset += chunk_size;
2035                break;
2036            }
2037
2038            if (chunk_size < 8) {
2039                return ERROR_MALFORMED;
2040            }
2041
2042            return parseDrmSINF(offset, data_offset);
2043        }
2044
2045        case FOURCC('h', 'd', 'l', 'r'):
2046        {
2047            *offset += chunk_size;
2048
2049            if (underQTMetaPath(mPath, 3)) {
2050                break;
2051            }
2052
2053            uint32_t buffer;
2054            if (mDataSource->readAt(
2055                        data_offset + 8, &buffer, 4) < 4) {
2056                return ERROR_IO;
2057            }
2058
2059            uint32_t type = ntohl(buffer);
2060            // For the 3GPP file format, the handler-type within the 'hdlr' box
2061            // shall be 'text'. We also want to support 'sbtl' handler type
2062            // for a practical reason as various MPEG4 containers use it.
2063            if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
2064                if (mLastTrack != NULL) {
2065                    mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
2066                }
2067            }
2068
2069            break;
2070        }
2071
2072        case FOURCC('k', 'e', 'y', 's'):
2073        {
2074            *offset += chunk_size;
2075
2076            if (underQTMetaPath(mPath, 3)) {
2077                parseQTMetaKey(data_offset, chunk_data_size);
2078            }
2079            break;
2080        }
2081
2082        case FOURCC('t', 'r', 'e', 'x'):
2083        {
2084            *offset += chunk_size;
2085
2086            if (chunk_data_size < 24) {
2087                return ERROR_IO;
2088            }
2089            Trex trex;
2090            if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2091                !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2092                !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2093                !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2094                !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2095                return ERROR_IO;
2096            }
2097            mTrex.add(trex);
2098            break;
2099        }
2100
2101        case FOURCC('t', 'x', '3', 'g'):
2102        {
2103            if (mLastTrack == NULL)
2104                return ERROR_MALFORMED;
2105
2106            uint32_t type;
2107            const void *data;
2108            size_t size = 0;
2109            if (!mLastTrack->meta->findData(
2110                    kKeyTextFormatData, &type, &data, &size)) {
2111                size = 0;
2112            }
2113
2114            if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) {
2115                return ERROR_MALFORMED;
2116            }
2117
2118            uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size];
2119            if (buffer == NULL) {
2120                return ERROR_MALFORMED;
2121            }
2122
2123            if (size > 0) {
2124                memcpy(buffer, data, size);
2125            }
2126
2127            if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
2128                    < chunk_size) {
2129                delete[] buffer;
2130                buffer = NULL;
2131
2132                // advance read pointer so we don't end up reading this again
2133                *offset += chunk_size;
2134                return ERROR_IO;
2135            }
2136
2137            mLastTrack->meta->setData(
2138                    kKeyTextFormatData, 0, buffer, size + chunk_size);
2139
2140            delete[] buffer;
2141
2142            *offset += chunk_size;
2143            break;
2144        }
2145
2146        case FOURCC('c', 'o', 'v', 'r'):
2147        {
2148            *offset += chunk_size;
2149
2150            if (mFileMetaData != NULL) {
2151                ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
2152                      chunk_data_size, data_offset);
2153
2154                if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
2155                    return ERROR_MALFORMED;
2156                }
2157                sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1);
2158                if (buffer->data() == NULL) {
2159                    ALOGE("b/28471206");
2160                    return NO_MEMORY;
2161                }
2162                if (mDataSource->readAt(
2163                    data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) {
2164                    return ERROR_IO;
2165                }
2166                const int kSkipBytesOfDataBox = 16;
2167                if (chunk_data_size <= kSkipBytesOfDataBox) {
2168                    return ERROR_MALFORMED;
2169                }
2170
2171                mFileMetaData->setData(
2172                    kKeyAlbumArt, MetaData::TYPE_NONE,
2173                    buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
2174            }
2175
2176            break;
2177        }
2178
2179        case FOURCC('c', 'o', 'l', 'r'):
2180        {
2181            *offset += chunk_size;
2182            // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
2183            // ignore otherwise
2184            if (depth >= 2 && mPath[depth - 2] == FOURCC('s', 't', 's', 'd')) {
2185                status_t err = parseColorInfo(data_offset, chunk_data_size);
2186                if (err != OK) {
2187                    return err;
2188                }
2189            }
2190
2191            break;
2192        }
2193
2194        case FOURCC('t', 'i', 't', 'l'):
2195        case FOURCC('p', 'e', 'r', 'f'):
2196        case FOURCC('a', 'u', 't', 'h'):
2197        case FOURCC('g', 'n', 'r', 'e'):
2198        case FOURCC('a', 'l', 'b', 'm'):
2199        case FOURCC('y', 'r', 'r', 'c'):
2200        {
2201            *offset += chunk_size;
2202
2203            status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
2204
2205            if (err != OK) {
2206                return err;
2207            }
2208
2209            break;
2210        }
2211
2212        case FOURCC('I', 'D', '3', '2'):
2213        {
2214            *offset += chunk_size;
2215
2216            if (chunk_data_size < 6) {
2217                return ERROR_MALFORMED;
2218            }
2219
2220            parseID3v2MetaData(data_offset + 6);
2221
2222            break;
2223        }
2224
2225        case FOURCC('-', '-', '-', '-'):
2226        {
2227            mLastCommentMean.clear();
2228            mLastCommentName.clear();
2229            mLastCommentData.clear();
2230            *offset += chunk_size;
2231            break;
2232        }
2233
2234        case FOURCC('s', 'i', 'd', 'x'):
2235        {
2236            parseSegmentIndex(data_offset, chunk_data_size);
2237            *offset += chunk_size;
2238            return UNKNOWN_ERROR; // stop parsing after sidx
2239        }
2240
2241        default:
2242        {
2243            // check if we're parsing 'ilst' for meta keys
2244            // if so, treat type as a number (key-id).
2245            if (underQTMetaPath(mPath, 3)) {
2246                parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
2247            }
2248
2249            *offset += chunk_size;
2250            break;
2251        }
2252    }
2253
2254    return OK;
2255}
2256
2257status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
2258  ALOGV("MPEG4Extractor::parseSegmentIndex");
2259
2260    if (size < 12) {
2261      return -EINVAL;
2262    }
2263
2264    uint32_t flags;
2265    if (!mDataSource->getUInt32(offset, &flags)) {
2266        return ERROR_MALFORMED;
2267    }
2268
2269    uint32_t version = flags >> 24;
2270    flags &= 0xffffff;
2271
2272    ALOGV("sidx version %d", version);
2273
2274    uint32_t referenceId;
2275    if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
2276        return ERROR_MALFORMED;
2277    }
2278
2279    uint32_t timeScale;
2280    if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
2281        return ERROR_MALFORMED;
2282    }
2283    ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
2284    if (timeScale == 0)
2285        return ERROR_MALFORMED;
2286
2287    uint64_t earliestPresentationTime;
2288    uint64_t firstOffset;
2289
2290    offset += 12;
2291    size -= 12;
2292
2293    if (version == 0) {
2294        if (size < 8) {
2295            return -EINVAL;
2296        }
2297        uint32_t tmp;
2298        if (!mDataSource->getUInt32(offset, &tmp)) {
2299            return ERROR_MALFORMED;
2300        }
2301        earliestPresentationTime = tmp;
2302        if (!mDataSource->getUInt32(offset + 4, &tmp)) {
2303            return ERROR_MALFORMED;
2304        }
2305        firstOffset = tmp;
2306        offset += 8;
2307        size -= 8;
2308    } else {
2309        if (size < 16) {
2310            return -EINVAL;
2311        }
2312        if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
2313            return ERROR_MALFORMED;
2314        }
2315        if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
2316            return ERROR_MALFORMED;
2317        }
2318        offset += 16;
2319        size -= 16;
2320    }
2321    ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
2322
2323    if (size < 4) {
2324        return -EINVAL;
2325    }
2326
2327    uint16_t referenceCount;
2328    if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
2329        return ERROR_MALFORMED;
2330    }
2331    offset += 4;
2332    size -= 4;
2333    ALOGV("refcount: %d", referenceCount);
2334
2335    if (size < referenceCount * 12) {
2336        return -EINVAL;
2337    }
2338
2339    uint64_t total_duration = 0;
2340    for (unsigned int i = 0; i < referenceCount; i++) {
2341        uint32_t d1, d2, d3;
2342
2343        if (!mDataSource->getUInt32(offset, &d1) ||     // size
2344            !mDataSource->getUInt32(offset + 4, &d2) || // duration
2345            !mDataSource->getUInt32(offset + 8, &d3)) { // flags
2346            return ERROR_MALFORMED;
2347        }
2348
2349        if (d1 & 0x80000000) {
2350            ALOGW("sub-sidx boxes not supported yet");
2351        }
2352        bool sap = d3 & 0x80000000;
2353        uint32_t saptype = (d3 >> 28) & 7;
2354        if (!sap || (saptype != 1 && saptype != 2)) {
2355            // type 1 and 2 are sync samples
2356            ALOGW("not a stream access point, or unsupported type: %08x", d3);
2357        }
2358        total_duration += d2;
2359        offset += 12;
2360        ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
2361        SidxEntry se;
2362        se.mSize = d1 & 0x7fffffff;
2363        se.mDurationUs = 1000000LL * d2 / timeScale;
2364        mSidxEntries.add(se);
2365    }
2366
2367    uint64_t sidxDuration = total_duration * 1000000 / timeScale;
2368
2369    if (mLastTrack == NULL)
2370        return ERROR_MALFORMED;
2371
2372    int64_t metaDuration;
2373    if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
2374        mLastTrack->meta->setInt64(kKeyDuration, sidxDuration);
2375    }
2376    return OK;
2377}
2378
2379status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
2380    if (size < 8) {
2381        return ERROR_MALFORMED;
2382    }
2383
2384    uint32_t count;
2385    if (!mDataSource->getUInt32(offset + 4, &count)) {
2386        return ERROR_MALFORMED;
2387    }
2388
2389    if (mMetaKeyMap.size() > 0) {
2390        ALOGW("'keys' atom seen again, discarding existing entries");
2391        mMetaKeyMap.clear();
2392    }
2393
2394    off64_t keyOffset = offset + 8;
2395    off64_t stopOffset = offset + size;
2396    for (size_t i = 1; i <= count; i++) {
2397        if (keyOffset + 8 > stopOffset) {
2398            return ERROR_MALFORMED;
2399        }
2400
2401        uint32_t keySize;
2402        if (!mDataSource->getUInt32(keyOffset, &keySize)
2403                || keySize < 8
2404                || keyOffset + keySize > stopOffset) {
2405            return ERROR_MALFORMED;
2406        }
2407
2408        uint32_t type;
2409        if (!mDataSource->getUInt32(keyOffset + 4, &type)
2410                || type != FOURCC('m', 'd', 't', 'a')) {
2411            return ERROR_MALFORMED;
2412        }
2413
2414        keySize -= 8;
2415        keyOffset += 8;
2416
2417        sp<ABuffer> keyData = new ABuffer(keySize);
2418        if (keyData->data() == NULL) {
2419            return ERROR_MALFORMED;
2420        }
2421        if (mDataSource->readAt(
2422                keyOffset, keyData->data(), keySize) < (ssize_t) keySize) {
2423            return ERROR_MALFORMED;
2424        }
2425
2426        AString key((const char *)keyData->data(), keySize);
2427        mMetaKeyMap.add(i, key);
2428
2429        keyOffset += keySize;
2430    }
2431    return OK;
2432}
2433
2434status_t MPEG4Extractor::parseQTMetaVal(
2435        int32_t keyId, off64_t offset, size_t size) {
2436    ssize_t index = mMetaKeyMap.indexOfKey(keyId);
2437    if (index < 0) {
2438        // corresponding key is not present, ignore
2439        return ERROR_MALFORMED;
2440    }
2441
2442    if (size <= 16) {
2443        return ERROR_MALFORMED;
2444    }
2445    uint32_t dataSize;
2446    if (!mDataSource->getUInt32(offset, &dataSize)
2447            || dataSize > size || dataSize <= 16) {
2448        return ERROR_MALFORMED;
2449    }
2450    uint32_t atomFourCC;
2451    if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
2452            || atomFourCC != FOURCC('d', 'a', 't', 'a')) {
2453        return ERROR_MALFORMED;
2454    }
2455    uint32_t dataType;
2456    if (!mDataSource->getUInt32(offset + 8, &dataType)
2457            || ((dataType & 0xff000000) != 0)) {
2458        // not well-known type
2459        return ERROR_MALFORMED;
2460    }
2461
2462    dataSize -= 16;
2463    offset += 16;
2464
2465    if (dataType == 23 && dataSize >= 4) {
2466        // BE Float32
2467        uint32_t val;
2468        if (!mDataSource->getUInt32(offset, &val)) {
2469            return ERROR_MALFORMED;
2470        }
2471        if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
2472            mFileMetaData->setFloat(kKeyCaptureFramerate, *(float *)&val);
2473        }
2474    } else {
2475        // add more keys if needed
2476        ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
2477    }
2478
2479    return OK;
2480}
2481
2482status_t MPEG4Extractor::parseTrackHeader(
2483        off64_t data_offset, off64_t data_size) {
2484    if (data_size < 4) {
2485        return ERROR_MALFORMED;
2486    }
2487
2488    uint8_t version;
2489    if (mDataSource->readAt(data_offset, &version, 1) < 1) {
2490        return ERROR_IO;
2491    }
2492
2493    size_t dynSize = (version == 1) ? 36 : 24;
2494
2495    uint8_t buffer[36 + 60];
2496
2497    if (data_size != (off64_t)dynSize + 60) {
2498        return ERROR_MALFORMED;
2499    }
2500
2501    if (mDataSource->readAt(
2502                data_offset, buffer, data_size) < (ssize_t)data_size) {
2503        return ERROR_IO;
2504    }
2505
2506    uint64_t ctime __unused, mtime __unused, duration __unused;
2507    int32_t id;
2508
2509    if (version == 1) {
2510        ctime = U64_AT(&buffer[4]);
2511        mtime = U64_AT(&buffer[12]);
2512        id = U32_AT(&buffer[20]);
2513        duration = U64_AT(&buffer[28]);
2514    } else if (version == 0) {
2515        ctime = U32_AT(&buffer[4]);
2516        mtime = U32_AT(&buffer[8]);
2517        id = U32_AT(&buffer[12]);
2518        duration = U32_AT(&buffer[20]);
2519    } else {
2520        return ERROR_UNSUPPORTED;
2521    }
2522
2523    if (mLastTrack == NULL)
2524        return ERROR_MALFORMED;
2525
2526    mLastTrack->meta->setInt32(kKeyTrackID, id);
2527
2528    size_t matrixOffset = dynSize + 16;
2529    int32_t a00 = U32_AT(&buffer[matrixOffset]);
2530    int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
2531    int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
2532    int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
2533
2534#if 0
2535    int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
2536    int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
2537
2538    ALOGI("x' = %.2f * x + %.2f * y + %.2f",
2539         a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
2540    ALOGI("y' = %.2f * x + %.2f * y + %.2f",
2541         a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
2542#endif
2543
2544    uint32_t rotationDegrees;
2545
2546    static const int32_t kFixedOne = 0x10000;
2547    if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
2548        // Identity, no rotation
2549        rotationDegrees = 0;
2550    } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2551        rotationDegrees = 90;
2552    } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2553        rotationDegrees = 270;
2554    } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2555        rotationDegrees = 180;
2556    } else {
2557        ALOGW("We only support 0,90,180,270 degree rotation matrices");
2558        rotationDegrees = 0;
2559    }
2560
2561    if (rotationDegrees != 0) {
2562        mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
2563    }
2564
2565    // Handle presentation display size, which could be different
2566    // from the image size indicated by kKeyWidth and kKeyHeight.
2567    uint32_t width = U32_AT(&buffer[dynSize + 52]);
2568    uint32_t height = U32_AT(&buffer[dynSize + 56]);
2569    mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
2570    mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
2571
2572    return OK;
2573}
2574
2575status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
2576    if (size < 4 || size == SIZE_MAX) {
2577        return ERROR_MALFORMED;
2578    }
2579
2580    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2581    if (buffer == NULL) {
2582        return ERROR_MALFORMED;
2583    }
2584    if (mDataSource->readAt(
2585                offset, buffer, size) != (ssize_t)size) {
2586        delete[] buffer;
2587        buffer = NULL;
2588
2589        return ERROR_IO;
2590    }
2591
2592    uint32_t flags = U32_AT(buffer);
2593
2594    uint32_t metadataKey = 0;
2595    char chunk[5];
2596    MakeFourCCString(mPath[4], chunk);
2597    ALOGV("meta: %s @ %lld", chunk, (long long)offset);
2598    switch ((int32_t)mPath[4]) {
2599        case FOURCC(0xa9, 'a', 'l', 'b'):
2600        {
2601            metadataKey = kKeyAlbum;
2602            break;
2603        }
2604        case FOURCC(0xa9, 'A', 'R', 'T'):
2605        {
2606            metadataKey = kKeyArtist;
2607            break;
2608        }
2609        case FOURCC('a', 'A', 'R', 'T'):
2610        {
2611            metadataKey = kKeyAlbumArtist;
2612            break;
2613        }
2614        case FOURCC(0xa9, 'd', 'a', 'y'):
2615        {
2616            metadataKey = kKeyYear;
2617            break;
2618        }
2619        case FOURCC(0xa9, 'n', 'a', 'm'):
2620        {
2621            metadataKey = kKeyTitle;
2622            break;
2623        }
2624        case FOURCC(0xa9, 'w', 'r', 't'):
2625        {
2626            metadataKey = kKeyWriter;
2627            break;
2628        }
2629        case FOURCC('c', 'o', 'v', 'r'):
2630        {
2631            metadataKey = kKeyAlbumArt;
2632            break;
2633        }
2634        case FOURCC('g', 'n', 'r', 'e'):
2635        {
2636            metadataKey = kKeyGenre;
2637            break;
2638        }
2639        case FOURCC(0xa9, 'g', 'e', 'n'):
2640        {
2641            metadataKey = kKeyGenre;
2642            break;
2643        }
2644        case FOURCC('c', 'p', 'i', 'l'):
2645        {
2646            if (size == 9 && flags == 21) {
2647                char tmp[16];
2648                sprintf(tmp, "%d",
2649                        (int)buffer[size - 1]);
2650
2651                mFileMetaData->setCString(kKeyCompilation, tmp);
2652            }
2653            break;
2654        }
2655        case FOURCC('t', 'r', 'k', 'n'):
2656        {
2657            if (size == 16 && flags == 0) {
2658                char tmp[16];
2659                uint16_t* pTrack = (uint16_t*)&buffer[10];
2660                uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2661                sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2662
2663                mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2664            }
2665            break;
2666        }
2667        case FOURCC('d', 'i', 's', 'k'):
2668        {
2669            if ((size == 14 || size == 16) && flags == 0) {
2670                char tmp[16];
2671                uint16_t* pDisc = (uint16_t*)&buffer[10];
2672                uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2673                sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2674
2675                mFileMetaData->setCString(kKeyDiscNumber, tmp);
2676            }
2677            break;
2678        }
2679        case FOURCC('-', '-', '-', '-'):
2680        {
2681            buffer[size] = '\0';
2682            switch (mPath[5]) {
2683                case FOURCC('m', 'e', 'a', 'n'):
2684                    mLastCommentMean.setTo((const char *)buffer + 4);
2685                    break;
2686                case FOURCC('n', 'a', 'm', 'e'):
2687                    mLastCommentName.setTo((const char *)buffer + 4);
2688                    break;
2689                case FOURCC('d', 'a', 't', 'a'):
2690                    if (size < 8) {
2691                        delete[] buffer;
2692                        buffer = NULL;
2693                        ALOGE("b/24346430");
2694                        return ERROR_MALFORMED;
2695                    }
2696                    mLastCommentData.setTo((const char *)buffer + 8);
2697                    break;
2698            }
2699
2700            // Once we have a set of mean/name/data info, go ahead and process
2701            // it to see if its something we are interested in.  Whether or not
2702            // were are interested in the specific tag, make sure to clear out
2703            // the set so we can be ready to process another tuple should one
2704            // show up later in the file.
2705            if ((mLastCommentMean.length() != 0) &&
2706                (mLastCommentName.length() != 0) &&
2707                (mLastCommentData.length() != 0)) {
2708
2709                if (mLastCommentMean == "com.apple.iTunes"
2710                        && mLastCommentName == "iTunSMPB") {
2711                    int32_t delay, padding;
2712                    if (sscanf(mLastCommentData,
2713                               " %*x %x %x %*x", &delay, &padding) == 2) {
2714                        if (mLastTrack == NULL)
2715                            return ERROR_MALFORMED;
2716
2717                        mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2718                        mLastTrack->meta->setInt32(kKeyEncoderPadding, padding);
2719                    }
2720                }
2721
2722                mLastCommentMean.clear();
2723                mLastCommentName.clear();
2724                mLastCommentData.clear();
2725            }
2726            break;
2727        }
2728
2729        default:
2730            break;
2731    }
2732
2733    if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) {
2734        if (metadataKey == kKeyAlbumArt) {
2735            mFileMetaData->setData(
2736                    kKeyAlbumArt, MetaData::TYPE_NONE,
2737                    buffer + 8, size - 8);
2738        } else if (metadataKey == kKeyGenre) {
2739            if (flags == 0) {
2740                // uint8_t genre code, iTunes genre codes are
2741                // the standard id3 codes, except they start
2742                // at 1 instead of 0 (e.g. Pop is 14, not 13)
2743                // We use standard id3 numbering, so subtract 1.
2744                int genrecode = (int)buffer[size - 1];
2745                genrecode--;
2746                if (genrecode < 0) {
2747                    genrecode = 255; // reserved for 'unknown genre'
2748                }
2749                char genre[10];
2750                sprintf(genre, "%d", genrecode);
2751
2752                mFileMetaData->setCString(metadataKey, genre);
2753            } else if (flags == 1) {
2754                // custom genre string
2755                buffer[size] = '\0';
2756
2757                mFileMetaData->setCString(
2758                        metadataKey, (const char *)buffer + 8);
2759            }
2760        } else {
2761            buffer[size] = '\0';
2762
2763            mFileMetaData->setCString(
2764                    metadataKey, (const char *)buffer + 8);
2765        }
2766    }
2767
2768    delete[] buffer;
2769    buffer = NULL;
2770
2771    return OK;
2772}
2773
2774status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
2775    if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
2776        return ERROR_MALFORMED;
2777    }
2778
2779    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2780    if (buffer == NULL) {
2781        return ERROR_MALFORMED;
2782    }
2783    if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
2784        delete[] buffer;
2785        buffer = NULL;
2786
2787        return ERROR_IO;
2788    }
2789
2790    int32_t type = U32_AT(&buffer[0]);
2791    if ((type == FOURCC('n', 'c', 'l', 'x') && size >= 11)
2792            || (type == FOURCC('n', 'c', 'l', 'c' && size >= 10))) {
2793        int32_t primaries = U16_AT(&buffer[4]);
2794        int32_t transfer = U16_AT(&buffer[6]);
2795        int32_t coeffs = U16_AT(&buffer[8]);
2796        bool fullRange = (type == FOURCC('n', 'c', 'l', 'x')) && (buffer[10] & 128);
2797
2798        ColorAspects aspects;
2799        ColorUtils::convertIsoColorAspectsToCodecAspects(
2800                primaries, transfer, coeffs, fullRange, aspects);
2801
2802        // only store the first color specification
2803        if (!mLastTrack->meta->hasData(kKeyColorPrimaries)) {
2804            mLastTrack->meta->setInt32(kKeyColorPrimaries, aspects.mPrimaries);
2805            mLastTrack->meta->setInt32(kKeyTransferFunction, aspects.mTransfer);
2806            mLastTrack->meta->setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs);
2807            mLastTrack->meta->setInt32(kKeyColorRange, aspects.mRange);
2808        }
2809    }
2810
2811    delete[] buffer;
2812    buffer = NULL;
2813
2814    return OK;
2815}
2816
2817status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
2818    if (size < 4 || size == SIZE_MAX) {
2819        return ERROR_MALFORMED;
2820    }
2821
2822    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2823    if (buffer == NULL) {
2824        return ERROR_MALFORMED;
2825    }
2826    if (mDataSource->readAt(
2827                offset, buffer, size) != (ssize_t)size) {
2828        delete[] buffer;
2829        buffer = NULL;
2830
2831        return ERROR_IO;
2832    }
2833
2834    uint32_t metadataKey = 0;
2835    switch (mPath[depth]) {
2836        case FOURCC('t', 'i', 't', 'l'):
2837        {
2838            metadataKey = kKeyTitle;
2839            break;
2840        }
2841        case FOURCC('p', 'e', 'r', 'f'):
2842        {
2843            metadataKey = kKeyArtist;
2844            break;
2845        }
2846        case FOURCC('a', 'u', 't', 'h'):
2847        {
2848            metadataKey = kKeyWriter;
2849            break;
2850        }
2851        case FOURCC('g', 'n', 'r', 'e'):
2852        {
2853            metadataKey = kKeyGenre;
2854            break;
2855        }
2856        case FOURCC('a', 'l', 'b', 'm'):
2857        {
2858            if (buffer[size - 1] != '\0') {
2859              char tmp[4];
2860              sprintf(tmp, "%u", buffer[size - 1]);
2861
2862              mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2863            }
2864
2865            metadataKey = kKeyAlbum;
2866            break;
2867        }
2868        case FOURCC('y', 'r', 'r', 'c'):
2869        {
2870            char tmp[5];
2871            uint16_t year = U16_AT(&buffer[4]);
2872
2873            if (year < 10000) {
2874                sprintf(tmp, "%u", year);
2875
2876                mFileMetaData->setCString(kKeyYear, tmp);
2877            }
2878            break;
2879        }
2880
2881        default:
2882            break;
2883    }
2884
2885    if (metadataKey > 0) {
2886        bool isUTF8 = true; // Common case
2887        char16_t *framedata = NULL;
2888        int len16 = 0; // Number of UTF-16 characters
2889
2890        // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
2891        if (size < 6) {
2892            return ERROR_MALFORMED;
2893        }
2894
2895        if (size - 6 >= 4) {
2896            len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
2897            framedata = (char16_t *)(buffer + 6);
2898            if (0xfffe == *framedata) {
2899                // endianness marker (BOM) doesn't match host endianness
2900                for (int i = 0; i < len16; i++) {
2901                    framedata[i] = bswap_16(framedata[i]);
2902                }
2903                // BOM is now swapped to 0xfeff, we will execute next block too
2904            }
2905
2906            if (0xfeff == *framedata) {
2907                // Remove the BOM
2908                framedata++;
2909                len16--;
2910                isUTF8 = false;
2911            }
2912            // else normal non-zero-length UTF-8 string
2913            // we can't handle UTF-16 without BOM as there is no other
2914            // indication of encoding.
2915        }
2916
2917        if (isUTF8) {
2918            buffer[size] = 0;
2919            mFileMetaData->setCString(metadataKey, (const char *)buffer + 6);
2920        } else {
2921            // Convert from UTF-16 string to UTF-8 string.
2922            String8 tmpUTF8str(framedata, len16);
2923            mFileMetaData->setCString(metadataKey, tmpUTF8str.string());
2924        }
2925    }
2926
2927    delete[] buffer;
2928    buffer = NULL;
2929
2930    return OK;
2931}
2932
2933void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
2934    ID3 id3(mDataSource, true /* ignorev1 */, offset);
2935
2936    if (id3.isValid()) {
2937        struct Map {
2938            int key;
2939            const char *tag1;
2940            const char *tag2;
2941        };
2942        static const Map kMap[] = {
2943            { kKeyAlbum, "TALB", "TAL" },
2944            { kKeyArtist, "TPE1", "TP1" },
2945            { kKeyAlbumArtist, "TPE2", "TP2" },
2946            { kKeyComposer, "TCOM", "TCM" },
2947            { kKeyGenre, "TCON", "TCO" },
2948            { kKeyTitle, "TIT2", "TT2" },
2949            { kKeyYear, "TYE", "TYER" },
2950            { kKeyAuthor, "TXT", "TEXT" },
2951            { kKeyCDTrackNumber, "TRK", "TRCK" },
2952            { kKeyDiscNumber, "TPA", "TPOS" },
2953            { kKeyCompilation, "TCP", "TCMP" },
2954        };
2955        static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
2956
2957        for (size_t i = 0; i < kNumMapEntries; ++i) {
2958            if (!mFileMetaData->hasData(kMap[i].key)) {
2959                ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
2960                if (it->done()) {
2961                    delete it;
2962                    it = new ID3::Iterator(id3, kMap[i].tag2);
2963                }
2964
2965                if (it->done()) {
2966                    delete it;
2967                    continue;
2968                }
2969
2970                String8 s;
2971                it->getString(&s);
2972                delete it;
2973
2974                mFileMetaData->setCString(kMap[i].key, s);
2975            }
2976        }
2977
2978        size_t dataSize;
2979        String8 mime;
2980        const void *data = id3.getAlbumArt(&dataSize, &mime);
2981
2982        if (data) {
2983            mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
2984            mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string());
2985        }
2986    }
2987}
2988
2989sp<IMediaSource> MPEG4Extractor::getTrack(size_t index) {
2990    status_t err;
2991    if ((err = readMetaData()) != OK) {
2992        return NULL;
2993    }
2994
2995    Track *track = mFirstTrack;
2996    while (index > 0) {
2997        if (track == NULL) {
2998            return NULL;
2999        }
3000
3001        track = track->next;
3002        --index;
3003    }
3004
3005    if (track == NULL) {
3006        return NULL;
3007    }
3008
3009
3010    Trex *trex = NULL;
3011    int32_t trackId;
3012    if (track->meta->findInt32(kKeyTrackID, &trackId)) {
3013        for (size_t i = 0; i < mTrex.size(); i++) {
3014            Trex *t = &mTrex.editItemAt(i);
3015            if (t->track_ID == (uint32_t) trackId) {
3016                trex = t;
3017                break;
3018            }
3019        }
3020    } else {
3021        ALOGE("b/21657957");
3022        return NULL;
3023    }
3024
3025    ALOGV("getTrack called, pssh: %zu", mPssh.size());
3026
3027    const char *mime;
3028    if (!track->meta->findCString(kKeyMIMEType, &mime)) {
3029        return NULL;
3030    }
3031
3032    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
3033        uint32_t type;
3034        const void *data;
3035        size_t size;
3036        if (!track->meta->findData(kKeyAVCC, &type, &data, &size)) {
3037            return NULL;
3038        }
3039
3040        const uint8_t *ptr = (const uint8_t *)data;
3041
3042        if (size < 7 || ptr[0] != 1) {  // configurationVersion == 1
3043            return NULL;
3044        }
3045    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
3046        uint32_t type;
3047        const void *data;
3048        size_t size;
3049        if (!track->meta->findData(kKeyHVCC, &type, &data, &size)) {
3050            return NULL;
3051        }
3052
3053        const uint8_t *ptr = (const uint8_t *)data;
3054
3055        if (size < 22 || ptr[0] != 1) {  // configurationVersion == 1
3056            return NULL;
3057        }
3058    }
3059
3060    return new MPEG4Source(this,
3061            track->meta, mDataSource, track->timescale, track->sampleTable,
3062            mSidxEntries, trex, mMoofOffset);
3063}
3064
3065// static
3066status_t MPEG4Extractor::verifyTrack(Track *track) {
3067    const char *mime;
3068    CHECK(track->meta->findCString(kKeyMIMEType, &mime));
3069
3070    uint32_t type;
3071    const void *data;
3072    size_t size;
3073    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
3074        if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
3075                || type != kTypeAVCC) {
3076            return ERROR_MALFORMED;
3077        }
3078    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
3079        if (!track->meta->findData(kKeyHVCC, &type, &data, &size)
3080                    || type != kTypeHVCC) {
3081            return ERROR_MALFORMED;
3082        }
3083    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
3084            || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
3085            || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
3086        if (!track->meta->findData(kKeyESDS, &type, &data, &size)
3087                || type != kTypeESDS) {
3088            return ERROR_MALFORMED;
3089        }
3090    }
3091
3092    if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
3093        // Make sure we have all the metadata we need.
3094        ALOGE("stbl atom missing/invalid.");
3095        return ERROR_MALFORMED;
3096    }
3097
3098    if (track->timescale == 0) {
3099        ALOGE("timescale invalid.");
3100        return ERROR_MALFORMED;
3101    }
3102
3103    return OK;
3104}
3105
// Audio object type (AOT) codes. Only the enumerators that are not commented
// out are defined; the remaining entries are listed for reference only.
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1,
    // AOT_NULL_OBJECT      = 0,
    // AOT_AAC_MAIN         = 1,   // Main profile
    AOT_AAC_LC = 2,                // Low Complexity object
    // AOT_AAC_SSR          = 3,
    // AOT_AAC_LTP          = 4,
    AOT_SBR = 5,
    // AOT_AAC_SCAL         = 6,
    // AOT_TWIN_VQ          = 7,
    // AOT_CELP             = 8,
    // AOT_HVXC             = 9,
    // AOT_RSVD_10          = 10,  // (reserved)
    // AOT_RSVD_11          = 11,  // (reserved)
    // AOT_TTSI             = 12,  // TTSI Object
    // AOT_MAIN_SYNTH       = 13,  // Main Synthetic object
    // AOT_WAV_TAB_SYNTH    = 14,  // Wavetable Synthesis object
    // AOT_GEN_MIDI         = 15,  // General MIDI object
    // AOT_ALG_SYNTH_AUD_FX = 16,  // Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC = 17,            // Error Resilient(ER) AAC Low Complexity
    // AOT_RSVD_18          = 18,  // (reserved)
    // AOT_ER_AAC_LTP       = 19,  // Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL = 20,          // Error Resilient(ER) AAC Scalable object
    // AOT_ER_TWIN_VQ       = 21,  // Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC = 22,              // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD = 23,            // Error Resilient(ER) AAC LowDelay object
    // AOT_ER_CELP          = 24,  // Error Resilient(ER) CELP object
    // AOT_ER_HVXC          = 25,  // Error Resilient(ER) HVXC object
    // AOT_ER_HILN          = 26,  // Error Resilient(ER) HILN object
    // AOT_ER_PARA          = 27,  // Error Resilient(ER) Parametric object
    // AOT_RSVD_28          = 28,  // might become SSC
    AOT_PS = 29,                   // PS, Parametric Stereo (includes SBR)
    // AOT_MPEGS            = 30,  // MPEG Surround

    AOT_ESCAPE = 31,               // Signal AOT uses more than 5 bits

    // AOT_MP3ONMP4_L1      = 32,  // MPEG-Layer1 in mp4
    // AOT_MP3ONMP4_L2      = 33,  // MPEG-Layer2 in mp4
    // AOT_MP3ONMP4_L3      = 34,  // MPEG-Layer3 in mp4
    // AOT_RSVD_35          = 35,  // might become DST
    // AOT_RSVD_36          = 36,  // might become ALS
    // AOT_AAC_SLS          = 37,  // AAC + SLS
    // AOT_SLS              = 38,  // SLS
    // AOT_ER_AAC_ELD       = 39,  // AAC Enhanced Low Delay

    // AOT_USAC             = 42,  // USAC
    // AOT_SAOC             = 43,  // SAOC
    // AOT_LD_MPEGS         = 44,  // Low Delay MPEG Surround

    // AOT_RSVD50           = 50,  // Interim AOT for Rsvd50
};
3157
// Refines the audio parameters of the most recently parsed track (mLastTrack)
// using the ESDS blob |esds_data| of |esds_size| bytes.
//
// The object type indication is inspected first: 0xe1 re-labels the track as
// QCELP and 0x6b (MP3) is rejected as unsupported. Otherwise the codec
// specific data is parsed bit by bit to obtain the audio object type, the
// sample rate and the channel count, which are then written to the track's
// metadata (kKeyAACAOT, kKeySampleRate, kKeyChannelCount).
//
// Returns OK on success (including the case of empty codec specific data),
// ERROR_MALFORMED if the data is truncated, uses a reserved index, or there
// is no current track, and ERROR_UNSUPPORTED for MP3 or for channel
// configurations that cannot be mapped to a channel count.
// Aborts via CHECK if the track has no previous sample rate or channel count.
status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
        const void *esds_data, size_t esds_size) {
    ESDS esds(esds_data, esds_size);

    uint8_t objectTypeIndication;
    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
        return ERROR_MALFORMED;
    }

    if (objectTypeIndication == 0xe1) {
        // This isn't MPEG4 audio at all, it's QCELP 14k...
        if (mLastTrack == NULL)
            return ERROR_MALFORMED;

        mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
        return OK;
    }

    if (objectTypeIndication  == 0x6b) {
        // The media subtype is MP3 audio
        // Our software MP3 audio decoder may not be able to handle
        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
        ALOGE("MP3 track in MP4/3GPP file is not supported");
        return ERROR_UNSUPPORTED;
    }

    const uint8_t *csd;
    size_t csd_size;
    if (esds.getCodecSpecificInfo(
                (const void **)&csd, &csd_size) != OK) {
        return ERROR_MALFORMED;
    }

    if (kUseHexDump) {
        printf("ESD of size %zu\n", csd_size);
        hexdump(csd, csd_size);
    }

    if (csd_size == 0) {
        // There's no further information, i.e. no codec specific data
        // Let's assume that the information provided in the mpeg4 headers
        // is accurate and hope for the best.

        return OK;
    }

    // The unguarded reads below (object type, optional 6-bit extension and
    // frequency index) consume at most 15 bits, so two bytes are required.
    if (csd_size < 2) {
        return ERROR_MALFORMED;
    }

    // Sample rates selected by a 4-bit frequency index; indices 13 and 14
    // have no entry and are rejected below, 15 means an explicit 24-bit rate.
    static uint32_t kSamplingRate[] = {
        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
        16000, 12000, 11025, 8000, 7350
    };

    ABitReader br(csd, csd_size);
    uint32_t objectType = br.getBits(5);

    if (objectType == 31) {  // escape value => object type is 32 + next 6 bits
        objectType = 32 + br.getBits(6);
    }

    if (mLastTrack == NULL)
        return ERROR_MALFORMED;

    //keep AOT type
    mLastTrack->meta->setInt32(kKeyAACAOT, objectType);

    uint32_t freqIndex = br.getBits(4);

    int32_t sampleRate = 0;
    int32_t numChannels = 0;
    if (freqIndex == 15) {
        // Explicit 24-bit sample rate followed by the 4-bit channel config.
        if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
        sampleRate = br.getBits(24);
        numChannels = br.getBits(4);
    } else {
        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
        numChannels = br.getBits(4);

        // Indices 13 and 14 are outside kSamplingRate.
        if (freqIndex == 13 || freqIndex == 14) {
            return ERROR_MALFORMED;
        }

        sampleRate = kSamplingRate[freqIndex];
    }

    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
        uint32_t extFreqIndex = br.getBits(4);
        // The extension rate is parsed only to advance the reader; it is not
        // stored anywhere yet (see the TODO below).
        int32_t extSampleRate __unused;
        if (extFreqIndex == 15) {
            if (csd_size < 8) {
                return ERROR_MALFORMED;
            }
            if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
            extSampleRate = br.getBits(24);
        } else {
            if (extFreqIndex == 13 || extFreqIndex == 14) {
                return ERROR_MALFORMED;
            }
            extSampleRate = kSamplingRate[extFreqIndex];
        }
        //TODO: save the extension sampling rate value in meta data =>
        //      mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
    }

    // Translate the 4-bit channel configuration into a channel count. A value
    // of 0 is kept as is here and resolved further down.
    switch (numChannels) {
        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
        case 0:
        case 1:// FC
        case 2:// FL FR
        case 3:// FC, FL FR
        case 4:// FC, FL FR, RC
        case 5:// FC, FL FR, SL SR
        case 6:// FC, FL FR, SL SR, LFE
            //numChannels already contains the right value
            break;
        case 11:// FC, FL FR, SL SR, RC, LFE
            numChannels = 7;
            break;
        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
        case 12:// FC, FL  FR,  SL SR, RL RR, LFE
        case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
            numChannels = 8;
            break;
        default:
            return ERROR_UNSUPPORTED;
    }

    {
        // For SBR/PS the object type is read again from the stream (with the
        // same 5-bit + optional 6-bit escape encoding) and replaces the local
        // value used by the checks below.
        if (objectType == AOT_SBR || objectType == AOT_PS) {
            if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
            objectType = br.getBits(5);

            if (objectType == AOT_ESCAPE) {
                if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
                objectType = 32 + br.getBits(6);
            }
        }
        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
                objectType == AOT_ER_BSAC) {
            if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
            const int32_t frameLengthFlag __unused = br.getBits(1);

            const int32_t dependsOnCoreCoder = br.getBits(1);

            if (dependsOnCoreCoder ) {
                if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
                const int32_t coreCoderDelay __unused = br.getBits(14);
            }

            // Read the extension flag if it is present; otherwise fall back
            // to a per-object-type default and log a warning.
            int32_t extensionFlag = -1;
            if (br.numBitsLeft() > 0) {
                extensionFlag = br.getBits(1);
            } else {
                switch (objectType) {
                // 14496-3 4.5.1.1 extensionFlag
                case AOT_AAC_LC:
                    extensionFlag = 0;
                    break;
                case AOT_ER_AAC_LC:
                case AOT_ER_AAC_SCAL:
                case AOT_ER_BSAC:
                case AOT_ER_AAC_LD:
                    extensionFlag = 1;
                    break;
                default:
                    return ERROR_MALFORMED;
                    break;
                }
                ALOGW("csd missing extension flag; assuming %d for object type %u.",
                        extensionFlag, objectType);
            }

            // Channel configuration 0: the layout is described explicitly in
            // the stream, so count the channels from the element lists.
            if (numChannels == 0) {
                int32_t channelsEffectiveNum = 0;
                int32_t channelsNum = 0;
                // The fixed-size header fields read next total 32 bits.
                if (br.numBitsLeft() < 32) {
                    return ERROR_MALFORMED;
                }
                const int32_t ElementInstanceTag __unused = br.getBits(4);
                const int32_t Profile __unused = br.getBits(2);
                const int32_t SamplingFrequencyIndex __unused = br.getBits(4);
                const int32_t NumFrontChannelElements = br.getBits(4);
                const int32_t NumSideChannelElements = br.getBits(4);
                const int32_t NumBackChannelElements = br.getBits(4);
                const int32_t NumLfeChannelElements = br.getBits(2);
                const int32_t NumAssocDataElements __unused = br.getBits(3);
                const int32_t NumValidCcElements __unused = br.getBits(4);

                const int32_t MonoMixdownPresent = br.getBits(1);

                if (MonoMixdownPresent != 0) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    const int32_t MonoMixdownElementNumber __unused = br.getBits(4);
                }

                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
                const int32_t StereoMixdownPresent = br.getBits(1);
                if (StereoMixdownPresent != 0) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    const int32_t StereoMixdownElementNumber __unused = br.getBits(4);
                }

                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
                if (MatrixMixdownIndexPresent != 0) {
                    if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
                    const int32_t MatrixMixdownIndex __unused = br.getBits(2);
                    const int32_t PseudoSurroundEnable __unused = br.getBits(1);
                }

                // Front, side and back elements each contribute two channels
                // when flagged as a channel pair (IsCpe), otherwise one.
                int i;
                for (i=0; i < NumFrontChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t FrontElementIsCpe = br.getBits(1);
                    const int32_t FrontElementTagSelect __unused = br.getBits(4);
                    channelsNum += FrontElementIsCpe ? 2 : 1;
                }

                for (i=0; i < NumSideChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t SideElementIsCpe = br.getBits(1);
                    const int32_t SideElementTagSelect __unused = br.getBits(4);
                    channelsNum += SideElementIsCpe ? 2 : 1;
                }

                for (i=0; i < NumBackChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t BackElementIsCpe = br.getBits(1);
                    const int32_t BackElementTagSelect __unused = br.getBits(4);
                    channelsNum += BackElementIsCpe ? 2 : 1;
                }
                // Channel count before the LFE elements are added; only
                // logged below.
                channelsEffectiveNum = channelsNum;

                for (i=0; i < NumLfeChannelElements; i++) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    const int32_t LfeElementTagSelect __unused = br.getBits(4);
                    channelsNum += 1;
                }
                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
                numChannels = channelsNum;
            }
        }
    }

    // Still zero: no usable channel count could be derived.
    if (numChannels == 0) {
        return ERROR_UNSUPPORTED;
    }

    if (mLastTrack == NULL)
        return ERROR_MALFORMED;

    // The values parsed here override what was stored before; differences
    // are only logged.
    int32_t prevSampleRate;
    CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));

    if (prevSampleRate != sampleRate) {
        ALOGV("mpeg4 audio sample rate different from previous setting. "
             "was: %d, now: %d", prevSampleRate, sampleRate);
    }

    mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);

    int32_t prevChannelCount;
    CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));

    if (prevChannelCount != numChannels) {
        ALOGV("mpeg4 audio channel count different from previous setting. "
             "was: %d, now: %d", prevChannelCount, numChannels);
    }

    mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);

    return OK;
}
3436
3437////////////////////////////////////////////////////////////////////////////////
3438
3439MPEG4Source::MPEG4Source(
3440        const sp<MPEG4Extractor> &owner,
3441        const sp<MetaData> &format,
3442        const sp<DataSource> &dataSource,
3443        int32_t timeScale,
3444        const sp<SampleTable> &sampleTable,
3445        Vector<SidxEntry> &sidx,
3446        const Trex *trex,
3447        off64_t firstMoofOffset)
3448    : mOwner(owner),
3449      mFormat(format),
3450      mDataSource(dataSource),
3451      mTimescale(timeScale),
3452      mSampleTable(sampleTable),
3453      mCurrentSampleIndex(0),
3454      mCurrentFragmentIndex(0),
3455      mSegments(sidx),
3456      mTrex(trex),
3457      mFirstMoofOffset(firstMoofOffset),
3458      mCurrentMoofOffset(firstMoofOffset),
3459      mCurrentTime(0),
3460      mCurrentSampleInfoAllocSize(0),
3461      mCurrentSampleInfoSizes(NULL),
3462      mCurrentSampleInfoOffsetsAllocSize(0),
3463      mCurrentSampleInfoOffsets(NULL),
3464      mIsAVC(false),
3465      mIsHEVC(false),
3466      mNALLengthSize(0),
3467      mStarted(false),
3468      mGroup(NULL),
3469      mBuffer(NULL),
3470      mWantsNALFragments(false),
3471      mSrcBuffer(NULL) {
3472
3473    memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
3474
3475    mFormat->findInt32(kKeyCryptoMode, &mCryptoMode);
3476    mDefaultIVSize = 0;
3477    mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
3478    uint32_t keytype;
3479    const void *key;
3480    size_t keysize;
3481    if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
3482        CHECK(keysize <= 16);
3483        memset(mCryptoKey, 0, 16);
3484        memcpy(mCryptoKey, key, keysize);
3485    }
3486
3487    const char *mime;
3488    bool success = mFormat->findCString(kKeyMIMEType, &mime);
3489    CHECK(success);
3490
3491    mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
3492    mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC);
3493
3494    if (mIsAVC) {
3495        uint32_t type;
3496        const void *data;
3497        size_t size;
3498        CHECK(format->findData(kKeyAVCC, &type, &data, &size));
3499
3500        const uint8_t *ptr = (const uint8_t *)data;
3501
3502        CHECK(size >= 7);
3503        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
3504
3505        // The number of bytes used to encode the length of a NAL unit.
3506        mNALLengthSize = 1 + (ptr[4] & 3);
3507    } else if (mIsHEVC) {
3508        uint32_t type;
3509        const void *data;
3510        size_t size;
3511        CHECK(format->findData(kKeyHVCC, &type, &data, &size));
3512
3513        const uint8_t *ptr = (const uint8_t *)data;
3514
3515        CHECK(size >= 22);
3516        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
3517
3518        mNALLengthSize = 1 + (ptr[14 + 7] & 3);
3519    }
3520
3521    CHECK(format->findInt32(kKeyTrackID, &mTrackId));
3522
3523    if (mFirstMoofOffset != 0) {
3524        off64_t offset = mFirstMoofOffset;
3525        parseChunk(&offset);
3526    }
3527}
3528
3529MPEG4Source::~MPEG4Source() {
3530    if (mStarted) {
3531        stop();
3532    }
3533    free(mCurrentSampleInfoSizes);
3534    free(mCurrentSampleInfoOffsets);
3535}
3536
3537status_t MPEG4Source::start(MetaData *params) {
3538    Mutex::Autolock autoLock(mLock);
3539
3540    CHECK(!mStarted);
3541
3542    int32_t val;
3543    if (params && params->findInt32(kKeyWantsNALFragments, &val)
3544        && val != 0) {
3545        mWantsNALFragments = true;
3546    } else {
3547        mWantsNALFragments = false;
3548    }
3549
3550    int32_t tmp;
3551    CHECK(mFormat->findInt32(kKeyMaxInputSize, &tmp));
3552    size_t max_size = tmp;
3553
3554    // A somewhat arbitrary limit that should be sufficient for 8k video frames
3555    // If you see the message below for a valid input stream: increase the limit
3556    if (max_size > 64 * 1024 * 1024) {
3557        ALOGE("bogus max input size: %zu", max_size);
3558        return ERROR_MALFORMED;
3559    }
3560    mGroup = new MediaBufferGroup;
3561    mGroup->add_buffer(new MediaBuffer(max_size));
3562
3563    mSrcBuffer = new (std::nothrow) uint8_t[max_size];
3564    if (mSrcBuffer == NULL) {
3565        // file probably specified a bad max size
3566        delete mGroup;
3567        mGroup = NULL;
3568        return ERROR_MALFORMED;
3569    }
3570
3571    mStarted = true;
3572
3573    return OK;
3574}
3575
3576status_t MPEG4Source::stop() {
3577    Mutex::Autolock autoLock(mLock);
3578
3579    CHECK(mStarted);
3580
3581    if (mBuffer != NULL) {
3582        mBuffer->release();
3583        mBuffer = NULL;
3584    }
3585
3586    delete[] mSrcBuffer;
3587    mSrcBuffer = NULL;
3588
3589    delete mGroup;
3590    mGroup = NULL;
3591
3592    mStarted = false;
3593    mCurrentSampleIndex = 0;
3594
3595    return OK;
3596}
3597
3598status_t MPEG4Source::parseChunk(off64_t *offset) {
3599    uint32_t hdr[2];
3600    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3601        return ERROR_IO;
3602    }
3603    uint64_t chunk_size = ntohl(hdr[0]);
3604    uint32_t chunk_type = ntohl(hdr[1]);
3605    off64_t data_offset = *offset + 8;
3606
3607    if (chunk_size == 1) {
3608        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
3609            return ERROR_IO;
3610        }
3611        chunk_size = ntoh64(chunk_size);
3612        data_offset += 8;
3613
3614        if (chunk_size < 16) {
3615            // The smallest valid chunk is 16 bytes long in this case.
3616            return ERROR_MALFORMED;
3617        }
3618    } else if (chunk_size < 8) {
3619        // The smallest valid chunk is 8 bytes long.
3620        return ERROR_MALFORMED;
3621    }
3622
3623    char chunk[5];
3624    MakeFourCCString(chunk_type, chunk);
3625    ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
3626
3627    off64_t chunk_data_size = *offset + chunk_size - data_offset;
3628
3629    switch(chunk_type) {
3630
3631        case FOURCC('t', 'r', 'a', 'f'):
3632        case FOURCC('m', 'o', 'o', 'f'): {
3633            off64_t stop_offset = *offset + chunk_size;
3634            *offset = data_offset;
3635            while (*offset < stop_offset) {
3636                status_t err = parseChunk(offset);
3637                if (err != OK) {
3638                    return err;
3639                }
3640            }
3641            if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3642                // *offset points to the box following this moof. Find the next moof from there.
3643
3644                while (true) {
3645                    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3646                        return ERROR_END_OF_STREAM;
3647                    }
3648                    chunk_size = ntohl(hdr[0]);
3649                    chunk_type = ntohl(hdr[1]);
3650                    if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3651                        mNextMoofOffset = *offset;
3652                        break;
3653                    }
3654                    *offset += chunk_size;
3655                }
3656            }
3657            break;
3658        }
3659
3660        case FOURCC('t', 'f', 'h', 'd'): {
3661                status_t err;
3662                if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
3663                    return err;
3664                }
3665                *offset += chunk_size;
3666                break;
3667        }
3668
3669        case FOURCC('t', 'r', 'u', 'n'): {
3670                status_t err;
3671                if (mLastParsedTrackId == mTrackId) {
3672                    if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
3673                        return err;
3674                    }
3675                }
3676
3677                *offset += chunk_size;
3678                break;
3679        }
3680
3681        case FOURCC('s', 'a', 'i', 'z'): {
3682            status_t err;
3683            if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
3684                return err;
3685            }
3686            *offset += chunk_size;
3687            break;
3688        }
3689        case FOURCC('s', 'a', 'i', 'o'): {
3690            status_t err;
3691            if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
3692                return err;
3693            }
3694            *offset += chunk_size;
3695            break;
3696        }
3697
3698        case FOURCC('m', 'd', 'a', 't'): {
3699            // parse DRM info if present
3700            ALOGV("MPEG4Source::parseChunk mdat");
3701            // if saiz/saoi was previously observed, do something with the sampleinfos
3702            *offset += chunk_size;
3703            break;
3704        }
3705
3706        default: {
3707            *offset += chunk_size;
3708            break;
3709        }
3710    }
3711    return OK;
3712}
3713
3714status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
3715        off64_t offset, off64_t /* size */) {
3716    ALOGV("parseSampleAuxiliaryInformationSizes");
3717    // 14496-12 8.7.12
3718    uint8_t version;
3719    if (mDataSource->readAt(
3720            offset, &version, sizeof(version))
3721            < (ssize_t)sizeof(version)) {
3722        return ERROR_IO;
3723    }
3724
3725    if (version != 0) {
3726        return ERROR_UNSUPPORTED;
3727    }
3728    offset++;
3729
3730    uint32_t flags;
3731    if (!mDataSource->getUInt24(offset, &flags)) {
3732        return ERROR_IO;
3733    }
3734    offset += 3;
3735