MPEG4Extractor.cpp revision c1e24ce7fe17981e80f85d2345c53599ba5f850d
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MPEG4Extractor"
19
20#include <ctype.h>
21#include <inttypes.h>
22#include <memory>
23#include <stdint.h>
24#include <stdlib.h>
25#include <string.h>
26
27#include <utils/Log.h>
28
29#include "MPEG4Extractor.h"
30#include "SampleTable.h"
31#include "ItemTable.h"
32#include "include/ESDS.h"
33
34#include <media/ExtractorUtils.h>
35#include <media/MediaTrack.h>
36#include <media/stagefright/foundation/ABitReader.h>
37#include <media/stagefright/foundation/ABuffer.h>
38#include <media/stagefright/foundation/ADebug.h>
39#include <media/stagefright/foundation/AMessage.h>
40#include <media/stagefright/foundation/AUtils.h>
41#include <media/stagefright/foundation/ByteUtils.h>
42#include <media/stagefright/foundation/ColorUtils.h>
43#include <media/stagefright/foundation/avc_utils.h>
44#include <media/stagefright/foundation/hexdump.h>
45#include <media/stagefright/MediaBufferBase.h>
46#include <media/stagefright/MediaBufferGroup.h>
47#include <media/stagefright/MediaDefs.h>
48#include <media/stagefright/MetaData.h>
49#include <utils/String8.h>
50
51#include <byteswap.h>
52#include "include/ID3.h"
53
54#ifndef UINT32_MAX
55#define UINT32_MAX       (4294967295U)
56#endif
57
58namespace android {
59
// Size limits applied while parsing the file.
enum {
    // max track header chunk to return
    // (getTrackMetaData() clamps the MPEG2 stream-header read to this many bytes)
    kMaxTrackHeaderSize = 32,

    // maximum size of an atom. Some atoms can be bigger according to the spec,
    // but we only allow up to this size.
    // (parseChunk() rejects any atom other than 'mdat' whose payload exceeds it)
    kMaxAtomSize = 64 * 1024 * 1024,
};
68
// MediaTrack implementation declared by this file for reading the samples of
// one track. Only the declaration is visible here; the notes on individual
// members below are inferred from their names and types unless stated
// otherwise.
class MPEG4Source : public MediaTrack {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    // "format" and "sidx" are stored as references (mFormat / mSegments), so
    // the referenced objects must outlive this source.
    MPEG4Source(MetaDataBase &format,
                DataSourceBase *dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset,
                const sp<ItemTable> &itemTable);
    virtual status_t init();

    virtual status_t start(MetaDataBase *params = NULL);
    virtual status_t stop();

    virtual status_t getFormat(MetaDataBase &);

    virtual status_t read(MediaBufferBase **buffer, const ReadOptions *options = NULL);
    // This track always reports support for non-blocking reads.
    virtual bool supportNonblockingRead() { return true; }
    // NOTE(review): presumably the read path used for fragmented (moof-based)
    // files - confirm against the implementation.
    virtual status_t fragmentedRead(MediaBufferBase **buffer, const ReadOptions *options = NULL);

    virtual ~MPEG4Source();

private:
    Mutex mLock;

    // Reference to the track format supplied to the constructor (not a copy).
    MetaDataBase &mFormat;
    // Not owned (see the constructor comment).
    DataSourceBase *mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Reference to the sidx entries supplied to the constructor (not a copy).
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    // Fragment ("moof") bookkeeping - presumably file offsets; TODO confirm.
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    // State filled in by the parseSampleAuxiliaryInformation*() helpers
    // declared below - presumably saiz/saio data; TODO confirm.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    // NOTE(review): looks like the byte width of the NAL length prefix read by
    // parseNALSize() - confirm.
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBufferBase *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    bool mIsHeif;
    sp<ItemTable> mItemTable;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // Values taken from a track fragment header. The Flags enumerators are bit
    // masks; the k*Present names suggest each one signals that the matching
    // field below was present in the header - TODO confirm in
    // parseTrackFragmentHeader().
    struct TrackFragmentHeaderInfo {
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record kept in mCurrentSamples.
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        // NOTE(review): presumably the clear/encrypted byte counts of each
        // subsample of an encrypted sample - confirm.
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Declared private so that the class cannot be copied from outside.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
184
185// This custom data source wraps an existing one and satisfies requests
186// falling entirely within a cached range from the cache while forwarding
187// all remaining requests to the wrapped datasource.
188// This is used to cache the full sampletable metadata for a single track,
189// possibly wrapping multiple times to cover all tracks, i.e.
190// Each CachedRangedDataSource caches the sampletable metadata for a single track.
191
// DataSourceBase wrapper that keeps one contiguous byte range of the wrapped
// source in memory. Reads that fall inside that range are answered from the
// in-memory copy; everything else is forwarded to the wrapped source.
struct CachedRangedDataSource : public DataSourceBase {
    // Wraps "source". Ownership is not taken here; it can only be assumed
    // later through setCachedRange().
    explicit CachedRangedDataSource(DataSourceBase *source);
    // Frees the cache and deletes the wrapped source if it is owned.
    virtual ~CachedRangedDataSource();

    // initCheck(), getSize() and flags() forward to the wrapped source.
    virtual status_t initCheck() const;
    // Copies from the cache when [offset, offset + size) lies inside the
    // cached range, otherwise forwards the read to the wrapped source.
    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
    virtual status_t getSize(off64_t *size);
    virtual uint32_t flags();

    // Discards any previous cache, then reads "size" bytes starting at
    // "offset" from the wrapped source into a freshly allocated buffer.
    // Returns -ENOMEM if the allocation fails, ERROR_IO if fewer than "size"
    // bytes could be read (the cache is left empty), and OK otherwise. Only on
    // success is the ownership flag set to "assumeSourceOwnershipOnSuccess".
    status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);


private:
    // Held by readAt() and setCachedRange().
    Mutex mLock;

    DataSourceBase *mSource;   // wrapped source
    bool mOwnsDataSource;      // whether the destructor deletes mSource
    off64_t mCachedOffset;     // start of the cached range within the source
    size_t mCachedSize;        // length of the cached range in bytes
    uint8_t *mCache;           // malloc()ed copy of the range, NULL when empty

    // Frees mCache and resets the cached range to empty.
    void clearCache();

    // Declared private so that the class cannot be copied from outside.
    CachedRangedDataSource(const CachedRangedDataSource &);
    CachedRangedDataSource &operator=(const CachedRangedDataSource &);
};
218
219CachedRangedDataSource::CachedRangedDataSource(DataSourceBase *source)
220    : mSource(source),
221      mOwnsDataSource(false),
222      mCachedOffset(0),
223      mCachedSize(0),
224      mCache(NULL) {
225}
226
227CachedRangedDataSource::~CachedRangedDataSource() {
228    clearCache();
229    if (mOwnsDataSource) {
230        delete (CachedRangedDataSource*)mSource;
231    }
232}
233
234void CachedRangedDataSource::clearCache() {
235    if (mCache) {
236        free(mCache);
237        mCache = NULL;
238    }
239
240    mCachedOffset = 0;
241    mCachedSize = 0;
242}
243
244status_t CachedRangedDataSource::initCheck() const {
245    return mSource->initCheck();
246}
247
248ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
249    Mutex::Autolock autoLock(mLock);
250
251    if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
252        memcpy(data, &mCache[offset - mCachedOffset], size);
253        return size;
254    }
255
256    return mSource->readAt(offset, data, size);
257}
258
259status_t CachedRangedDataSource::getSize(off64_t *size) {
260    return mSource->getSize(size);
261}
262
263uint32_t CachedRangedDataSource::flags() {
264    return mSource->flags();
265}
266
267status_t CachedRangedDataSource::setCachedRange(off64_t offset,
268        size_t size,
269        bool assumeSourceOwnershipOnSuccess) {
270    Mutex::Autolock autoLock(mLock);
271
272    clearCache();
273
274    mCache = (uint8_t *)malloc(size);
275
276    if (mCache == NULL) {
277        return -ENOMEM;
278    }
279
280    mCachedOffset = offset;
281    mCachedSize = size;
282
283    ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
284
285    if (err < (ssize_t)size) {
286        clearCache();
287
288        return ERROR_IO;
289    }
290    mOwnsDataSource = assumeSourceOwnershipOnSuccess;
291    return OK;
292}
293
294////////////////////////////////////////////////////////////////////////////////
295
// Debug switch: when true, parseChunk() prints every chunk it encounters and
// hexdumps up to the first 256 bytes of it.
static const bool kUseHexDump = false;
297
298static const char *FourCC2MIME(uint32_t fourcc) {
299    switch (fourcc) {
300        case FOURCC('m', 'p', '4', 'a'):
301            return MEDIA_MIMETYPE_AUDIO_AAC;
302
303        case FOURCC('s', 'a', 'm', 'r'):
304            return MEDIA_MIMETYPE_AUDIO_AMR_NB;
305
306        case FOURCC('s', 'a', 'w', 'b'):
307            return MEDIA_MIMETYPE_AUDIO_AMR_WB;
308
309        case FOURCC('m', 'p', '4', 'v'):
310            return MEDIA_MIMETYPE_VIDEO_MPEG4;
311
312        case FOURCC('s', '2', '6', '3'):
313        case FOURCC('h', '2', '6', '3'):
314        case FOURCC('H', '2', '6', '3'):
315            return MEDIA_MIMETYPE_VIDEO_H263;
316
317        case FOURCC('a', 'v', 'c', '1'):
318            return MEDIA_MIMETYPE_VIDEO_AVC;
319
320        case FOURCC('h', 'v', 'c', '1'):
321        case FOURCC('h', 'e', 'v', '1'):
322            return MEDIA_MIMETYPE_VIDEO_HEVC;
323        default:
324            ALOGW("Unknown fourcc: %c%c%c%c",
325                   (fourcc >> 24) & 0xff,
326                   (fourcc >> 16) & 0xff,
327                   (fourcc >> 8) & 0xff,
328                   fourcc & 0xff
329                   );
330            return "application/octet-stream";
331    }
332}
333
334static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
335    if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
336        // AMR NB audio is always mono, 8kHz
337        *channels = 1;
338        *rate = 8000;
339        return true;
340    } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
341        // AMR WB audio is always mono, 16kHz
342        *channels = 1;
343        *rate = 16000;
344        return true;
345    }
346    return false;
347}
348
349MPEG4Extractor::MPEG4Extractor(DataSourceBase *source, const char *mime)
350    : mMoofOffset(0),
351      mMoofFound(false),
352      mMdatFound(false),
353      mDataSource(source),
354      mCachedSource(NULL),
355      mInitCheck(NO_INIT),
356      mHeaderTimescale(0),
357      mIsQT(false),
358      mIsHeif(false),
359      mHasMoovBox(false),
360      mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
361      mFirstTrack(NULL),
362      mLastTrack(NULL) {
363    ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
364}
365
366MPEG4Extractor::~MPEG4Extractor() {
367    Track *track = mFirstTrack;
368    while (track) {
369        Track *next = track->next;
370
371        delete track;
372        track = next;
373    }
374    mFirstTrack = mLastTrack = NULL;
375
376    for (size_t i = 0; i < mPssh.size(); i++) {
377        delete [] mPssh[i].data;
378    }
379    mPssh.clear();
380
381    delete mCachedSource;
382}
383
384uint32_t MPEG4Extractor::flags() const {
385    return CAN_PAUSE |
386            ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
387                    (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
388}
389
390status_t MPEG4Extractor::getMetaData(MetaDataBase &meta) {
391    status_t err;
392    if ((err = readMetaData()) != OK) {
393        return UNKNOWN_ERROR;
394    }
395    meta = mFileMetaData;
396    return OK;
397}
398
399size_t MPEG4Extractor::countTracks() {
400    status_t err;
401    if ((err = readMetaData()) != OK) {
402        ALOGV("MPEG4Extractor::countTracks: no tracks");
403        return 0;
404    }
405
406    size_t n = 0;
407    Track *track = mFirstTrack;
408    while (track) {
409        ++n;
410        track = track->next;
411    }
412
413    ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
414    return n;
415}
416
// Copies the metadata of the track at position "index" into "meta".
//
// meta  - out: receives a copy of the track's metadata.
// index - zero-based position in the track list.
// flags - when kIncludeExtensiveMetaData is set, the MPEG2 stream header and
//         a thumbnail time are additionally computed for video tracks (once
//         per track).
//
// Returns OK on success and UNKNOWN_ERROR if the file cannot be parsed or the
// index is past the end of the track list.
status_t MPEG4Extractor::getTrackMetaData(
        MetaDataBase &meta,
        size_t index, uint32_t flags) {
    status_t err;
    if ((err = readMetaData()) != OK) {
        return UNKNOWN_ERROR;
    }

    // Walk the singly linked track list to the requested position.
    Track *track = mFirstTrack;
    while (index > 0) {
        if (track == NULL) {
            return UNKNOWN_ERROR;
        }

        track = track->next;
        --index;
    }

    if (track == NULL) {
        return UNKNOWN_ERROR;
    }

    // Derive encoder delay/padding from the edit list. The work is wrapped in
    // an immediately invoked lambda so that "return" abandons just this
    // computation (e.g. on arithmetic overflow) without leaving
    // getTrackMetaData().
    [=] {
        int64_t duration;
        int32_t samplerate;
        if (track->has_elst && mHeaderTimescale != 0 &&
                track->meta.findInt64(kKeyDuration, &duration) &&
                track->meta.findInt32(kKeySampleRate, &samplerate)) {

            // Cleared up front so the edit list is evaluated at most once per
            // track, even if one of the checks below bails out.
            track->has_elst = false;

            if (track->elst_segment_duration > INT64_MAX) {
                return;
            }
            int64_t segment_duration = track->elst_segment_duration;
            int64_t media_time = track->elst_media_time;
            // Added before dividing by the timescale to round to nearest.
            int64_t halfscale = mHeaderTimescale / 2;
            ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
                  ", halfscale = %" PRId64 ", timescale = %d",
                  segment_duration,
                  media_time,
                  halfscale,
                  mHeaderTimescale);

            int64_t delay;
            // delay = ((media_time * samplerate) + halfscale) / mHeaderTimescale;
            // Each step is overflow-checked; the "(x /= y, false)" term performs
            // the division inside the short-circuit chain without ending it.
            if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
                    __builtin_add_overflow(delay, halfscale, &delay) ||
                    (delay /= mHeaderTimescale, false) ||
                    delay > INT32_MAX ||
                    delay < INT32_MIN) {
                return;
            }
            ALOGV("delay = %" PRId64, delay);
            track->meta.setInt32(kKeyEncoderDelay, delay);

            int64_t scaled_duration;
            // scaled_duration = duration * mHeaderTimescale;
            if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
                return;
            }
            ALOGV("scaled_duration = %" PRId64, scaled_duration);

            int64_t segment_end;
            int64_t padding;
            // padding = scaled_duration - ((segment_duration + media_time) * 1000000);
            if (__builtin_add_overflow(segment_duration, media_time, &segment_end) ||
                    __builtin_mul_overflow(segment_end, 1000000, &segment_end) ||
                    __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
                return;
            }
            ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);

            if (padding < 0) {
                // track duration from media header (which is what kKeyDuration is) might
                // be slightly shorter than the segment duration, which would make the
                // padding negative. Clamp to zero.
                padding = 0;
            }

            int64_t paddingsamples;
            int64_t halfscale_e6;
            int64_t timescale_e6;
            // paddingsamples = ((padding * samplerate) + (halfscale * 1000000))
            //                / (mHeaderTimescale * 1000000);
            if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
                    __builtin_mul_overflow(halfscale, 1000000, &halfscale_e6) ||
                    __builtin_mul_overflow(mHeaderTimescale, 1000000, &timescale_e6) ||
                    __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
                    (paddingsamples /= timescale_e6, false) ||
                    paddingsamples > INT32_MAX) {
                return;
            }
            ALOGV("paddingsamples = %" PRId64, paddingsamples);
            track->meta.setInt32(kKeyEncoderPadding, paddingsamples);
        }
    }();

    // The extra metadata is computed only on request and only once per track;
    // includes_expensive_metadata records that it has been done.
    if ((flags & kIncludeExtensiveMetaData)
            && !track->includes_expensive_metadata) {
        track->includes_expensive_metadata = true;

        const char *mime;
        CHECK(track->meta.findCString(kKeyMIMEType, &mime));
        if (!strncasecmp("video/", mime, 6)) {
            // MPEG2 tracks do not provide CSD, so read the stream header
            if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
                off64_t offset;
                size_t size;
                if (track->sampleTable->getMetaDataForSample(
                            0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
                    // Only the first kMaxTrackHeaderSize bytes of the first
                    // sample are stored as the stream header.
                    if (size > kMaxTrackHeaderSize) {
                        size = kMaxTrackHeaderSize;
                    }
                    uint8_t header[kMaxTrackHeaderSize];
                    if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
                        track->meta.setData(kKeyStreamHeader, 'mdat', header, size);
                    }
                }
            }

            // mMoofOffset is set when a 'moof' box was seen; in that case the
            // thumbnail time is estimated from the duration rather than looked
            // up in the sample table.
            if (mMoofOffset > 0) {
                int64_t duration;
                if (track->meta.findInt64(kKeyDuration, &duration)) {
                    // nothing fancy, just pick a frame near 1/4th of the duration
                    track->meta.setInt64(
                            kKeyThumbnailTime, duration / 4);
                }
            } else {
                uint32_t sampleIndex;
                uint32_t sampleTime;
                // The timescale check protects the division below.
                if (track->timescale != 0 &&
                        track->sampleTable->findThumbnailSample(&sampleIndex) == OK
                        && track->sampleTable->getMetaDataForSample(
                            sampleIndex, NULL /* offset */, NULL /* size */,
                            &sampleTime) == OK) {
                    // Scale the sample time by 1000000 / timescale.
                    track->meta.setInt64(
                            kKeyThumbnailTime,
                            ((int64_t)sampleTime * 1000000) / track->timescale);
                }
            }
        }
    }

    meta = track->meta;
    return OK;
}
564
// Parses the top-level chunks of the file until enough has been seen, then
// fills in the file-level metadata (container MIME type, EXIF location for
// HEIF, concatenated pssh data) and, for HEIF files, one track per image.
//
// The result is remembered in mInitCheck; later calls return it immediately
// without parsing again. Returns mInitCheck, or NO_MEMORY if the buffer for
// the pssh data cannot be allocated.
status_t MPEG4Extractor::readMetaData() {
    if (mInitCheck != NO_INIT) {
        return mInitCheck;
    }

    off64_t offset = 0;
    status_t err;
    bool sawMoovOrSidx = false;

    // Keep parsing top-level chunks until either
    //  - a moov box was found, parseChunk() signalled UNKNOWN_ERROR at least
    //    once (see below) and an mdat or moof was found, or
    //  - the file is HEIF, HEIF is preferred (or there is no moov box) and
    //    the item table is valid.
    while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
             (mIsHeif && (mPreferHeif || !mHasMoovBox) &&
                     (mItemTable != NULL) && mItemTable->isValid()))) {
        off64_t orig_offset = offset;
        err = parseChunk(&offset, 0);

        // UNKNOWN_ERROR is not treated as a failure here. NOTE(review): going
        // by the flag name, parseChunk() presumably returns it after a moov or
        // sidx box - confirm in parseChunk().
        if (err != OK && err != UNKNOWN_ERROR) {
            break;
        } else if (offset <= orig_offset) {
            // only continue parsing if the offset was advanced,
            // otherwise we might end up in an infinite loop
            ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
            err = ERROR_MALFORMED;
            break;
        } else if (err == UNKNOWN_ERROR) {
            sawMoovOrSidx = true;
        }
    }

    if (mIsHeif && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
        off64_t exifOffset;
        size_t exifSize;
        if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
            mFileMetaData.setInt64(kKeyExifOffset, (int64_t)exifOffset);
            mFileMetaData.setInt64(kKeyExifSize, (int64_t)exifSize);
        }
        // Append one track per HEIF image that has metadata.
        for (uint32_t imageIndex = 0;
                imageIndex < mItemTable->countImages(); imageIndex++) {
            sp<MetaData> meta = mItemTable->getImageMeta(imageIndex);
            if (meta == NULL) {
                ALOGE("heif image %u has no meta!", imageIndex);
                continue;
            }
            // Some heif files advertise image sequence brands (eg. 'hevc') in
            // ftyp box, but don't have any valid tracks in them. Instead of
            // reporting the entire file as malformed, we override the error
            // to allow still images to be extracted.
            if (err != OK) {
                ALOGW("Extracting still images only");
                err = OK;
            }
            mInitCheck = OK;

            ALOGV("adding HEIF image track %u", imageIndex);
            Track *track = new Track;
            track->next = NULL;
            if (mLastTrack != NULL) {
                mLastTrack->next = track;
            } else {
                mFirstTrack = track;
            }
            mLastTrack = track;

            // The image index doubles as the track id.
            track->meta = *(meta.get());
            track->meta.setInt32(kKeyTrackID, imageIndex);
            track->includes_expensive_metadata = false;
            track->skipTrack = false;
            track->timescale = 1000000;
        }
    }

    // mInitCheck is already OK here only if something above (the HEIF loop) or
    // parseChunk() set it; otherwise the parse result becomes the init status.
    if (mInitCheck == OK) {
        // Pick the container MIME type from the kind of tracks found, with
        // video taking precedence over audio and audio over HEIF images.
        if (findTrackByMimePrefix("video/") != NULL) {
            mFileMetaData.setCString(
                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
        } else if (findTrackByMimePrefix("audio/") != NULL) {
            mFileMetaData.setCString(kKeyMIMEType, "audio/mp4");
        } else if (findTrackByMimePrefix(
                MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
            mFileMetaData.setCString(
                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_HEIF);
        } else {
            mFileMetaData.setCString(kKeyMIMEType, "application/octet-stream");
        }
    } else {
        mInitCheck = err;
    }

    CHECK_NE(err, (status_t)NO_INIT);

    // copy pssh data into file metadata
    // Each entry contributes a 20-byte header (uuid + length) followed by
    // its payload; the total is summed in 64 bits and only used if it fits
    // in 32 bits.
    uint64_t psshsize = 0;
    for (size_t i = 0; i < mPssh.size(); i++) {
        psshsize += 20 + mPssh[i].datalen;
    }
    if (psshsize > 0 && psshsize <= UINT32_MAX) {
        char *buf = (char*)malloc(psshsize);
        if (!buf) {
            ALOGE("b/28471206");
            return NO_MEMORY;
        }
        char *ptr = buf;
        for (size_t i = 0; i < mPssh.size(); i++) {
            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
            ptr += (20 + mPssh[i].datalen);
        }
        mFileMetaData.setData(kKeyPssh, 'pssh', buf, psshsize);
        // The temporary buffer is released right after setData().
        free(buf);
    }

    return mInitCheck;
}
677
678struct PathAdder {
679    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
680        : mPath(path) {
681        mPath->push(chunkType);
682    }
683
684    ~PathAdder() {
685        mPath->pop();
686    }
687
688private:
689    Vector<uint32_t> *mPath;
690
691    PathAdder(const PathAdder &);
692    PathAdder &operator=(const PathAdder &);
693};
694
695static bool underMetaDataPath(const Vector<uint32_t> &path) {
696    return path.size() >= 5
697        && path[0] == FOURCC('m', 'o', 'o', 'v')
698        && path[1] == FOURCC('u', 'd', 't', 'a')
699        && path[2] == FOURCC('m', 'e', 't', 'a')
700        && path[3] == FOURCC('i', 'l', 's', 't');
701}
702
703static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
704    return path.size() >= 2
705            && path[0] == FOURCC('m', 'o', 'o', 'v')
706            && path[1] == FOURCC('m', 'e', 't', 'a')
707            && (depth == 2
708            || (depth == 3
709                    && (path[2] == FOURCC('h', 'd', 'l', 'r')
710                    ||  path[2] == FOURCC('i', 'l', 's', 't')
711                    ||  path[2] == FOURCC('k', 'e', 'y', 's'))));
712}
713
714// Given a time in seconds since Jan 1 1904, produce a human-readable string.
715static bool convertTimeToDate(int64_t time_1904, String8 *s) {
716    // delta between mpeg4 time and unix epoch time
717    static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
718    if (time_1904 < INT64_MIN + delta) {
719        return false;
720    }
721    time_t time_1970 = time_1904 - delta;
722
723    char tmp[32];
724    struct tm* tm = gmtime(&time_1970);
725    if (tm != NULL &&
726            strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
727        s->setTo(tmp);
728        return true;
729    }
730    return false;
731}
732
733status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
734    ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
735
736    if (*offset < 0) {
737        ALOGE("b/23540914");
738        return ERROR_MALFORMED;
739    }
740    if (depth > 100) {
741        ALOGE("b/27456299");
742        return ERROR_MALFORMED;
743    }
744    uint32_t hdr[2];
745    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
746        return ERROR_IO;
747    }
748    uint64_t chunk_size = ntohl(hdr[0]);
749    int32_t chunk_type = ntohl(hdr[1]);
750    off64_t data_offset = *offset + 8;
751
752    if (chunk_size == 1) {
753        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
754            return ERROR_IO;
755        }
756        chunk_size = ntoh64(chunk_size);
757        data_offset += 8;
758
759        if (chunk_size < 16) {
760            // The smallest valid chunk is 16 bytes long in this case.
761            return ERROR_MALFORMED;
762        }
763    } else if (chunk_size == 0) {
764        if (depth == 0) {
765            // atom extends to end of file
766            off64_t sourceSize;
767            if (mDataSource->getSize(&sourceSize) == OK) {
768                chunk_size = (sourceSize - *offset);
769            } else {
770                // XXX could we just pick a "sufficiently large" value here?
771                ALOGE("atom size is 0, and data source has no size");
772                return ERROR_MALFORMED;
773            }
774        } else {
775            // not allowed for non-toplevel atoms, skip it
776            *offset += 4;
777            return OK;
778        }
779    } else if (chunk_size < 8) {
780        // The smallest valid chunk is 8 bytes long.
781        ALOGE("invalid chunk size: %" PRIu64, chunk_size);
782        return ERROR_MALFORMED;
783    }
784
785    char chunk[5];
786    MakeFourCCString(chunk_type, chunk);
787    ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
788
789    if (kUseHexDump) {
790        static const char kWhitespace[] = "                                        ";
791        const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
792        printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
793
794        char buffer[256];
795        size_t n = chunk_size;
796        if (n > sizeof(buffer)) {
797            n = sizeof(buffer);
798        }
799        if (mDataSource->readAt(*offset, buffer, n)
800                < (ssize_t)n) {
801            return ERROR_IO;
802        }
803
804        hexdump(buffer, n);
805    }
806
807    PathAdder autoAdder(&mPath, chunk_type);
808
809    // (data_offset - *offset) is either 8 or 16
810    off64_t chunk_data_size = chunk_size - (data_offset - *offset);
811    if (chunk_data_size < 0) {
812        ALOGE("b/23540914");
813        return ERROR_MALFORMED;
814    }
815    if (chunk_type != FOURCC('m', 'd', 'a', 't') && chunk_data_size > kMaxAtomSize) {
816        char errMsg[100];
817        sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
818        ALOGE("%s (b/28615448)", errMsg);
819        android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
820        return ERROR_MALFORMED;
821    }
822
823    if (chunk_type != FOURCC('c', 'p', 'r', 't')
824            && chunk_type != FOURCC('c', 'o', 'v', 'r')
825            && mPath.size() == 5 && underMetaDataPath(mPath)) {
826        off64_t stop_offset = *offset + chunk_size;
827        *offset = data_offset;
828        while (*offset < stop_offset) {
829            status_t err = parseChunk(offset, depth + 1);
830            if (err != OK) {
831                return err;
832            }
833        }
834
835        if (*offset != stop_offset) {
836            return ERROR_MALFORMED;
837        }
838
839        return OK;
840    }
841
842    switch(chunk_type) {
843        case FOURCC('m', 'o', 'o', 'v'):
844        case FOURCC('t', 'r', 'a', 'k'):
845        case FOURCC('m', 'd', 'i', 'a'):
846        case FOURCC('m', 'i', 'n', 'f'):
847        case FOURCC('d', 'i', 'n', 'f'):
848        case FOURCC('s', 't', 'b', 'l'):
849        case FOURCC('m', 'v', 'e', 'x'):
850        case FOURCC('m', 'o', 'o', 'f'):
851        case FOURCC('t', 'r', 'a', 'f'):
852        case FOURCC('m', 'f', 'r', 'a'):
853        case FOURCC('u', 'd', 't', 'a'):
854        case FOURCC('i', 'l', 's', 't'):
855        case FOURCC('s', 'i', 'n', 'f'):
856        case FOURCC('s', 'c', 'h', 'i'):
857        case FOURCC('e', 'd', 't', 's'):
858        case FOURCC('w', 'a', 'v', 'e'):
859        {
860            if (chunk_type == FOURCC('m', 'o', 'o', 'v') && depth != 0) {
861                ALOGE("moov: depth %d", depth);
862                return ERROR_MALFORMED;
863            }
864
865            if (chunk_type == FOURCC('m', 'o', 'o', 'v') && mInitCheck == OK) {
866                ALOGE("duplicate moov");
867                return ERROR_MALFORMED;
868            }
869
870            if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) {
871                // store the offset of the first segment
872                mMoofFound = true;
873                mMoofOffset = *offset;
874            }
875
876            if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
877                ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
878
879                if (mDataSource->flags()
880                        & (DataSourceBase::kWantsPrefetching
881                            | DataSourceBase::kIsCachingDataSource)) {
882                    CachedRangedDataSource *cachedSource =
883                        new CachedRangedDataSource(mDataSource);
884
885                    if (cachedSource->setCachedRange(
886                            *offset, chunk_size,
887                            mCachedSource != NULL /* assume ownership on success */) == OK) {
888                        mDataSource = mCachedSource = cachedSource;
889                    } else {
890                        delete cachedSource;
891                    }
892                }
893
894                if (mLastTrack == NULL) {
895                    return ERROR_MALFORMED;
896                }
897
898                mLastTrack->sampleTable = new SampleTable(mDataSource);
899            }
900
901            bool isTrack = false;
902            if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
903                if (depth != 1) {
904                    ALOGE("trak: depth %d", depth);
905                    return ERROR_MALFORMED;
906                }
907                isTrack = true;
908
909                ALOGV("adding new track");
910                Track *track = new Track;
911                track->next = NULL;
912                if (mLastTrack) {
913                    mLastTrack->next = track;
914                } else {
915                    mFirstTrack = track;
916                }
917                mLastTrack = track;
918
919                track->includes_expensive_metadata = false;
920                track->skipTrack = false;
921                track->timescale = 0;
922                track->meta.setCString(kKeyMIMEType, "application/octet-stream");
923                track->has_elst = false;
924            }
925
926            off64_t stop_offset = *offset + chunk_size;
927            *offset = data_offset;
928            while (*offset < stop_offset) {
929                status_t err = parseChunk(offset, depth + 1);
930                if (err != OK) {
931                    if (isTrack) {
932                        mLastTrack->skipTrack = true;
933                        break;
934                    }
935                    return err;
936                }
937            }
938
939            if (*offset != stop_offset) {
940                return ERROR_MALFORMED;
941            }
942
943            if (isTrack) {
944                int32_t trackId;
945                // There must be exact one track header per track.
946                if (!mLastTrack->meta.findInt32(kKeyTrackID, &trackId)) {
947                    mLastTrack->skipTrack = true;
948                }
949
950                status_t err = verifyTrack(mLastTrack);
951                if (err != OK) {
952                    mLastTrack->skipTrack = true;
953                }
954
955                if (mLastTrack->skipTrack) {
956                    ALOGV("skipping this track...");
957                    Track *cur = mFirstTrack;
958
959                    if (cur == mLastTrack) {
960                        delete cur;
961                        mFirstTrack = mLastTrack = NULL;
962                    } else {
963                        while (cur && cur->next != mLastTrack) {
964                            cur = cur->next;
965                        }
966                        if (cur) {
967                            cur->next = NULL;
968                        }
969                        delete mLastTrack;
970                        mLastTrack = cur;
971                    }
972
973                    return OK;
974                }
975            } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
976                mInitCheck = OK;
977
978                return UNKNOWN_ERROR;  // Return a dummy error.
979            }
980            break;
981        }
982
983        case FOURCC('e', 'l', 's', 't'):
984        {
985            *offset += chunk_size;
986
987            if (!mLastTrack) {
988                return ERROR_MALFORMED;
989            }
990
991            // See 14496-12 8.6.6
992            uint8_t version;
993            if (mDataSource->readAt(data_offset, &version, 1) < 1) {
994                return ERROR_IO;
995            }
996
997            uint32_t entry_count;
998            if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
999                return ERROR_IO;
1000            }
1001
1002            if (entry_count != 1) {
1003                // we only support a single entry at the moment, for gapless playback
1004                ALOGW("ignoring edit list with %d entries", entry_count);
1005            } else {
1006                off64_t entriesoffset = data_offset + 8;
1007                uint64_t segment_duration;
1008                int64_t media_time;
1009
1010                if (version == 1) {
1011                    if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1012                            !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1013                        return ERROR_IO;
1014                    }
1015                } else if (version == 0) {
1016                    uint32_t sd;
1017                    int32_t mt;
1018                    if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1019                            !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1020                        return ERROR_IO;
1021                    }
1022                    segment_duration = sd;
1023                    media_time = mt;
1024                } else {
1025                    return ERROR_IO;
1026                }
1027
1028                // save these for later, because the elst atom might precede
1029                // the atoms that actually gives us the duration and sample rate
1030                // needed to calculate the padding and delay values
1031                mLastTrack->has_elst = true;
1032                mLastTrack->elst_media_time = media_time;
1033                mLastTrack->elst_segment_duration = segment_duration;
1034            }
1035            break;
1036        }
1037
1038        case FOURCC('f', 'r', 'm', 'a'):
1039        {
1040            *offset += chunk_size;
1041
1042            uint32_t original_fourcc;
1043            if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1044                return ERROR_IO;
1045            }
1046            original_fourcc = ntohl(original_fourcc);
1047            ALOGV("read original format: %d", original_fourcc);
1048
1049            if (mLastTrack == NULL) {
1050                return ERROR_MALFORMED;
1051            }
1052
1053            mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
1054            uint32_t num_channels = 0;
1055            uint32_t sample_rate = 0;
1056            if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1057                mLastTrack->meta.setInt32(kKeyChannelCount, num_channels);
1058                mLastTrack->meta.setInt32(kKeySampleRate, sample_rate);
1059            }
1060            break;
1061        }
1062
1063        case FOURCC('t', 'e', 'n', 'c'):
1064        {
1065            *offset += chunk_size;
1066
1067            if (chunk_size < 32) {
1068                return ERROR_MALFORMED;
1069            }
1070
1071            // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1072            // default IV size, 16 bytes default KeyID
1073            // (ISO 23001-7)
1074            char buf[4];
1075            memset(buf, 0, 4);
1076            if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1077                return ERROR_IO;
1078            }
1079            uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1080            if (defaultAlgorithmId > 1) {
1081                // only 0 (clear) and 1 (AES-128) are valid
1082                return ERROR_MALFORMED;
1083            }
1084
1085            memset(buf, 0, 4);
1086            if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1087                return ERROR_IO;
1088            }
1089            uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1090
1091            if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
1092                    (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
1093                // only unencrypted data must have 0 IV size
1094                return ERROR_MALFORMED;
1095            } else if (defaultIVSize != 0 &&
1096                    defaultIVSize != 8 &&
1097                    defaultIVSize != 16) {
1098                // only supported sizes are 0, 8 and 16
1099                return ERROR_MALFORMED;
1100            }
1101
1102            uint8_t defaultKeyId[16];
1103
1104            if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1105                return ERROR_IO;
1106            }
1107
1108            if (mLastTrack == NULL)
1109                return ERROR_MALFORMED;
1110
1111            mLastTrack->meta.setInt32(kKeyCryptoMode, defaultAlgorithmId);
1112            mLastTrack->meta.setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1113            mLastTrack->meta.setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1114            break;
1115        }
1116
1117        case FOURCC('t', 'k', 'h', 'd'):
1118        {
1119            *offset += chunk_size;
1120
1121            status_t err;
1122            if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1123                return err;
1124            }
1125
1126            break;
1127        }
1128
1129        case FOURCC('t', 'r', 'e', 'f'):
1130        {
1131            off64_t stop_offset = *offset + chunk_size;
1132            *offset = data_offset;
1133            while (*offset < stop_offset) {
1134                status_t err = parseChunk(offset, depth + 1);
1135                if (err != OK) {
1136                    return err;
1137                }
1138            }
1139            if (*offset != stop_offset) {
1140                return ERROR_MALFORMED;
1141            }
1142            break;
1143        }
1144
1145        case FOURCC('t', 'h', 'm', 'b'):
1146        {
1147            *offset += chunk_size;
1148
1149            if (mLastTrack != NULL) {
1150                // Skip thumbnail track for now since we don't have an
1151                // API to retrieve it yet.
1152                // The thumbnail track can't be accessed by negative index or time,
1153                // because each timed sample has its own corresponding thumbnail
1154                // in the thumbnail track. We'll need a dedicated API to retrieve
1155                // thumbnail at time instead.
1156                mLastTrack->skipTrack = true;
1157            }
1158
1159            break;
1160        }
1161
1162        case FOURCC('p', 's', 's', 'h'):
1163        {
1164            *offset += chunk_size;
1165
1166            PsshInfo pssh;
1167
1168            if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1169                return ERROR_IO;
1170            }
1171
1172            uint32_t psshdatalen = 0;
1173            if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1174                return ERROR_IO;
1175            }
1176            pssh.datalen = ntohl(psshdatalen);
1177            ALOGV("pssh data size: %d", pssh.datalen);
1178            if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1179                // pssh data length exceeds size of containing box
1180                return ERROR_MALFORMED;
1181            }
1182
1183            pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1184            if (pssh.data == NULL) {
1185                return ERROR_MALFORMED;
1186            }
1187            ALOGV("allocated pssh @ %p", pssh.data);
1188            ssize_t requested = (ssize_t) pssh.datalen;
1189            if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1190                delete[] pssh.data;
1191                return ERROR_IO;
1192            }
1193            mPssh.push_back(pssh);
1194
1195            break;
1196        }
1197
1198        case FOURCC('m', 'd', 'h', 'd'):
1199        {
1200            *offset += chunk_size;
1201
1202            if (chunk_data_size < 4 || mLastTrack == NULL) {
1203                return ERROR_MALFORMED;
1204            }
1205
1206            uint8_t version;
1207            if (mDataSource->readAt(
1208                        data_offset, &version, sizeof(version))
1209                    < (ssize_t)sizeof(version)) {
1210                return ERROR_IO;
1211            }
1212
1213            off64_t timescale_offset;
1214
1215            if (version == 1) {
1216                timescale_offset = data_offset + 4 + 16;
1217            } else if (version == 0) {
1218                timescale_offset = data_offset + 4 + 8;
1219            } else {
1220                return ERROR_IO;
1221            }
1222
1223            uint32_t timescale;
1224            if (mDataSource->readAt(
1225                        timescale_offset, &timescale, sizeof(timescale))
1226                    < (ssize_t)sizeof(timescale)) {
1227                return ERROR_IO;
1228            }
1229
1230            if (!timescale) {
1231                ALOGE("timescale should not be ZERO.");
1232                return ERROR_MALFORMED;
1233            }
1234
1235            mLastTrack->timescale = ntohl(timescale);
1236
1237            // 14496-12 says all ones means indeterminate, but some files seem to use
1238            // 0 instead. We treat both the same.
1239            int64_t duration = 0;
1240            if (version == 1) {
1241                if (mDataSource->readAt(
1242                            timescale_offset + 4, &duration, sizeof(duration))
1243                        < (ssize_t)sizeof(duration)) {
1244                    return ERROR_IO;
1245                }
1246                if (duration != -1) {
1247                    duration = ntoh64(duration);
1248                }
1249            } else {
1250                uint32_t duration32;
1251                if (mDataSource->readAt(
1252                            timescale_offset + 4, &duration32, sizeof(duration32))
1253                        < (ssize_t)sizeof(duration32)) {
1254                    return ERROR_IO;
1255                }
1256                if (duration32 != 0xffffffff) {
1257                    duration = ntohl(duration32);
1258                }
1259            }
1260            if (duration != 0 && mLastTrack->timescale != 0) {
1261                mLastTrack->meta.setInt64(
1262                        kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1263            }
1264
1265            uint8_t lang[2];
1266            off64_t lang_offset;
1267            if (version == 1) {
1268                lang_offset = timescale_offset + 4 + 8;
1269            } else if (version == 0) {
1270                lang_offset = timescale_offset + 4 + 4;
1271            } else {
1272                return ERROR_IO;
1273            }
1274
1275            if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1276                    < (ssize_t)sizeof(lang)) {
1277                return ERROR_IO;
1278            }
1279
1280            // To get the ISO-639-2/T three character language code
1281            // 1 bit pad followed by 3 5-bits characters. Each character
1282            // is packed as the difference between its ASCII value and 0x60.
1283            char lang_code[4];
1284            lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1285            lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1286            lang_code[2] = (lang[1] & 0x1f) + 0x60;
1287            lang_code[3] = '\0';
1288
1289            mLastTrack->meta.setCString(
1290                    kKeyMediaLanguage, lang_code);
1291
1292            break;
1293        }
1294
1295        case FOURCC('s', 't', 's', 'd'):
1296        {
1297            uint8_t buffer[8];
1298            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1299                return ERROR_MALFORMED;
1300            }
1301
1302            if (mDataSource->readAt(
1303                        data_offset, buffer, 8) < 8) {
1304                return ERROR_IO;
1305            }
1306
1307            if (U32_AT(buffer) != 0) {
1308                // Should be version 0, flags 0.
1309                return ERROR_MALFORMED;
1310            }
1311
1312            uint32_t entry_count = U32_AT(&buffer[4]);
1313
1314            if (entry_count > 1) {
1315                // For 3GPP timed text, there could be multiple tx3g boxes contain
1316                // multiple text display formats. These formats will be used to
1317                // display the timed text.
1318                // For encrypted files, there may also be more than one entry.
1319                const char *mime;
1320
1321                if (mLastTrack == NULL)
1322                    return ERROR_MALFORMED;
1323
1324                CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime));
1325                if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1326                        strcasecmp(mime, "application/octet-stream")) {
1327                    // For now we only support a single type of media per track.
1328                    mLastTrack->skipTrack = true;
1329                    *offset += chunk_size;
1330                    break;
1331                }
1332            }
1333            off64_t stop_offset = *offset + chunk_size;
1334            *offset = data_offset + 8;
1335            for (uint32_t i = 0; i < entry_count; ++i) {
1336                status_t err = parseChunk(offset, depth + 1);
1337                if (err != OK) {
1338                    return err;
1339                }
1340            }
1341
1342            if (*offset != stop_offset) {
1343                return ERROR_MALFORMED;
1344            }
1345            break;
1346        }
1347        case FOURCC('m', 'e', 't', 't'):
1348        {
1349            *offset += chunk_size;
1350
1351            if (mLastTrack == NULL)
1352                return ERROR_MALFORMED;
1353
1354            auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1355            if (buffer.get() == NULL) {
1356                return NO_MEMORY;
1357            }
1358
1359            if (mDataSource->readAt(
1360                        data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1361                return ERROR_IO;
1362            }
1363
1364            String8 mimeFormat((const char *)(buffer.get()), chunk_data_size);
1365            mLastTrack->meta.setCString(kKeyMIMEType, mimeFormat.string());
1366
1367            break;
1368        }
1369
1370        case FOURCC('m', 'p', '4', 'a'):
1371        case FOURCC('e', 'n', 'c', 'a'):
1372        case FOURCC('s', 'a', 'm', 'r'):
1373        case FOURCC('s', 'a', 'w', 'b'):
1374        {
1375            if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')
1376                    && depth >= 1 && mPath[depth - 1] == FOURCC('w', 'a', 'v', 'e')) {
1377                // Ignore mp4a embedded in QT wave atom
1378                *offset += chunk_size;
1379                break;
1380            }
1381
1382            uint8_t buffer[8 + 20];
1383            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1384                // Basic AudioSampleEntry size.
1385                return ERROR_MALFORMED;
1386            }
1387
1388            if (mDataSource->readAt(
1389                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1390                return ERROR_IO;
1391            }
1392
1393            uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1394            uint16_t version = U16_AT(&buffer[8]);
1395            uint32_t num_channels = U16_AT(&buffer[16]);
1396
1397            uint16_t sample_size = U16_AT(&buffer[18]);
1398            uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1399
1400            if (mLastTrack == NULL)
1401                return ERROR_MALFORMED;
1402
1403            off64_t stop_offset = *offset + chunk_size;
1404            *offset = data_offset + sizeof(buffer);
1405
1406            if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')) {
1407                if (version == 1) {
1408                    if (mDataSource->readAt(*offset, buffer, 16) < 16) {
1409                        return ERROR_IO;
1410                    }
1411
1412#if 0
1413                    U32_AT(buffer);  // samples per packet
1414                    U32_AT(&buffer[4]);  // bytes per packet
1415                    U32_AT(&buffer[8]);  // bytes per frame
1416                    U32_AT(&buffer[12]);  // bytes per sample
1417#endif
1418                    *offset += 16;
1419                } else if (version == 2) {
1420                    uint8_t v2buffer[36];
1421                    if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
1422                        return ERROR_IO;
1423                    }
1424
1425#if 0
1426                    U32_AT(v2buffer);  // size of struct only
1427                    sample_rate = (uint32_t)U64_AT(&v2buffer[4]);  // audio sample rate
1428                    num_channels = U32_AT(&v2buffer[12]);  // num audio channels
1429                    U32_AT(&v2buffer[16]);  // always 0x7f000000
1430                    sample_size = (uint16_t)U32_AT(&v2buffer[20]);  // const bits per channel
1431                    U32_AT(&v2buffer[24]);  // format specifc flags
1432                    U32_AT(&v2buffer[28]);  // const bytes per audio packet
1433                    U32_AT(&v2buffer[32]);  // const LPCM frames per audio packet
1434#endif
1435                    *offset += 36;
1436                }
1437            }
1438
1439            if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1440                // if the chunk type is enca, we'll get the type from the frma box later
1441                mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1442                AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1443            }
1444            ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1445                   chunk, num_channels, sample_size, sample_rate);
1446            mLastTrack->meta.setInt32(kKeyChannelCount, num_channels);
1447            mLastTrack->meta.setInt32(kKeySampleRate, sample_rate);
1448
1449            while (*offset < stop_offset) {
1450                status_t err = parseChunk(offset, depth + 1);
1451                if (err != OK) {
1452                    return err;
1453                }
1454            }
1455
1456            if (*offset != stop_offset) {
1457                return ERROR_MALFORMED;
1458            }
1459            break;
1460        }
1461
1462        case FOURCC('m', 'p', '4', 'v'):
1463        case FOURCC('e', 'n', 'c', 'v'):
1464        case FOURCC('s', '2', '6', '3'):
1465        case FOURCC('H', '2', '6', '3'):
1466        case FOURCC('h', '2', '6', '3'):
1467        case FOURCC('a', 'v', 'c', '1'):
1468        case FOURCC('h', 'v', 'c', '1'):
1469        case FOURCC('h', 'e', 'v', '1'):
1470        {
1471            uint8_t buffer[78];
1472            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1473                // Basic VideoSampleEntry size.
1474                return ERROR_MALFORMED;
1475            }
1476
1477            if (mDataSource->readAt(
1478                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1479                return ERROR_IO;
1480            }
1481
1482            uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1483            uint16_t width = U16_AT(&buffer[6 + 18]);
1484            uint16_t height = U16_AT(&buffer[6 + 20]);
1485
1486            // The video sample is not standard-compliant if it has invalid dimension.
1487            // Use some default width and height value, and
1488            // let the decoder figure out the actual width and height (and thus
1489            // be prepared for INFO_FOMRAT_CHANGED event).
1490            if (width == 0)  width  = 352;
1491            if (height == 0) height = 288;
1492
1493            // printf("*** coding='%s' width=%d height=%d\n",
1494            //        chunk, width, height);
1495
1496            if (mLastTrack == NULL)
1497                return ERROR_MALFORMED;
1498
1499            if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1500                // if the chunk type is encv, we'll get the type from the frma box later
1501                mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1502            }
1503            mLastTrack->meta.setInt32(kKeyWidth, width);
1504            mLastTrack->meta.setInt32(kKeyHeight, height);
1505
1506            off64_t stop_offset = *offset + chunk_size;
1507            *offset = data_offset + sizeof(buffer);
1508            while (*offset < stop_offset) {
1509                status_t err = parseChunk(offset, depth + 1);
1510                if (err != OK) {
1511                    return err;
1512                }
1513            }
1514
1515            if (*offset != stop_offset) {
1516                return ERROR_MALFORMED;
1517            }
1518            break;
1519        }
1520
1521        case FOURCC('s', 't', 'c', 'o'):
1522        case FOURCC('c', 'o', '6', '4'):
1523        {
1524            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
1525                return ERROR_MALFORMED;
1526            }
1527
1528            status_t err =
1529                mLastTrack->sampleTable->setChunkOffsetParams(
1530                        chunk_type, data_offset, chunk_data_size);
1531
1532            *offset += chunk_size;
1533
1534            if (err != OK) {
1535                return err;
1536            }
1537
1538            break;
1539        }
1540
1541        case FOURCC('s', 't', 's', 'c'):
1542        {
1543            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1544                return ERROR_MALFORMED;
1545
1546            status_t err =
1547                mLastTrack->sampleTable->setSampleToChunkParams(
1548                        data_offset, chunk_data_size);
1549
1550            *offset += chunk_size;
1551
1552            if (err != OK) {
1553                return err;
1554            }
1555
1556            break;
1557        }
1558
1559        case FOURCC('s', 't', 's', 'z'):
1560        case FOURCC('s', 't', 'z', '2'):
1561        {
1562            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
1563                return ERROR_MALFORMED;
1564            }
1565
1566            status_t err =
1567                mLastTrack->sampleTable->setSampleSizeParams(
1568                        chunk_type, data_offset, chunk_data_size);
1569
1570            *offset += chunk_size;
1571
1572            if (err != OK) {
1573                return err;
1574            }
1575
1576            size_t max_size;
1577            err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1578
1579            if (err != OK) {
1580                return err;
1581            }
1582
1583            if (max_size != 0) {
1584                // Assume that a given buffer only contains at most 10 chunks,
1585                // each chunk originally prefixed with a 2 byte length will
1586                // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1587                // and thus will grow by 2 bytes per chunk.
1588                if (max_size > SIZE_MAX - 10 * 2) {
1589                    ALOGE("max sample size too big: %zu", max_size);
1590                    return ERROR_MALFORMED;
1591                }
1592                mLastTrack->meta.setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1593            } else {
1594                // No size was specified. Pick a conservatively large size.
1595                uint32_t width, height;
1596                if (!mLastTrack->meta.findInt32(kKeyWidth, (int32_t*)&width) ||
1597                    !mLastTrack->meta.findInt32(kKeyHeight,(int32_t*) &height)) {
1598                    ALOGE("No width or height, assuming worst case 1080p");
1599                    width = 1920;
1600                    height = 1080;
1601                } else {
1602                    // A resolution was specified, check that it's not too big. The values below
1603                    // were chosen so that the calculations below don't cause overflows, they're
1604                    // not indicating that resolutions up to 32kx32k are actually supported.
1605                    if (width > 32768 || height > 32768) {
1606                        ALOGE("can't support %u x %u video", width, height);
1607                        return ERROR_MALFORMED;
1608                    }
1609                }
1610
1611                const char *mime;
1612                CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime));
1613                if (!strncmp(mime, "audio/", 6)) {
1614                    // for audio, use 128KB
1615                    max_size = 1024 * 128;
1616                } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
1617                        || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
1618                    // AVC & HEVC requires compression ratio of at least 2, and uses
1619                    // macroblocks
1620                    max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1621                } else {
1622                    // For all other formats there is no minimum compression
1623                    // ratio. Use compression ratio of 1.
1624                    max_size = width * height * 3 / 2;
1625                }
1626                // HACK: allow 10% overhead
1627                // TODO: read sample size from traf atom for fragmented MPEG4.
1628                max_size += max_size / 10;
1629                mLastTrack->meta.setInt32(kKeyMaxInputSize, max_size);
1630            }
1631
1632            // NOTE: setting another piece of metadata invalidates any pointers (such as the
1633            // mimetype) previously obtained, so don't cache them.
1634            const char *mime;
1635            CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime));
1636            // Calculate average frame rate.
1637            if (!strncasecmp("video/", mime, 6)) {
1638                size_t nSamples = mLastTrack->sampleTable->countSamples();
1639                if (nSamples == 0) {
1640                    int32_t trackId;
1641                    if (mLastTrack->meta.findInt32(kKeyTrackID, &trackId)) {
1642                        for (size_t i = 0; i < mTrex.size(); i++) {
1643                            Trex *t = &mTrex.editItemAt(i);
1644                            if (t->track_ID == (uint32_t) trackId) {
1645                                if (t->default_sample_duration > 0) {
1646                                    int32_t frameRate =
1647                                            mLastTrack->timescale / t->default_sample_duration;
1648                                    mLastTrack->meta.setInt32(kKeyFrameRate, frameRate);
1649                                }
1650                                break;
1651                            }
1652                        }
1653                    }
1654                } else {
1655                    int64_t durationUs;
1656                    if (mLastTrack->meta.findInt64(kKeyDuration, &durationUs)) {
1657                        if (durationUs > 0) {
1658                            int32_t frameRate = (nSamples * 1000000LL +
1659                                        (durationUs >> 1)) / durationUs;
1660                            mLastTrack->meta.setInt32(kKeyFrameRate, frameRate);
1661                        }
1662                    }
1663                    ALOGV("setting frame count %zu", nSamples);
1664                    mLastTrack->meta.setInt32(kKeyFrameCount, nSamples);
1665                }
1666            }
1667
1668            break;
1669        }
1670
1671        case FOURCC('s', 't', 't', 's'):
1672        {
1673            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1674                return ERROR_MALFORMED;
1675
1676            *offset += chunk_size;
1677
1678            status_t err =
1679                mLastTrack->sampleTable->setTimeToSampleParams(
1680                        data_offset, chunk_data_size);
1681
1682            if (err != OK) {
1683                return err;
1684            }
1685
1686            break;
1687        }
1688
1689        case FOURCC('c', 't', 't', 's'):
1690        {
1691            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1692                return ERROR_MALFORMED;
1693
1694            *offset += chunk_size;
1695
1696            status_t err =
1697                mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1698                        data_offset, chunk_data_size);
1699
1700            if (err != OK) {
1701                return err;
1702            }
1703
1704            break;
1705        }
1706
1707        case FOURCC('s', 't', 's', 's'):
1708        {
1709            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1710                return ERROR_MALFORMED;
1711
1712            *offset += chunk_size;
1713
1714            status_t err =
1715                mLastTrack->sampleTable->setSyncSampleParams(
1716                        data_offset, chunk_data_size);
1717
1718            if (err != OK) {
1719                return err;
1720            }
1721
1722            break;
1723        }
1724
1725        // \xA9xyz
1726        case FOURCC(0xA9, 'x', 'y', 'z'):
1727        {
1728            *offset += chunk_size;
1729
1730            // Best case the total data length inside "\xA9xyz" box would
1731            // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
1732            // where "\x00\x05" is the text string length with value = 5,
1733            // "\0x15\xc7" is the language code = en, and "+0+0/" is a
1734            // location (string) value with longitude = 0 and latitude = 0.
1735            // Since some devices encountered in the wild omit the trailing
1736            // slash, we'll allow that.
1737            if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
1738                return ERROR_MALFORMED;
1739            }
1740
1741            uint16_t len;
1742            if (!mDataSource->getUInt16(data_offset, &len)) {
1743                return ERROR_IO;
1744            }
1745
1746            // allow "+0+0" without trailing slash
1747            if (len < 4 || len > chunk_data_size - 4) {
1748                return ERROR_MALFORMED;
1749            }
1750            // The location string following the language code is formatted
1751            // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
1752            // Allocate 2 extra bytes, in case we need to add a trailing slash,
1753            // and to add a terminating 0.
1754            std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
1755            if (!buffer) {
1756                return NO_MEMORY;
1757            }
1758
1759            if (mDataSource->readAt(
1760                        data_offset + 4, &buffer[0], len) < len) {
1761                return ERROR_IO;
1762            }
1763
1764            len = strlen(&buffer[0]);
1765            if (len < 4) {
1766                return ERROR_MALFORMED;
1767            }
1768            // Add a trailing slash if there wasn't one.
1769            if (buffer[len - 1] != '/') {
1770                buffer[len] = '/';
1771            }
1772            mFileMetaData.setCString(kKeyLocation, &buffer[0]);
1773            break;
1774        }
1775
1776        case FOURCC('e', 's', 'd', 's'):
1777        {
1778            *offset += chunk_size;
1779
1780            if (chunk_data_size < 4) {
1781                return ERROR_MALFORMED;
1782            }
1783
1784            uint8_t buffer[256];
1785            if (chunk_data_size > (off64_t)sizeof(buffer)) {
1786                return ERROR_BUFFER_TOO_SMALL;
1787            }
1788
1789            if (mDataSource->readAt(
1790                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
1791                return ERROR_IO;
1792            }
1793
1794            if (U32_AT(buffer) != 0) {
1795                // Should be version 0, flags 0.
1796                return ERROR_MALFORMED;
1797            }
1798
1799            if (mLastTrack == NULL)
1800                return ERROR_MALFORMED;
1801
1802            mLastTrack->meta.setData(
1803                    kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1804
1805            if (mPath.size() >= 2
1806                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1807                // Information from the ESDS must be relied on for proper
1808                // setup of sample rate and channel count for MPEG4 Audio.
1809                // The generic header appears to only contain generic
1810                // information...
1811
1812                status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1813                        &buffer[4], chunk_data_size - 4);
1814
1815                if (err != OK) {
1816                    return err;
1817                }
1818            }
1819            if (mPath.size() >= 2
1820                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) {
1821                // Check if the video is MPEG2
1822                ESDS esds(&buffer[4], chunk_data_size - 4);
1823
1824                uint8_t objectTypeIndication;
1825                if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
1826                    if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
1827                        mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
1828                    }
1829                }
1830            }
1831            break;
1832        }
1833
1834        case FOURCC('b', 't', 'r', 't'):
1835        {
1836            *offset += chunk_size;
1837            if (mLastTrack == NULL) {
1838                return ERROR_MALFORMED;
1839            }
1840
1841            uint8_t buffer[12];
1842            if (chunk_data_size != sizeof(buffer)) {
1843                return ERROR_MALFORMED;
1844            }
1845
1846            if (mDataSource->readAt(
1847                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1848                return ERROR_IO;
1849            }
1850
1851            uint32_t maxBitrate = U32_AT(&buffer[4]);
1852            uint32_t avgBitrate = U32_AT(&buffer[8]);
1853            if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
1854                mLastTrack->meta.setInt32(kKeyMaxBitRate, (int32_t)maxBitrate);
1855            }
1856            if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
1857                mLastTrack->meta.setInt32(kKeyBitRate, (int32_t)avgBitrate);
1858            }
1859            break;
1860        }
1861
1862        case FOURCC('a', 'v', 'c', 'C'):
1863        {
1864            *offset += chunk_size;
1865
1866            auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1867
1868            if (buffer.get() == NULL) {
1869                ALOGE("b/28471206");
1870                return NO_MEMORY;
1871            }
1872
1873            if (mDataSource->readAt(
1874                        data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1875                return ERROR_IO;
1876            }
1877
1878            if (mLastTrack == NULL)
1879                return ERROR_MALFORMED;
1880
1881            mLastTrack->meta.setData(
1882                    kKeyAVCC, kTypeAVCC, buffer.get(), chunk_data_size);
1883
1884            break;
1885        }
1886        case FOURCC('h', 'v', 'c', 'C'):
1887        {
1888            auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1889
1890            if (buffer.get() == NULL) {
1891                ALOGE("b/28471206");
1892                return NO_MEMORY;
1893            }
1894
1895            if (mDataSource->readAt(
1896                        data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1897                return ERROR_IO;
1898            }
1899
1900            if (mLastTrack == NULL)
1901                return ERROR_MALFORMED;
1902
1903            mLastTrack->meta.setData(
1904                    kKeyHVCC, kTypeHVCC, buffer.get(), chunk_data_size);
1905
1906            *offset += chunk_size;
1907            break;
1908        }
1909
1910        case FOURCC('d', '2', '6', '3'):
1911        {
1912            *offset += chunk_size;
1913            /*
1914             * d263 contains a fixed 7 bytes part:
1915             *   vendor - 4 bytes
1916             *   version - 1 byte
1917             *   level - 1 byte
1918             *   profile - 1 byte
1919             * optionally, "d263" box itself may contain a 16-byte
1920             * bit rate box (bitr)
1921             *   average bit rate - 4 bytes
1922             *   max bit rate - 4 bytes
1923             */
1924            char buffer[23];
1925            if (chunk_data_size != 7 &&
1926                chunk_data_size != 23) {
1927                ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
1928                return ERROR_MALFORMED;
1929            }
1930
1931            if (mDataSource->readAt(
1932                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1933                return ERROR_IO;
1934            }
1935
1936            if (mLastTrack == NULL)
1937                return ERROR_MALFORMED;
1938
1939            mLastTrack->meta.setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1940
1941            break;
1942        }
1943
1944        case FOURCC('m', 'e', 't', 'a'):
1945        {
1946            off64_t stop_offset = *offset + chunk_size;
1947            *offset = data_offset;
1948            bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
1949            if (!isParsingMetaKeys) {
1950                uint8_t buffer[4];
1951                if (chunk_data_size < (off64_t)sizeof(buffer)) {
1952                    *offset = stop_offset;
1953                    return ERROR_MALFORMED;
1954                }
1955
1956                if (mDataSource->readAt(
1957                            data_offset, buffer, 4) < 4) {
1958                    *offset = stop_offset;
1959                    return ERROR_IO;
1960                }
1961
1962                if (U32_AT(buffer) != 0) {
1963                    // Should be version 0, flags 0.
1964
1965                    // If it's not, let's assume this is one of those
1966                    // apparently malformed chunks that don't have flags
1967                    // and completely different semantics than what's
1968                    // in the MPEG4 specs and skip it.
1969                    *offset = stop_offset;
1970                    return OK;
1971                }
1972                *offset +=  sizeof(buffer);
1973            }
1974
1975            while (*offset < stop_offset) {
1976                status_t err = parseChunk(offset, depth + 1);
1977                if (err != OK) {
1978                    return err;
1979                }
1980            }
1981
1982            if (*offset != stop_offset) {
1983                return ERROR_MALFORMED;
1984            }
1985            break;
1986        }
1987
1988        case FOURCC('i', 'l', 'o', 'c'):
1989        case FOURCC('i', 'i', 'n', 'f'):
1990        case FOURCC('i', 'p', 'r', 'p'):
1991        case FOURCC('p', 'i', 't', 'm'):
1992        case FOURCC('i', 'd', 'a', 't'):
1993        case FOURCC('i', 'r', 'e', 'f'):
1994        case FOURCC('i', 'p', 'r', 'o'):
1995        {
1996            if (mIsHeif) {
1997                if (mItemTable == NULL) {
1998                    mItemTable = new ItemTable(mDataSource);
1999                }
2000                status_t err = mItemTable->parse(
2001                        chunk_type, data_offset, chunk_data_size);
2002                if (err != OK) {
2003                    return err;
2004                }
2005            }
2006            *offset += chunk_size;
2007            break;
2008        }
2009
2010        case FOURCC('m', 'e', 'a', 'n'):
2011        case FOURCC('n', 'a', 'm', 'e'):
2012        case FOURCC('d', 'a', 't', 'a'):
2013        {
2014            *offset += chunk_size;
2015
2016            if (mPath.size() == 6 && underMetaDataPath(mPath)) {
2017                status_t err = parseITunesMetaData(data_offset, chunk_data_size);
2018
2019                if (err != OK) {
2020                    return err;
2021                }
2022            }
2023
2024            break;
2025        }
2026
2027        case FOURCC('m', 'v', 'h', 'd'):
2028        {
2029            *offset += chunk_size;
2030
2031            if (depth != 1) {
2032                ALOGE("mvhd: depth %d", depth);
2033                return ERROR_MALFORMED;
2034            }
2035            if (chunk_data_size < 32) {
2036                return ERROR_MALFORMED;
2037            }
2038
2039            uint8_t header[32];
2040            if (mDataSource->readAt(
2041                        data_offset, header, sizeof(header))
2042                    < (ssize_t)sizeof(header)) {
2043                return ERROR_IO;
2044            }
2045
2046            uint64_t creationTime;
2047            uint64_t duration = 0;
2048            if (header[0] == 1) {
2049                creationTime = U64_AT(&header[4]);
2050                mHeaderTimescale = U32_AT(&header[20]);
2051                duration = U64_AT(&header[24]);
2052                if (duration == 0xffffffffffffffff) {
2053                    duration = 0;
2054                }
2055            } else if (header[0] != 0) {
2056                return ERROR_MALFORMED;
2057            } else {
2058                creationTime = U32_AT(&header[4]);
2059                mHeaderTimescale = U32_AT(&header[12]);
2060                uint32_t d32 = U32_AT(&header[16]);
2061                if (d32 == 0xffffffff) {
2062                    d32 = 0;
2063                }
2064                duration = d32;
2065            }
2066            if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
2067                mFileMetaData.setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
2068            }
2069
2070            String8 s;
2071            if (convertTimeToDate(creationTime, &s)) {
2072                mFileMetaData.setCString(kKeyDate, s.string());
2073            }
2074
2075
2076            break;
2077        }
2078
2079        case FOURCC('m', 'e', 'h', 'd'):
2080        {
2081            *offset += chunk_size;
2082
2083            if (chunk_data_size < 8) {
2084                return ERROR_MALFORMED;
2085            }
2086
2087            uint8_t flags[4];
2088            if (mDataSource->readAt(
2089                        data_offset, flags, sizeof(flags))
2090                    < (ssize_t)sizeof(flags)) {
2091                return ERROR_IO;
2092            }
2093
2094            uint64_t duration = 0;
2095            if (flags[0] == 1) {
2096                // 64 bit
2097                if (chunk_data_size < 12) {
2098                    return ERROR_MALFORMED;
2099                }
2100                mDataSource->getUInt64(data_offset + 4, &duration);
2101                if (duration == 0xffffffffffffffff) {
2102                    duration = 0;
2103                }
2104            } else if (flags[0] == 0) {
2105                // 32 bit
2106                uint32_t d32;
2107                mDataSource->getUInt32(data_offset + 4, &d32);
2108                if (d32 == 0xffffffff) {
2109                    d32 = 0;
2110                }
2111                duration = d32;
2112            } else {
2113                return ERROR_MALFORMED;
2114            }
2115
2116            if (duration != 0 && mHeaderTimescale != 0) {
2117                mFileMetaData.setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
2118            }
2119
2120            break;
2121        }
2122
2123        case FOURCC('m', 'd', 'a', 't'):
2124        {
2125            mMdatFound = true;
2126
2127            *offset += chunk_size;
2128            break;
2129        }
2130
2131        case FOURCC('h', 'd', 'l', 'r'):
2132        {
2133            *offset += chunk_size;
2134
2135            if (underQTMetaPath(mPath, 3)) {
2136                break;
2137            }
2138
2139            uint32_t buffer;
2140            if (mDataSource->readAt(
2141                        data_offset + 8, &buffer, 4) < 4) {
2142                return ERROR_IO;
2143            }
2144
2145            uint32_t type = ntohl(buffer);
2146            // For the 3GPP file format, the handler-type within the 'hdlr' box
2147            // shall be 'text'. We also want to support 'sbtl' handler type
2148            // for a practical reason as various MPEG4 containers use it.
2149            if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
2150                if (mLastTrack != NULL) {
2151                    mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
2152                }
2153            }
2154
2155            break;
2156        }
2157
2158        case FOURCC('k', 'e', 'y', 's'):
2159        {
2160            *offset += chunk_size;
2161
2162            if (underQTMetaPath(mPath, 3)) {
2163                status_t err = parseQTMetaKey(data_offset, chunk_data_size);
2164                if (err != OK) {
2165                    return err;
2166                }
2167            }
2168            break;
2169        }
2170
2171        case FOURCC('t', 'r', 'e', 'x'):
2172        {
2173            *offset += chunk_size;
2174
2175            if (chunk_data_size < 24) {
2176                return ERROR_IO;
2177            }
2178            Trex trex;
2179            if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2180                !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2181                !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2182                !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2183                !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2184                return ERROR_IO;
2185            }
2186            mTrex.add(trex);
2187            break;
2188        }
2189
2190        case FOURCC('t', 'x', '3', 'g'):
2191        {
2192            if (mLastTrack == NULL)
2193                return ERROR_MALFORMED;
2194
2195            uint32_t type;
2196            const void *data;
2197            size_t size = 0;
2198            if (!mLastTrack->meta.findData(
2199                    kKeyTextFormatData, &type, &data, &size)) {
2200                size = 0;
2201            }
2202
2203            if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) {
2204                return ERROR_MALFORMED;
2205            }
2206
2207            uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size];
2208            if (buffer == NULL) {
2209                return ERROR_MALFORMED;
2210            }
2211
2212            if (size > 0) {
2213                memcpy(buffer, data, size);
2214            }
2215
2216            if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
2217                    < chunk_size) {
2218                delete[] buffer;
2219                buffer = NULL;
2220
2221                // advance read pointer so we don't end up reading this again
2222                *offset += chunk_size;
2223                return ERROR_IO;
2224            }
2225
2226            mLastTrack->meta.setData(
2227                    kKeyTextFormatData, 0, buffer, size + chunk_size);
2228
2229            delete[] buffer;
2230
2231            *offset += chunk_size;
2232            break;
2233        }
2234
2235        case FOURCC('c', 'o', 'v', 'r'):
2236        {
2237            *offset += chunk_size;
2238
2239            ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
2240                  chunk_data_size, data_offset);
2241
2242            if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
2243                return ERROR_MALFORMED;
2244            }
2245            auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2246            if (buffer.get() == NULL) {
2247                ALOGE("b/28471206");
2248                return NO_MEMORY;
2249            }
2250            if (mDataSource->readAt(
2251                data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
2252                return ERROR_IO;
2253            }
2254            const int kSkipBytesOfDataBox = 16;
2255            if (chunk_data_size <= kSkipBytesOfDataBox) {
2256                return ERROR_MALFORMED;
2257            }
2258
2259            mFileMetaData.setData(
2260                kKeyAlbumArt, MetaData::TYPE_NONE,
2261                buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
2262
2263            break;
2264        }
2265
2266        case FOURCC('c', 'o', 'l', 'r'):
2267        {
2268            *offset += chunk_size;
2269            // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
2270            // ignore otherwise
2271            if (depth >= 2 && mPath[depth - 2] == FOURCC('s', 't', 's', 'd')) {
2272                status_t err = parseColorInfo(data_offset, chunk_data_size);
2273                if (err != OK) {
2274                    return err;
2275                }
2276            }
2277
2278            break;
2279        }
2280
2281        case FOURCC('t', 'i', 't', 'l'):
2282        case FOURCC('p', 'e', 'r', 'f'):
2283        case FOURCC('a', 'u', 't', 'h'):
2284        case FOURCC('g', 'n', 'r', 'e'):
2285        case FOURCC('a', 'l', 'b', 'm'):
2286        case FOURCC('y', 'r', 'r', 'c'):
2287        {
2288            *offset += chunk_size;
2289
2290            status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
2291
2292            if (err != OK) {
2293                return err;
2294            }
2295
2296            break;
2297        }
2298
2299        case FOURCC('I', 'D', '3', '2'):
2300        {
2301            *offset += chunk_size;
2302
2303            if (chunk_data_size < 6) {
2304                return ERROR_MALFORMED;
2305            }
2306
2307            parseID3v2MetaData(data_offset + 6);
2308
2309            break;
2310        }
2311
2312        case FOURCC('-', '-', '-', '-'):
2313        {
2314            mLastCommentMean.clear();
2315            mLastCommentName.clear();
2316            mLastCommentData.clear();
2317            *offset += chunk_size;
2318            break;
2319        }
2320
2321        case FOURCC('s', 'i', 'd', 'x'):
2322        {
2323            status_t err = parseSegmentIndex(data_offset, chunk_data_size);
2324            if (err != OK) {
2325                return err;
2326            }
2327            *offset += chunk_size;
2328            return UNKNOWN_ERROR; // stop parsing after sidx
2329        }
2330
2331        case FOURCC('a', 'c', '-', '3'):
2332        {
2333            *offset += chunk_size;
2334            return parseAC3SampleEntry(data_offset);
2335        }
2336
2337        case FOURCC('f', 't', 'y', 'p'):
2338        {
2339            if (chunk_data_size < 8 || depth != 0) {
2340                return ERROR_MALFORMED;
2341            }
2342
2343            off64_t stop_offset = *offset + chunk_size;
2344            uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
2345            std::set<uint32_t> brandSet;
2346            for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
2347                if (i == 1) {
2348                    // Skip this index, it refers to the minorVersion,
2349                    // not a brand.
2350                    continue;
2351                }
2352
2353                uint32_t brand;
2354                if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
2355                    return ERROR_MALFORMED;
2356                }
2357
2358                brand = ntohl(brand);
2359                brandSet.insert(brand);
2360            }
2361
2362            if (brandSet.count(FOURCC('q', 't', ' ', ' ')) > 0) {
2363                mIsQT = true;
2364            } else {
2365                if (brandSet.count(FOURCC('m', 'i', 'f', '1')) > 0
2366                 && brandSet.count(FOURCC('h', 'e', 'i', 'c')) > 0) {
2367                    ALOGV("identified HEIF image");
2368
2369                    mIsHeif = true;
2370                    brandSet.erase(FOURCC('m', 'i', 'f', '1'));
2371                    brandSet.erase(FOURCC('h', 'e', 'i', 'c'));
2372                }
2373
2374                if (!brandSet.empty()) {
2375                    // This means that the file should have moov box.
2376                    // It could be any iso files (mp4, heifs, etc.)
2377                    mHasMoovBox = true;
2378                    if (mIsHeif) {
2379                        ALOGV("identified HEIF image with other tracks");
2380                    }
2381                }
2382            }
2383
2384            *offset = stop_offset;
2385
2386            break;
2387        }
2388
2389        default:
2390        {
2391            // check if we're parsing 'ilst' for meta keys
2392            // if so, treat type as a number (key-id).
2393            if (underQTMetaPath(mPath, 3)) {
2394                status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
2395                if (err != OK) {
2396                    return err;
2397                }
2398            }
2399
2400            *offset += chunk_size;
2401            break;
2402        }
2403    }
2404
2405    return OK;
2406}
2407
2408status_t MPEG4Extractor::parseAC3SampleEntry(off64_t offset) {
2409    // skip 16 bytes:
2410    //  + 6-byte reserved,
2411    //  + 2-byte data reference index,
2412    //  + 8-byte reserved
2413    offset += 16;
2414    uint16_t channelCount;
2415    if (!mDataSource->getUInt16(offset, &channelCount)) {
2416        return ERROR_MALFORMED;
2417    }
2418    // skip 8 bytes:
2419    //  + 2-byte channelCount,
2420    //  + 2-byte sample size,
2421    //  + 4-byte reserved
2422    offset += 8;
2423    uint16_t sampleRate;
2424    if (!mDataSource->getUInt16(offset, &sampleRate)) {
2425        ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read sample rate");
2426        return ERROR_MALFORMED;
2427    }
2428
2429    // skip 4 bytes:
2430    //  + 2-byte sampleRate,
2431    //  + 2-byte reserved
2432    offset += 4;
2433    return parseAC3SpecificBox(offset, sampleRate);
2434}
2435
2436status_t MPEG4Extractor::parseAC3SpecificBox(
2437        off64_t offset, uint16_t sampleRate) {
2438    uint32_t size;
2439    // + 4-byte size
2440    // + 4-byte type
2441    // + 3-byte payload
2442    const uint32_t kAC3SpecificBoxSize = 11;
2443    if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) {
2444        ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
2445        return ERROR_MALFORMED;
2446    }
2447
2448    offset += 4;
2449    uint32_t type;
2450    if (!mDataSource->getUInt32(offset, &type) || type != FOURCC('d', 'a', 'c', '3')) {
2451        ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
2452        return ERROR_MALFORMED;
2453    }
2454
2455    offset += 4;
2456    const uint32_t kAC3SpecificBoxPayloadSize = 3;
2457    uint8_t chunk[kAC3SpecificBoxPayloadSize];
2458    if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) {
2459        ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
2460        return ERROR_MALFORMED;
2461    }
2462
2463    ABitReader br(chunk, sizeof(chunk));
2464    static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
2465    static const unsigned sampleRateTable[] = {48000, 44100, 32000};
2466
2467    unsigned fscod = br.getBits(2);
2468    if (fscod == 3) {
2469        ALOGE("Incorrect fscod (3) in AC3 header");
2470        return ERROR_MALFORMED;
2471    }
2472    unsigned boxSampleRate = sampleRateTable[fscod];
2473    if (boxSampleRate != sampleRate) {
2474        ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
2475            boxSampleRate, sampleRate);
2476        return ERROR_MALFORMED;
2477    }
2478
2479    unsigned bsid = br.getBits(5);
2480    if (bsid > 8) {
2481        ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
2482        return ERROR_MALFORMED;
2483    }
2484
2485    // skip
2486    unsigned bsmod __unused = br.getBits(3);
2487
2488    unsigned acmod = br.getBits(3);
2489    unsigned lfeon = br.getBits(1);
2490    unsigned channelCount = channelCountTable[acmod] + lfeon;
2491
2492    if (mLastTrack == NULL) {
2493        return ERROR_MALFORMED;
2494    }
2495    mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3);
2496    mLastTrack->meta.setInt32(kKeyChannelCount, channelCount);
2497    mLastTrack->meta.setInt32(kKeySampleRate, sampleRate);
2498    return OK;
2499}
2500
2501status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
2502  ALOGV("MPEG4Extractor::parseSegmentIndex");
2503
2504    if (size < 12) {
2505      return -EINVAL;
2506    }
2507
2508    uint32_t flags;
2509    if (!mDataSource->getUInt32(offset, &flags)) {
2510        return ERROR_MALFORMED;
2511    }
2512
2513    uint32_t version = flags >> 24;
2514    flags &= 0xffffff;
2515
2516    ALOGV("sidx version %d", version);
2517
2518    uint32_t referenceId;
2519    if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
2520        return ERROR_MALFORMED;
2521    }
2522
2523    uint32_t timeScale;
2524    if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
2525        return ERROR_MALFORMED;
2526    }
2527    ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
2528    if (timeScale == 0)
2529        return ERROR_MALFORMED;
2530
2531    uint64_t earliestPresentationTime;
2532    uint64_t firstOffset;
2533
2534    offset += 12;
2535    size -= 12;
2536
2537    if (version == 0) {
2538        if (size < 8) {
2539            return -EINVAL;
2540        }
2541        uint32_t tmp;
2542        if (!mDataSource->getUInt32(offset, &tmp)) {
2543            return ERROR_MALFORMED;
2544        }
2545        earliestPresentationTime = tmp;
2546        if (!mDataSource->getUInt32(offset + 4, &tmp)) {
2547            return ERROR_MALFORMED;
2548        }
2549        firstOffset = tmp;
2550        offset += 8;
2551        size -= 8;
2552    } else {
2553        if (size < 16) {
2554            return -EINVAL;
2555        }
2556        if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
2557            return ERROR_MALFORMED;
2558        }
2559        if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
2560            return ERROR_MALFORMED;
2561        }
2562        offset += 16;
2563        size -= 16;
2564    }
2565    ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
2566
2567    if (size < 4) {
2568        return -EINVAL;
2569    }
2570
2571    uint16_t referenceCount;
2572    if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
2573        return ERROR_MALFORMED;
2574    }
2575    offset += 4;
2576    size -= 4;
2577    ALOGV("refcount: %d", referenceCount);
2578
2579    if (size < referenceCount * 12) {
2580        return -EINVAL;
2581    }
2582
2583    uint64_t total_duration = 0;
2584    for (unsigned int i = 0; i < referenceCount; i++) {
2585        uint32_t d1, d2, d3;
2586
2587        if (!mDataSource->getUInt32(offset, &d1) ||     // size
2588            !mDataSource->getUInt32(offset + 4, &d2) || // duration
2589            !mDataSource->getUInt32(offset + 8, &d3)) { // flags
2590            return ERROR_MALFORMED;
2591        }
2592
2593        if (d1 & 0x80000000) {
2594            ALOGW("sub-sidx boxes not supported yet");
2595        }
2596        bool sap = d3 & 0x80000000;
2597        uint32_t saptype = (d3 >> 28) & 7;
2598        if (!sap || (saptype != 1 && saptype != 2)) {
2599            // type 1 and 2 are sync samples
2600            ALOGW("not a stream access point, or unsupported type: %08x", d3);
2601        }
2602        total_duration += d2;
2603        offset += 12;
2604        ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
2605        SidxEntry se;
2606        se.mSize = d1 & 0x7fffffff;
2607        se.mDurationUs = 1000000LL * d2 / timeScale;
2608        mSidxEntries.add(se);
2609    }
2610
2611    uint64_t sidxDuration = total_duration * 1000000 / timeScale;
2612
2613    if (mLastTrack == NULL)
2614        return ERROR_MALFORMED;
2615
2616    int64_t metaDuration;
2617    if (!mLastTrack->meta.findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
2618        mLastTrack->meta.setInt64(kKeyDuration, sidxDuration);
2619    }
2620    return OK;
2621}
2622
2623status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
2624    if (size < 8) {
2625        return ERROR_MALFORMED;
2626    }
2627
2628    uint32_t count;
2629    if (!mDataSource->getUInt32(offset + 4, &count)) {
2630        return ERROR_MALFORMED;
2631    }
2632
2633    if (mMetaKeyMap.size() > 0) {
2634        ALOGW("'keys' atom seen again, discarding existing entries");
2635        mMetaKeyMap.clear();
2636    }
2637
2638    off64_t keyOffset = offset + 8;
2639    off64_t stopOffset = offset + size;
2640    for (size_t i = 1; i <= count; i++) {
2641        if (keyOffset + 8 > stopOffset) {
2642            return ERROR_MALFORMED;
2643        }
2644
2645        uint32_t keySize;
2646        if (!mDataSource->getUInt32(keyOffset, &keySize)
2647                || keySize < 8
2648                || keyOffset + keySize > stopOffset) {
2649            return ERROR_MALFORMED;
2650        }
2651
2652        uint32_t type;
2653        if (!mDataSource->getUInt32(keyOffset + 4, &type)
2654                || type != FOURCC('m', 'd', 't', 'a')) {
2655            return ERROR_MALFORMED;
2656        }
2657
2658        keySize -= 8;
2659        keyOffset += 8;
2660
2661        auto keyData = heapbuffer<uint8_t>(keySize);
2662        if (keyData.get() == NULL) {
2663            return ERROR_MALFORMED;
2664        }
2665        if (mDataSource->readAt(
2666                keyOffset, keyData.get(), keySize) < (ssize_t) keySize) {
2667            return ERROR_MALFORMED;
2668        }
2669
2670        AString key((const char *)keyData.get(), keySize);
2671        mMetaKeyMap.add(i, key);
2672
2673        keyOffset += keySize;
2674    }
2675    return OK;
2676}
2677
2678status_t MPEG4Extractor::parseQTMetaVal(
2679        int32_t keyId, off64_t offset, size_t size) {
2680    ssize_t index = mMetaKeyMap.indexOfKey(keyId);
2681    if (index < 0) {
2682        // corresponding key is not present, ignore
2683        return ERROR_MALFORMED;
2684    }
2685
2686    if (size <= 16) {
2687        return ERROR_MALFORMED;
2688    }
2689    uint32_t dataSize;
2690    if (!mDataSource->getUInt32(offset, &dataSize)
2691            || dataSize > size || dataSize <= 16) {
2692        return ERROR_MALFORMED;
2693    }
2694    uint32_t atomFourCC;
2695    if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
2696            || atomFourCC != FOURCC('d', 'a', 't', 'a')) {
2697        return ERROR_MALFORMED;
2698    }
2699    uint32_t dataType;
2700    if (!mDataSource->getUInt32(offset + 8, &dataType)
2701            || ((dataType & 0xff000000) != 0)) {
2702        // not well-known type
2703        return ERROR_MALFORMED;
2704    }
2705
2706    dataSize -= 16;
2707    offset += 16;
2708
2709    if (dataType == 23 && dataSize >= 4) {
2710        // BE Float32
2711        uint32_t val;
2712        if (!mDataSource->getUInt32(offset, &val)) {
2713            return ERROR_MALFORMED;
2714        }
2715        if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
2716            mFileMetaData.setFloat(kKeyCaptureFramerate, *(float *)&val);
2717        }
2718    } else if (dataType == 67 && dataSize >= 4) {
2719        // BE signed int32
2720        uint32_t val;
2721        if (!mDataSource->getUInt32(offset, &val)) {
2722            return ERROR_MALFORMED;
2723        }
2724        if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
2725            mFileMetaData.setInt32(kKeyTemporalLayerCount, val);
2726        }
2727    } else {
2728        // add more keys if needed
2729        ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
2730    }
2731
2732    return OK;
2733}
2734
2735status_t MPEG4Extractor::parseTrackHeader(
2736        off64_t data_offset, off64_t data_size) {
2737    if (data_size < 4) {
2738        return ERROR_MALFORMED;
2739    }
2740
2741    uint8_t version;
2742    if (mDataSource->readAt(data_offset, &version, 1) < 1) {
2743        return ERROR_IO;
2744    }
2745
2746    size_t dynSize = (version == 1) ? 36 : 24;
2747
2748    uint8_t buffer[36 + 60];
2749
2750    if (data_size != (off64_t)dynSize + 60) {
2751        return ERROR_MALFORMED;
2752    }
2753
2754    if (mDataSource->readAt(
2755                data_offset, buffer, data_size) < (ssize_t)data_size) {
2756        return ERROR_IO;
2757    }
2758
2759    uint64_t ctime __unused, mtime __unused, duration __unused;
2760    int32_t id;
2761
2762    if (version == 1) {
2763        ctime = U64_AT(&buffer[4]);
2764        mtime = U64_AT(&buffer[12]);
2765        id = U32_AT(&buffer[20]);
2766        duration = U64_AT(&buffer[28]);
2767    } else if (version == 0) {
2768        ctime = U32_AT(&buffer[4]);
2769        mtime = U32_AT(&buffer[8]);
2770        id = U32_AT(&buffer[12]);
2771        duration = U32_AT(&buffer[20]);
2772    } else {
2773        return ERROR_UNSUPPORTED;
2774    }
2775
2776    if (mLastTrack == NULL)
2777        return ERROR_MALFORMED;
2778
2779    mLastTrack->meta.setInt32(kKeyTrackID, id);
2780
2781    size_t matrixOffset = dynSize + 16;
2782    int32_t a00 = U32_AT(&buffer[matrixOffset]);
2783    int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
2784    int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
2785    int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
2786
2787#if 0
2788    int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
2789    int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
2790
2791    ALOGI("x' = %.2f * x + %.2f * y + %.2f",
2792         a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
2793    ALOGI("y' = %.2f * x + %.2f * y + %.2f",
2794         a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
2795#endif
2796
2797    uint32_t rotationDegrees;
2798
2799    static const int32_t kFixedOne = 0x10000;
2800    if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
2801        // Identity, no rotation
2802        rotationDegrees = 0;
2803    } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2804        rotationDegrees = 90;
2805    } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2806        rotationDegrees = 270;
2807    } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2808        rotationDegrees = 180;
2809    } else {
2810        ALOGW("We only support 0,90,180,270 degree rotation matrices");
2811        rotationDegrees = 0;
2812    }
2813
2814    if (rotationDegrees != 0) {
2815        mLastTrack->meta.setInt32(kKeyRotation, rotationDegrees);
2816    }
2817
2818    // Handle presentation display size, which could be different
2819    // from the image size indicated by kKeyWidth and kKeyHeight.
2820    uint32_t width = U32_AT(&buffer[dynSize + 52]);
2821    uint32_t height = U32_AT(&buffer[dynSize + 56]);
2822    mLastTrack->meta.setInt32(kKeyDisplayWidth, width >> 16);
2823    mLastTrack->meta.setInt32(kKeyDisplayHeight, height >> 16);
2824
2825    return OK;
2826}
2827
2828status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
2829    if (size == 0) {
2830        return OK;
2831    }
2832
2833    if (size < 4 || size == SIZE_MAX) {
2834        return ERROR_MALFORMED;
2835    }
2836
2837    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2838    if (buffer == NULL) {
2839        return ERROR_MALFORMED;
2840    }
2841    if (mDataSource->readAt(
2842                offset, buffer, size) != (ssize_t)size) {
2843        delete[] buffer;
2844        buffer = NULL;
2845
2846        return ERROR_IO;
2847    }
2848
2849    uint32_t flags = U32_AT(buffer);
2850
2851    uint32_t metadataKey = 0;
2852    char chunk[5];
2853    MakeFourCCString(mPath[4], chunk);
2854    ALOGV("meta: %s @ %lld", chunk, (long long)offset);
2855    switch ((int32_t)mPath[4]) {
2856        case FOURCC(0xa9, 'a', 'l', 'b'):
2857        {
2858            metadataKey = kKeyAlbum;
2859            break;
2860        }
2861        case FOURCC(0xa9, 'A', 'R', 'T'):
2862        {
2863            metadataKey = kKeyArtist;
2864            break;
2865        }
2866        case FOURCC('a', 'A', 'R', 'T'):
2867        {
2868            metadataKey = kKeyAlbumArtist;
2869            break;
2870        }
2871        case FOURCC(0xa9, 'd', 'a', 'y'):
2872        {
2873            metadataKey = kKeyYear;
2874            break;
2875        }
2876        case FOURCC(0xa9, 'n', 'a', 'm'):
2877        {
2878            metadataKey = kKeyTitle;
2879            break;
2880        }
2881        case FOURCC(0xa9, 'w', 'r', 't'):
2882        {
2883            metadataKey = kKeyWriter;
2884            break;
2885        }
2886        case FOURCC('c', 'o', 'v', 'r'):
2887        {
2888            metadataKey = kKeyAlbumArt;
2889            break;
2890        }
2891        case FOURCC('g', 'n', 'r', 'e'):
2892        {
2893            metadataKey = kKeyGenre;
2894            break;
2895        }
2896        case FOURCC(0xa9, 'g', 'e', 'n'):
2897        {
2898            metadataKey = kKeyGenre;
2899            break;
2900        }
2901        case FOURCC('c', 'p', 'i', 'l'):
2902        {
2903            if (size == 9 && flags == 21) {
2904                char tmp[16];
2905                sprintf(tmp, "%d",
2906                        (int)buffer[size - 1]);
2907
2908                mFileMetaData.setCString(kKeyCompilation, tmp);
2909            }
2910            break;
2911        }
2912        case FOURCC('t', 'r', 'k', 'n'):
2913        {
2914            if (size == 16 && flags == 0) {
2915                char tmp[16];
2916                uint16_t* pTrack = (uint16_t*)&buffer[10];
2917                uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2918                sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2919
2920                mFileMetaData.setCString(kKeyCDTrackNumber, tmp);
2921            }
2922            break;
2923        }
2924        case FOURCC('d', 'i', 's', 'k'):
2925        {
2926            if ((size == 14 || size == 16) && flags == 0) {
2927                char tmp[16];
2928                uint16_t* pDisc = (uint16_t*)&buffer[10];
2929                uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2930                sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2931
2932                mFileMetaData.setCString(kKeyDiscNumber, tmp);
2933            }
2934            break;
2935        }
2936        case FOURCC('-', '-', '-', '-'):
2937        {
2938            buffer[size] = '\0';
2939            switch (mPath[5]) {
2940                case FOURCC('m', 'e', 'a', 'n'):
2941                    mLastCommentMean.setTo((const char *)buffer + 4);
2942                    break;
2943                case FOURCC('n', 'a', 'm', 'e'):
2944                    mLastCommentName.setTo((const char *)buffer + 4);
2945                    break;
2946                case FOURCC('d', 'a', 't', 'a'):
2947                    if (size < 8) {
2948                        delete[] buffer;
2949                        buffer = NULL;
2950                        ALOGE("b/24346430");
2951                        return ERROR_MALFORMED;
2952                    }
2953                    mLastCommentData.setTo((const char *)buffer + 8);
2954                    break;
2955            }
2956
2957            // Once we have a set of mean/name/data info, go ahead and process
2958            // it to see if its something we are interested in.  Whether or not
2959            // were are interested in the specific tag, make sure to clear out
2960            // the set so we can be ready to process another tuple should one
2961            // show up later in the file.
2962            if ((mLastCommentMean.length() != 0) &&
2963                (mLastCommentName.length() != 0) &&
2964                (mLastCommentData.length() != 0)) {
2965
2966                if (mLastCommentMean == "com.apple.iTunes"
2967                        && mLastCommentName == "iTunSMPB") {
2968                    int32_t delay, padding;
2969                    if (sscanf(mLastCommentData,
2970                               " %*x %x %x %*x", &delay, &padding) == 2) {
2971                        if (mLastTrack == NULL) {
2972                            delete[] buffer;
2973                            return ERROR_MALFORMED;
2974                        }
2975
2976                        mLastTrack->meta.setInt32(kKeyEncoderDelay, delay);
2977                        mLastTrack->meta.setInt32(kKeyEncoderPadding, padding);
2978                    }
2979                }
2980
2981                mLastCommentMean.clear();
2982                mLastCommentName.clear();
2983                mLastCommentData.clear();
2984            }
2985            break;
2986        }
2987
2988        default:
2989            break;
2990    }
2991
2992    if (size >= 8 && metadataKey && !mFileMetaData.hasData(metadataKey)) {
2993        if (metadataKey == kKeyAlbumArt) {
2994            mFileMetaData.setData(
2995                    kKeyAlbumArt, MetaData::TYPE_NONE,
2996                    buffer + 8, size - 8);
2997        } else if (metadataKey == kKeyGenre) {
2998            if (flags == 0) {
2999                // uint8_t genre code, iTunes genre codes are
3000                // the standard id3 codes, except they start
3001                // at 1 instead of 0 (e.g. Pop is 14, not 13)
3002                // We use standard id3 numbering, so subtract 1.
3003                int genrecode = (int)buffer[size - 1];
3004                genrecode--;
3005                if (genrecode < 0) {
3006                    genrecode = 255; // reserved for 'unknown genre'
3007                }
3008                char genre[10];
3009                sprintf(genre, "%d", genrecode);
3010
3011                mFileMetaData.setCString(metadataKey, genre);
3012            } else if (flags == 1) {
3013                // custom genre string
3014                buffer[size] = '\0';
3015
3016                mFileMetaData.setCString(
3017                        metadataKey, (const char *)buffer + 8);
3018            }
3019        } else {
3020            buffer[size] = '\0';
3021
3022            mFileMetaData.setCString(
3023                    metadataKey, (const char *)buffer + 8);
3024        }
3025    }
3026
3027    delete[] buffer;
3028    buffer = NULL;
3029
3030    return OK;
3031}
3032
3033status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
3034    if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
3035        return ERROR_MALFORMED;
3036    }
3037
3038    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3039    if (buffer == NULL) {
3040        return ERROR_MALFORMED;
3041    }
3042    if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
3043        delete[] buffer;
3044        buffer = NULL;
3045
3046        return ERROR_IO;
3047    }
3048
3049    int32_t type = U32_AT(&buffer[0]);
3050    if ((type == FOURCC('n', 'c', 'l', 'x') && size >= 11)
3051            || (type == FOURCC('n', 'c', 'l', 'c') && size >= 10)) {
3052        int32_t primaries = U16_AT(&buffer[4]);
3053        int32_t transfer = U16_AT(&buffer[6]);
3054        int32_t coeffs = U16_AT(&buffer[8]);
3055        bool fullRange = (type == FOURCC('n', 'c', 'l', 'x')) && (buffer[10] & 128);
3056
3057        ColorAspects aspects;
3058        ColorUtils::convertIsoColorAspectsToCodecAspects(
3059                primaries, transfer, coeffs, fullRange, aspects);
3060
3061        // only store the first color specification
3062        if (!mLastTrack->meta.hasData(kKeyColorPrimaries)) {
3063            mLastTrack->meta.setInt32(kKeyColorPrimaries, aspects.mPrimaries);
3064            mLastTrack->meta.setInt32(kKeyTransferFunction, aspects.mTransfer);
3065            mLastTrack->meta.setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs);
3066            mLastTrack->meta.setInt32(kKeyColorRange, aspects.mRange);
3067        }
3068    }
3069
3070    delete[] buffer;
3071    buffer = NULL;
3072
3073    return OK;
3074}
3075
3076status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
3077    if (size < 4 || size == SIZE_MAX) {
3078        return ERROR_MALFORMED;
3079    }
3080
3081    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3082    if (buffer == NULL) {
3083        return ERROR_MALFORMED;
3084    }
3085    if (mDataSource->readAt(
3086                offset, buffer, size) != (ssize_t)size) {
3087        delete[] buffer;
3088        buffer = NULL;
3089
3090        return ERROR_IO;
3091    }
3092
3093    uint32_t metadataKey = 0;
3094    switch (mPath[depth]) {
3095        case FOURCC('t', 'i', 't', 'l'):
3096        {
3097            metadataKey = kKeyTitle;
3098            break;
3099        }
3100        case FOURCC('p', 'e', 'r', 'f'):
3101        {
3102            metadataKey = kKeyArtist;
3103            break;
3104        }
3105        case FOURCC('a', 'u', 't', 'h'):
3106        {
3107            metadataKey = kKeyWriter;
3108            break;
3109        }
3110        case FOURCC('g', 'n', 'r', 'e'):
3111        {
3112            metadataKey = kKeyGenre;
3113            break;
3114        }
3115        case FOURCC('a', 'l', 'b', 'm'):
3116        {
3117            if (buffer[size - 1] != '\0') {
3118              char tmp[4];
3119              sprintf(tmp, "%u", buffer[size - 1]);
3120
3121              mFileMetaData.setCString(kKeyCDTrackNumber, tmp);
3122            }
3123
3124            metadataKey = kKeyAlbum;
3125            break;
3126        }
3127        case FOURCC('y', 'r', 'r', 'c'):
3128        {
3129            if (size < 6) {
3130                delete[] buffer;
3131                buffer = NULL;
3132                ALOGE("b/62133227");
3133                android_errorWriteLog(0x534e4554, "62133227");
3134                return ERROR_MALFORMED;
3135            }
3136            char tmp[5];
3137            uint16_t year = U16_AT(&buffer[4]);
3138
3139            if (year < 10000) {
3140                sprintf(tmp, "%u", year);
3141
3142                mFileMetaData.setCString(kKeyYear, tmp);
3143            }
3144            break;
3145        }
3146
3147        default:
3148            break;
3149    }
3150
3151    if (metadataKey > 0) {
3152        bool isUTF8 = true; // Common case
3153        char16_t *framedata = NULL;
3154        int len16 = 0; // Number of UTF-16 characters
3155
3156        // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
3157        if (size < 6) {
3158            delete[] buffer;
3159            buffer = NULL;
3160            return ERROR_MALFORMED;
3161        }
3162
3163        if (size - 6 >= 4) {
3164            len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
3165            framedata = (char16_t *)(buffer + 6);
3166            if (0xfffe == *framedata) {
3167                // endianness marker (BOM) doesn't match host endianness
3168                for (int i = 0; i < len16; i++) {
3169                    framedata[i] = bswap_16(framedata[i]);
3170                }
3171                // BOM is now swapped to 0xfeff, we will execute next block too
3172            }
3173
3174            if (0xfeff == *framedata) {
3175                // Remove the BOM
3176                framedata++;
3177                len16--;
3178                isUTF8 = false;
3179            }
3180            // else normal non-zero-length UTF-8 string
3181            // we can't handle UTF-16 without BOM as there is no other
3182            // indication of encoding.
3183        }
3184
3185        if (isUTF8) {
3186            buffer[size] = 0;
3187            mFileMetaData.setCString(metadataKey, (const char *)buffer + 6);
3188        } else {
3189            // Convert from UTF-16 string to UTF-8 string.
3190            String8 tmpUTF8str(framedata, len16);
3191            mFileMetaData.setCString(metadataKey, tmpUTF8str.string());
3192        }
3193    }
3194
3195    delete[] buffer;
3196    buffer = NULL;
3197
3198    return OK;
3199}
3200
3201void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
3202    ID3 id3(mDataSource, true /* ignorev1 */, offset);
3203
3204    if (id3.isValid()) {
3205        struct Map {
3206            int key;
3207            const char *tag1;
3208            const char *tag2;
3209        };
3210        static const Map kMap[] = {
3211            { kKeyAlbum, "TALB", "TAL" },
3212            { kKeyArtist, "TPE1", "TP1" },
3213            { kKeyAlbumArtist, "TPE2", "TP2" },
3214            { kKeyComposer, "TCOM", "TCM" },
3215            { kKeyGenre, "TCON", "TCO" },
3216            { kKeyTitle, "TIT2", "TT2" },
3217            { kKeyYear, "TYE", "TYER" },
3218            { kKeyAuthor, "TXT", "TEXT" },
3219            { kKeyCDTrackNumber, "TRK", "TRCK" },
3220            { kKeyDiscNumber, "TPA", "TPOS" },
3221            { kKeyCompilation, "TCP", "TCMP" },
3222        };
3223        static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
3224
3225        for (size_t i = 0; i < kNumMapEntries; ++i) {
3226            if (!mFileMetaData.hasData(kMap[i].key)) {
3227                ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
3228                if (it->done()) {
3229                    delete it;
3230                    it = new ID3::Iterator(id3, kMap[i].tag2);
3231                }
3232
3233                if (it->done()) {
3234                    delete it;
3235                    continue;
3236                }
3237
3238                String8 s;
3239                it->getString(&s);
3240                delete it;
3241
3242                mFileMetaData.setCString(kMap[i].key, s);
3243            }
3244        }
3245
3246        size_t dataSize;
3247        String8 mime;
3248        const void *data = id3.getAlbumArt(&dataSize, &mime);
3249
3250        if (data) {
3251            mFileMetaData.setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
3252            mFileMetaData.setCString(kKeyAlbumArtMIME, mime.string());
3253        }
3254    }
3255}
3256
3257MediaTrack *MPEG4Extractor::getTrack(size_t index) {
3258    status_t err;
3259    if ((err = readMetaData()) != OK) {
3260        return NULL;
3261    }
3262
3263    Track *track = mFirstTrack;
3264    while (index > 0) {
3265        if (track == NULL) {
3266            return NULL;
3267        }
3268
3269        track = track->next;
3270        --index;
3271    }
3272
3273    if (track == NULL) {
3274        return NULL;
3275    }
3276
3277
3278    Trex *trex = NULL;
3279    int32_t trackId;
3280    if (track->meta.findInt32(kKeyTrackID, &trackId)) {
3281        for (size_t i = 0; i < mTrex.size(); i++) {
3282            Trex *t = &mTrex.editItemAt(i);
3283            if (t->track_ID == (uint32_t) trackId) {
3284                trex = t;
3285                break;
3286            }
3287        }
3288    } else {
3289        ALOGE("b/21657957");
3290        return NULL;
3291    }
3292
3293    ALOGV("getTrack called, pssh: %zu", mPssh.size());
3294
3295    const char *mime;
3296    if (!track->meta.findCString(kKeyMIMEType, &mime)) {
3297        return NULL;
3298    }
3299
3300    sp<ItemTable> itemTable;
3301    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
3302        uint32_t type;
3303        const void *data;
3304        size_t size;
3305        if (!track->meta.findData(kKeyAVCC, &type, &data, &size)) {
3306            return NULL;
3307        }
3308
3309        const uint8_t *ptr = (const uint8_t *)data;
3310
3311        if (size < 7 || ptr[0] != 1) {  // configurationVersion == 1
3312            return NULL;
3313        }
3314    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
3315            || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
3316        uint32_t type;
3317        const void *data;
3318        size_t size;
3319        if (!track->meta.findData(kKeyHVCC, &type, &data, &size)) {
3320            return NULL;
3321        }
3322
3323        const uint8_t *ptr = (const uint8_t *)data;
3324
3325        if (size < 22 || ptr[0] != 1) {  // configurationVersion == 1
3326            return NULL;
3327        }
3328        if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
3329            itemTable = mItemTable;
3330        }
3331    }
3332
3333    MPEG4Source *source =  new MPEG4Source(
3334            track->meta, mDataSource, track->timescale, track->sampleTable,
3335            mSidxEntries, trex, mMoofOffset, itemTable);
3336    if (source->init() != OK) {
3337        delete source;
3338        return NULL;
3339    }
3340    return source;
3341}
3342
3343// static
3344status_t MPEG4Extractor::verifyTrack(Track *track) {
3345    const char *mime;
3346    CHECK(track->meta.findCString(kKeyMIMEType, &mime));
3347
3348    uint32_t type;
3349    const void *data;
3350    size_t size;
3351    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
3352        if (!track->meta.findData(kKeyAVCC, &type, &data, &size)
3353                || type != kTypeAVCC) {
3354            return ERROR_MALFORMED;
3355        }
3356    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
3357        if (!track->meta.findData(kKeyHVCC, &type, &data, &size)
3358                    || type != kTypeHVCC) {
3359            return ERROR_MALFORMED;
3360        }
3361    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
3362            || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
3363            || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
3364        if (!track->meta.findData(kKeyESDS, &type, &data, &size)
3365                || type != kTypeESDS) {
3366            return ERROR_MALFORMED;
3367        }
3368    }
3369
3370    if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
3371        // Make sure we have all the metadata we need.
3372        ALOGE("stbl atom missing/invalid.");
3373        return ERROR_MALFORMED;
3374    }
3375
3376    if (track->timescale == 0) {
3377        ALOGE("timescale invalid.");
3378        return ERROR_MALFORMED;
3379    }
3380
3381    return OK;
3382}
3383
// Audio object type (AOT) codes. Entries that are commented out are kept for
// reference only and are not defined as enumerators.
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1,
    // AOT_NULL_OBJECT      = 0,
    // AOT_AAC_MAIN         = 1,    // Main profile
    AOT_AAC_LC              = 2,    // Low Complexity object
    // AOT_AAC_SSR          = 3,
    // AOT_AAC_LTP          = 4,
    AOT_SBR                 = 5,
    // AOT_AAC_SCAL         = 6,
    // AOT_TWIN_VQ          = 7,
    // AOT_CELP             = 8,
    // AOT_HVXC             = 9,
    // AOT_RSVD_10          = 10,   // (reserved)
    // AOT_RSVD_11          = 11,   // (reserved)
    // AOT_TTSI             = 12,   // TTSI Object
    // AOT_MAIN_SYNTH       = 13,   // Main Synthetic object
    // AOT_WAV_TAB_SYNTH    = 14,   // Wavetable Synthesis object
    // AOT_GEN_MIDI         = 15,   // General MIDI object
    // AOT_ALG_SYNTH_AUD_FX = 16,   // Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC           = 17,   // Error Resilient(ER) AAC Low Complexity
    // AOT_RSVD_18          = 18,   // (reserved)
    // AOT_ER_AAC_LTP       = 19,   // Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL         = 20,   // Error Resilient(ER) AAC Scalable object
    // AOT_ER_TWIN_VQ       = 21,   // Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC             = 22,   // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD           = 23,   // Error Resilient(ER) AAC LowDelay object
    // AOT_ER_CELP          = 24,   // Error Resilient(ER) CELP object
    // AOT_ER_HVXC          = 25,   // Error Resilient(ER) HVXC object
    // AOT_ER_HILN          = 26,   // Error Resilient(ER) HILN object
    // AOT_ER_PARA          = 27,   // Error Resilient(ER) Parametric object
    // AOT_RSVD_28          = 28,   // might become SSC
    AOT_PS                  = 29,   // PS, Parametric Stereo (includes SBR)
    // AOT_MPEGS            = 30,   // MPEG Surround

    AOT_ESCAPE              = 31,   // Signal AOT uses more than 5 bits

    // AOT_MP3ONMP4_L1      = 32,   // MPEG-Layer1 in mp4
    // AOT_MP3ONMP4_L2      = 33,   // MPEG-Layer2 in mp4
    // AOT_MP3ONMP4_L3      = 34,   // MPEG-Layer3 in mp4
    // AOT_RSVD_35          = 35,   // might become DST
    // AOT_RSVD_36          = 36,   // might become ALS
    // AOT_AAC_SLS          = 37,   // AAC + SLS
    // AOT_SLS              = 38,   // SLS
    // AOT_ER_AAC_ELD       = 39,   // AAC Enhanced Low Delay

    // AOT_USAC             = 42,   // USAC
    // AOT_SAOC             = 43,   // SAOC
    // AOT_LD_MPEGS         = 44,   // Low Delay MPEG Surround

    // AOT_RSVD50           = 50,   // Interim AOT for Rsvd50
};
3435
3436status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
3437        const void *esds_data, size_t esds_size) {
3438    ESDS esds(esds_data, esds_size);
3439
3440    uint8_t objectTypeIndication;
3441    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
3442        return ERROR_MALFORMED;
3443    }
3444
3445    if (objectTypeIndication == 0xe1) {
3446        // This isn't MPEG4 audio at all, it's QCELP 14k...
3447        if (mLastTrack == NULL)
3448            return ERROR_MALFORMED;
3449
3450        mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
3451        return OK;
3452    }
3453
3454    if (objectTypeIndication  == 0x6b) {
3455        // The media subtype is MP3 audio
3456        // Our software MP3 audio decoder may not be able to handle
3457        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
3458        ALOGE("MP3 track in MP4/3GPP file is not supported");
3459        return ERROR_UNSUPPORTED;
3460    }
3461
3462    if (mLastTrack != NULL) {
3463        uint32_t maxBitrate = 0;
3464        uint32_t avgBitrate = 0;
3465        esds.getBitRate(&maxBitrate, &avgBitrate);
3466        if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
3467            mLastTrack->meta.setInt32(kKeyMaxBitRate, (int32_t)maxBitrate);
3468        }
3469        if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
3470            mLastTrack->meta.setInt32(kKeyBitRate, (int32_t)avgBitrate);
3471        }
3472    }
3473
3474    const uint8_t *csd;
3475    size_t csd_size;
3476    if (esds.getCodecSpecificInfo(
3477                (const void **)&csd, &csd_size) != OK) {
3478        return ERROR_MALFORMED;
3479    }
3480
3481    if (kUseHexDump) {
3482        printf("ESD of size %zu\n", csd_size);
3483        hexdump(csd, csd_size);
3484    }
3485
3486    if (csd_size == 0) {
3487        // There's no further information, i.e. no codec specific data
3488        // Let's assume that the information provided in the mpeg4 headers
3489        // is accurate and hope for the best.
3490
3491        return OK;
3492    }
3493
3494    if (csd_size < 2) {
3495        return ERROR_MALFORMED;
3496    }
3497
3498    static uint32_t kSamplingRate[] = {
3499        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
3500        16000, 12000, 11025, 8000, 7350
3501    };
3502
3503    ABitReader br(csd, csd_size);
3504    uint32_t objectType = br.getBits(5);
3505
3506    if (objectType == 31) {  // AAC-ELD => additional 6 bits
3507        objectType = 32 + br.getBits(6);
3508    }
3509
3510    if (mLastTrack == NULL)
3511        return ERROR_MALFORMED;
3512
3513    //keep AOT type
3514    mLastTrack->meta.setInt32(kKeyAACAOT, objectType);
3515
3516    uint32_t freqIndex = br.getBits(4);
3517
3518    int32_t sampleRate = 0;
3519    int32_t numChannels = 0;
3520    if (freqIndex == 15) {
3521        if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
3522        sampleRate = br.getBits(24);
3523        numChannels = br.getBits(4);
3524    } else {
3525        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3526        numChannels = br.getBits(4);
3527
3528        if (freqIndex == 13 || freqIndex == 14) {
3529            return ERROR_MALFORMED;
3530        }
3531
3532        sampleRate = kSamplingRate[freqIndex];
3533    }
3534
3535    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
3536        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3537        uint32_t extFreqIndex = br.getBits(4);
3538        int32_t extSampleRate __unused;
3539        if (extFreqIndex == 15) {
3540            if (csd_size < 8) {
3541                return ERROR_MALFORMED;
3542            }
3543            if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
3544            extSampleRate = br.getBits(24);
3545        } else {
3546            if (extFreqIndex == 13 || extFreqIndex == 14) {
3547                return ERROR_MALFORMED;
3548            }
3549            extSampleRate = kSamplingRate[extFreqIndex];
3550        }
3551        //TODO: save the extension sampling rate value in meta data =>
3552        //      mLastTrack->meta.setInt32(kKeyExtSampleRate, extSampleRate);
3553    }
3554
3555    switch (numChannels) {
3556        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
3557        case 0:
3558        case 1:// FC
3559        case 2:// FL FR
3560        case 3:// FC, FL FR
3561        case 4:// FC, FL FR, RC
3562        case 5:// FC, FL FR, SL SR
3563        case 6:// FC, FL FR, SL SR, LFE
3564            //numChannels already contains the right value
3565            break;
3566        case 11:// FC, FL FR, SL SR, RC, LFE
3567            numChannels = 7;
3568            break;
3569        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
3570        case 12:// FC, FL  FR,  SL SR, RL RR, LFE
3571        case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
3572            numChannels = 8;
3573            break;
3574        default:
3575            return ERROR_UNSUPPORTED;
3576    }
3577
3578    {
3579        if (objectType == AOT_SBR || objectType == AOT_PS) {
3580            if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3581            objectType = br.getBits(5);
3582
3583            if (objectType == AOT_ESCAPE) {
3584                if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
3585                objectType = 32 + br.getBits(6);
3586            }
3587        }
3588        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
3589                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
3590                objectType == AOT_ER_BSAC) {
3591            if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
3592            const int32_t frameLengthFlag __unused = br.getBits(1);
3593
3594            const int32_t dependsOnCoreCoder = br.getBits(1);
3595
3596            if (dependsOnCoreCoder ) {
3597                if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
3598                const int32_t coreCoderDelay __unused = br.getBits(14);
3599            }
3600
3601            int32_t extensionFlag = -1;
3602            if (br.numBitsLeft() > 0) {
3603                extensionFlag = br.getBits(1);
3604            } else {
3605                switch (objectType) {
3606                // 14496-3 4.5.1.1 extensionFlag
3607                case AOT_AAC_LC:
3608                    extensionFlag = 0;
3609                    break;
3610                case AOT_ER_AAC_LC:
3611                case AOT_ER_AAC_SCAL:
3612                case AOT_ER_BSAC:
3613                case AOT_ER_AAC_LD:
3614                    extensionFlag = 1;
3615                    break;
3616                default:
3617                    return ERROR_MALFORMED;
3618                    break;
3619                }
3620                ALOGW("csd missing extension flag; assuming %d for object type %u.",
3621                        extensionFlag, objectType);
3622            }
3623
3624            if (numChannels == 0) {
3625                int32_t channelsEffectiveNum = 0;
3626                int32_t channelsNum = 0;
3627                if (br.numBitsLeft() < 32) {
3628                    return ERROR_MALFORMED;
3629                }
3630                const int32_t ElementInstanceTag __unused = br.getBits(4);
3631                const int32_t Profile __unused = br.getBits(2);
3632                const int32_t SamplingFrequencyIndex __unused = br.getBits(4);
3633                const int32_t NumFrontChannelElements = br.getBits(4);
3634                const int32_t NumSideChannelElements = br.getBits(4);
3635                const int32_t NumBackChannelElements = br.getBits(4);
3636                const int32_t NumLfeChannelElements = br.getBits(2);
3637                const int32_t NumAssocDataElements __unused = br.getBits(3);
3638                const int32_t NumValidCcElements __unused = br.getBits(4);
3639
3640                const int32_t MonoMixdownPresent = br.getBits(1);
3641
3642                if (MonoMixdownPresent != 0) {
3643                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3644                    const int32_t MonoMixdownElementNumber __unused = br.getBits(4);
3645                }
3646
3647                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
3648                const int32_t StereoMixdownPresent = br.getBits(1);
3649                if (StereoMixdownPresent != 0) {
3650                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3651                    const int32_t StereoMixdownElementNumber __unused = br.getBits(4);
3652                }
3653
3654                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
3655                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
3656                if (MatrixMixdownIndexPresent != 0) {
3657                    if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
3658                    const int32_t MatrixMixdownIndex __unused = br.getBits(2);
3659                    const int32_t PseudoSurroundEnable __unused = br.getBits(1);
3660                }
3661
3662                int i;
3663                for (i=0; i < NumFrontChannelElements; i++) {
3664                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3665                    const int32_t FrontElementIsCpe = br.getBits(1);
3666                    const int32_t FrontElementTagSelect __unused = br.getBits(4);
3667                    channelsNum += FrontElementIsCpe ? 2 : 1;
3668                }
3669
3670                for (i=0; i < NumSideChannelElements; i++) {
3671                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3672                    const int32_t SideElementIsCpe = br.getBits(1);
3673                    const int32_t SideElementTagSelect __unused = br.getBits(4);
3674                    channelsNum += SideElementIsCpe ? 2 : 1;
3675                }
3676
3677                for (i=0; i < NumBackChannelElements; i++) {
3678                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3679                    const int32_t BackElementIsCpe = br.getBits(1);
3680                    const int32_t BackElementTagSelect __unused = br.getBits(4);
3681                    channelsNum += BackElementIsCpe ? 2 : 1;
3682                }
3683                channelsEffectiveNum = channelsNum;
3684
3685                for (i=0; i < NumLfeChannelElements; i++) {
3686                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3687                    const int32_t LfeElementTagSelect __unused = br.getBits(4);
3688                    channelsNum += 1;
3689                }
3690                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
3691                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
3692                numChannels = channelsNum;
3693            }
3694        }
3695    }
3696
3697    if (numChannels == 0) {
3698        return ERROR_UNSUPPORTED;
3699    }
3700
3701    if (mLastTrack == NULL)
3702        return ERROR_MALFORMED;
3703
3704    int32_t prevSampleRate;
3705    CHECK(mLastTrack->meta.findInt32(kKeySampleRate, &prevSampleRate));
3706
3707    if (prevSampleRate != sampleRate) {
3708        ALOGV("mpeg4 audio sample rate different from previous setting. "
3709             "was: %d, now: %d", prevSampleRate, sampleRate);
3710    }
3711
3712    mLastTrack->meta.setInt32(kKeySampleRate, sampleRate);
3713
3714    int32_t prevChannelCount;
3715    CHECK(mLastTrack->meta.findInt32(kKeyChannelCount, &prevChannelCount));
3716
3717    if (prevChannelCount != numChannels) {
3718        ALOGV("mpeg4 audio channel count different from previous setting. "
3719             "was: %d, now: %d", prevChannelCount, numChannels);
3720    }
3721
3722    mLastTrack->meta.setInt32(kKeyChannelCount, numChannels);
3723
3724    return OK;
3725}
3726
3727////////////////////////////////////////////////////////////////////////////////
3728
3729MPEG4Source::MPEG4Source(
3730        MetaDataBase &format,
3731        DataSourceBase *dataSource,
3732        int32_t timeScale,
3733        const sp<SampleTable> &sampleTable,
3734        Vector<SidxEntry> &sidx,
3735        const Trex *trex,
3736        off64_t firstMoofOffset,
3737        const sp<ItemTable> &itemTable)
3738    : mFormat(format),
3739      mDataSource(dataSource),
3740      mTimescale(timeScale),
3741      mSampleTable(sampleTable),
3742      mCurrentSampleIndex(0),
3743      mCurrentFragmentIndex(0),
3744      mSegments(sidx),
3745      mTrex(trex),
3746      mFirstMoofOffset(firstMoofOffset),
3747      mCurrentMoofOffset(firstMoofOffset),
3748      mNextMoofOffset(-1),
3749      mCurrentTime(0),
3750      mCurrentSampleInfoAllocSize(0),
3751      mCurrentSampleInfoSizes(NULL),
3752      mCurrentSampleInfoOffsetsAllocSize(0),
3753      mCurrentSampleInfoOffsets(NULL),
3754      mIsAVC(false),
3755      mIsHEVC(false),
3756      mNALLengthSize(0),
3757      mStarted(false),
3758      mGroup(NULL),
3759      mBuffer(NULL),
3760      mWantsNALFragments(false),
3761      mSrcBuffer(NULL),
3762      mIsHeif(itemTable != NULL),
3763      mItemTable(itemTable) {
3764
3765    memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
3766
3767    mFormat.findInt32(kKeyCryptoMode, &mCryptoMode);
3768    mDefaultIVSize = 0;
3769    mFormat.findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
3770    uint32_t keytype;
3771    const void *key;
3772    size_t keysize;
3773    if (mFormat.findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
3774        CHECK(keysize <= 16);
3775        memset(mCryptoKey, 0, 16);
3776        memcpy(mCryptoKey, key, keysize);
3777    }
3778
3779    const char *mime;
3780    bool success = mFormat.findCString(kKeyMIMEType, &mime);
3781    CHECK(success);
3782
3783    mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
3784    mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
3785              !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
3786
3787    if (mIsAVC) {
3788        uint32_t type;
3789        const void *data;
3790        size_t size;
3791        CHECK(format.findData(kKeyAVCC, &type, &data, &size));
3792
3793        const uint8_t *ptr = (const uint8_t *)data;
3794
3795        CHECK(size >= 7);
3796        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
3797
3798        // The number of bytes used to encode the length of a NAL unit.
3799        mNALLengthSize = 1 + (ptr[4] & 3);
3800    } else if (mIsHEVC) {
3801        uint32_t type;
3802        const void *data;
3803        size_t size;
3804        CHECK(format.findData(kKeyHVCC, &type, &data, &size));
3805
3806        const uint8_t *ptr = (const uint8_t *)data;
3807
3808        CHECK(size >= 22);
3809        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
3810
3811        mNALLengthSize = 1 + (ptr[14 + 7] & 3);
3812    }
3813
3814    CHECK(format.findInt32(kKeyTrackID, &mTrackId));
3815
3816}
3817
3818status_t MPEG4Source::init() {
3819    if (mFirstMoofOffset != 0) {
3820        off64_t offset = mFirstMoofOffset;
3821        return parseChunk(&offset);
3822    }
3823    return OK;
3824}
3825
3826MPEG4Source::~MPEG4Source() {
3827    if (mStarted) {
3828        stop();
3829    }
3830    free(mCurrentSampleInfoSizes);
3831    free(mCurrentSampleInfoOffsets);
3832}
3833
3834status_t MPEG4Source::start(MetaDataBase *params) {
3835    Mutex::Autolock autoLock(mLock);
3836
3837    CHECK(!mStarted);
3838
3839    int32_t val;
3840    if (params && params->findInt32(kKeyWantsNALFragments, &val)
3841        && val != 0) {
3842        mWantsNALFragments = true;
3843    } else {
3844        mWantsNALFragments = false;
3845    }
3846
3847    int32_t tmp;
3848    CHECK(mFormat.findInt32(kKeyMaxInputSize, &tmp));
3849    size_t max_size = tmp;
3850
3851    // A somewhat arbitrary limit that should be sufficient for 8k video frames
3852    // If you see the message below for a valid input stream: increase the limit
3853    const size_t kMaxBufferSize = 64 * 1024 * 1024;
3854    if (max_size > kMaxBufferSize) {
3855        ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize);
3856        return ERROR_MALFORMED;
3857    }
3858    if (max_size == 0) {
3859        ALOGE("zero max input size");
3860        return ERROR_MALFORMED;
3861    }
3862
3863    // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize.
3864    const size_t kInitialBuffers = 2;
3865    const size_t kMaxBuffers = 8;
3866    const size_t realMaxBuffers = min(kMaxBufferSize / max_size, kMaxBuffers);
3867    mGroup = new MediaBufferGroup(kInitialBuffers, max_size, realMaxBuffers);
3868    mSrcBuffer = new (std::nothrow) uint8_t[max_size];
3869    if (mSrcBuffer == NULL) {
3870        // file probably specified a bad max size
3871        delete mGroup;
3872        mGroup = NULL;
3873        return ERROR_MALFORMED;
3874    }
3875
3876    mStarted = true;
3877
3878    return OK;
3879}
3880
3881status_t MPEG4Source::stop() {
3882    Mutex::Autolock autoLock(mLock);
3883
3884    CHECK(mStarted);
3885
3886    if (mBuffer != NULL) {
3887        mBuffer->release();
3888        mBuffer = NULL;
3889    }
3890
3891    delete[] mSrcBuffer;
3892    mSrcBuffer = NULL;
3893
3894    delete mGroup;
3895    mGroup = NULL;
3896
3897    mStarted = false;
3898    mCurrentSampleIndex = 0;
3899
3900    return OK;
3901}
3902
3903status_t MPEG4Source::parseChunk(off64_t *offset) {
3904    uint32_t hdr[2];
3905    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3906        return ERROR_IO;
3907    }
3908    uint64_t chunk_size = ntohl(hdr[0]);
3909    uint32_t chunk_type = ntohl(hdr[1]);
3910    off64_t data_offset = *offset + 8;
3911
3912    if (chunk_size == 1) {
3913        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
3914            return ERROR_IO;
3915        }
3916        chunk_size = ntoh64(chunk_size);
3917        data_offset += 8;
3918
3919        if (chunk_size < 16) {
3920            // The smallest valid chunk is 16 bytes long in this case.
3921            return ERROR_MALFORMED;
3922        }
3923    } else if (chunk_size < 8) {
3924        // The smallest valid chunk is 8 bytes long.
3925        return ERROR_MALFORMED;
3926    }
3927
3928    char chunk[5];
3929    MakeFourCCString(chunk_type, chunk);
3930    ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
3931
3932    off64_t chunk_data_size = *offset + chunk_size - data_offset;
3933
3934    switch(chunk_type) {
3935
3936        case FOURCC('t', 'r', 'a', 'f'):
3937        case FOURCC('m', 'o', 'o', 'f'): {
3938            off64_t stop_offset = *offset + chunk_size;
3939            *offset = data_offset;
3940            while (*offset < stop_offset) {
3941                status_t err = parseChunk(offset);
3942                if (err != OK) {
3943                    return err;
3944                }
3945            }
3946            if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3947                // *offset points to the box following this moof. Find the next moof from there.
3948
3949                while (true) {
3950                    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3951                        // no more box to the end of file.
3952                        break;
3953                    }
3954                    chunk_size = ntohl(hdr[0]);
3955                    chunk_type = ntohl(hdr[1]);
3956                    if (chunk_size == 1) {
3957                        // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box
3958                        // which is defined in 4.2 Object Structure.
3959                        // When chunk_size==1, 8 bytes follows as "largesize".
3960                        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
3961                            return ERROR_IO;
3962                        }
3963                        chunk_size = ntoh64(chunk_size);
3964                        if (chunk_size < 16) {
3965                            // The smallest valid chunk is 16 bytes long in this case.
3966                            return ERROR_MALFORMED;
3967                        }
3968                    } else if (chunk_size == 0) {
3969                        // next box extends to end of file.
3970                    } else if (chunk_size < 8) {
3971                        // The smallest valid chunk is 8 bytes long in this case.
3972                        return ERROR_MALFORMED;
3973                    }
3974
3975                    if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3976                        mNextMoofOffset = *offset;
3977                        break;
3978                    } else if (chunk_size == 0) {
3979                        break;
3980                    }
3981                    *offset += chunk_size;
3982                }
3983            }
3984            break;
3985        }
3986
3987        case FOURCC('t', 'f', 'h', 'd'): {
3988                status_t err;
3989                if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
3990                    return err;
3991                }
3992                *offset += chunk_size;
3993                break;
3994        }
3995
3996        case FOURCC('t', 'r', 'u', 'n'): {
3997                status_t err;
3998                if (mLastParsedTrackId == mTrackId) {
3999                    if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
4000                        return err;
4001                    }
4002                }
4003
4004                *offset += chunk_size;
4005                break;
4006        }
4007
4008        case FOURCC('s', 'a', 'i', 'z'): {
4009            status_t err;
4010            if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
4011                return err;
4012            }
4013            *offset += chunk_size;
4014            break;
4015        }
4016        case FOURCC('s', 'a', 'i', 'o'): {
4017            status_t err;
4018            if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
4019                return err;
4020            }
4021            *offset += chunk_size;
4022            break;
4023        }
4024
4025        case FOURCC('m', 'd', 'a', 't'): {
4026            // parse DRM info if present
4027            ALOGV("MPEG4Source::parseChunk mdat");
4028            // if saiz/saoi was previously observed, do something with the sampleinfos
4029            *offset += chunk_size;
4030            break;
4031        }
4032
4033        default: {
4034            *offset += chunk_size;
4035            break;
4036        }
4037    }
4038    return OK;
4039}
4040
4041status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
4042        off64_t offset, off64_t /* size */) {
4043    ALOGV("parseSampleAuxiliaryInformationSizes");
4044    // 14496-12 8.7.12
4045    uint8_t version;
4046    if (mDataSource->readAt(
4047            offset, &version, sizeof(version))
4048            < (ssize_t)sizeof(version)) {
4049        return ERROR_IO;
4050    }
4051
4052    if (version != 0) {
4053        return ERROR_UNSUPPORTED;
4054    }
4055    offset++;
4056
4057    uint32_t flags;
4058    if (!mDataSource->getUInt24(offset, &flags)) {
4059        return ERROR_IO;
4060    }
4061    offset += 3;
4062
4063    if (flags & 1) {
4064        uint32_t tmp;
4065        if (!mDataSource->getUInt32(offset, &tmp)) {
4066            return ERROR_MALFORMED;
4067        }
4068        mCurrentAuxInfoType = tmp;
4069        offset += 4;
4070        if (!mDataSource->getUInt32(offset, &tmp)) {
4071            return ERROR_MALFORMED;
4072        }
4073        mCurrentAuxInfoTypeParameter = tmp;
4074        offset += 4;
4075    }
4076
4077    uint8_t defsize;
4078    if (mDataSource->readAt(offset, &defsize, 1) != 1) {
4079        return ERROR_MALFORMED;
4080    }
4081    mCurrentDefaultSampleInfoSize = defsize;
4082    offset++;
4083
4084    uint32_t smplcnt;
4085    if (!mDataSource->getUInt32(offset, &smplcnt)) {
4086        return ERROR_MALFORMED;
4087    }
4088    mCurrentSampleInfoCount = smplcnt;
4089    offset += 4;
4090
4091    if (mCurrentDefaultSampleInfoSize != 0) {
4092        ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
4093        return OK;
4094    }
4095    if (smplcnt > mCurrentSampleInfoAllocSize) {
4096        uint8_t * newPtr =  (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
4097        if (newPtr == NULL) {
4098            ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
4099            return NO_MEMORY;
4100        }
4101        mCurrentSampleInfoSizes = newPtr;
4102        mCurrentSampleInfoAllocSize = smplcnt;
4103    }
4104
4105    mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
4106    return OK;
4107}
4108
4109status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
4110        off64_t offset, off64_t /* size */) {
4111    ALOGV("parseSampleAuxiliaryInformationOffsets");
4112    // 14496-12 8.7.13
4113    uint8_t version;
4114    if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
4115        return ERROR_IO;
4116    }
4117    offset++;
4118
4119    uint32_t flags;
4120    if (!mDataSource->getUInt24(offset, &flags)) {
4121        return ERROR_IO;
4122    }
4123    offset += 3;
4124
4125    uint32_t entrycount;
4126    if (!mDataSource->getUInt32(offset, &entrycount)) {
4127        return ERROR_IO;
4128    }
4129    offset += 4;
4130    if (entrycount == 0) {
4131        return OK;
4132    }
4133    if (entrycount > UINT32_MAX / 8) {
4134        return ERROR_MALFORMED;
4135    }
4136
4137    if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
4138        uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
4139        if (newPtr == NULL) {
4140            ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoOffsetsAllocSize, entrycount * 8);
4141            return NO_MEMORY;
4142        }
4143        mCurrentSampleInfoOffsets = newPtr;
4144        mCurrentSampleInfoOffsetsAllocSize = entrycount;
4145    }
4146    mCurrentSampleInfoOffsetCount = entrycount;
4147
4148    if (mCurrentSampleInfoOffsets == NULL) {
4149        return OK;
4150    }
4151
4152    for (size_t i = 0; i < entrycount; i++) {
4153        if (version == 0) {
4154            uint32_t tmp;
4155            if (!mDataSource->getUInt32(offset, &tmp)) {
4156                return ERROR_IO;
4157            }
4158            mCurrentSampleInfoOffsets[i] = tmp;
4159            offset += 4;
4160        } else {
4161            uint64_t tmp;
4162            if (!mDataSource->getUInt64(offset, &tmp)) {
4163                return ERROR_IO;
4164            }
4165            mCurrentSampleInfoOffsets[i] = tmp;
4166            offset += 8;
4167        }
4168    }
4169
4170    // parse clear/encrypted data
4171
4172    off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
4173
4174    drmoffset += mCurrentMoofOffset;
4175    int ivlength;
4176    CHECK(mFormat.findInt32(kKeyCryptoDefaultIVSize, &ivlength));
4177
4178    // only 0, 8 and 16 byte initialization vectors are supported
4179    if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
4180        ALOGW("unsupported IV length: %d", ivlength);
4181        return ERROR_MALFORMED;
4182    }
4183    // read CencSampleAuxiliaryDataFormats
4184    for (size_t i = 0; i < mCurrentSampleInfoCount; i++) {
4185        if (i >= mCurrentSamples.size()) {
4186            ALOGW("too few samples");
4187            break;
4188        }
4189        Sample *smpl = &mCurrentSamples.editItemAt(i);
4190
4191        memset(smpl->iv, 0, 16);
4192        if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) {
4193            return ERROR_IO;
4194        }
4195
4196        drmoffset += ivlength;
4197
4198        int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
4199        if (smplinfosize == 0) {
4200            smplinfosize = mCurrentSampleInfoSizes[i];
4201        }
4202        if (smplinfosize > ivlength) {
4203            uint16_t numsubsamples;
4204            if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) {
4205                return ERROR_IO;
4206            }
4207            drmoffset += 2;
4208            for (size_t j = 0; j < numsubsamples; j++) {
4209                uint16_t numclear;
4210                uint32_t numencrypted;
4211                if (!mDataSource->getUInt16(drmoffset, &numclear)) {
4212                    return ERROR_IO;
4213                }
4214                drmoffset += 2;
4215                if (!mDataSource->getUInt32(drmoffset, &numencrypted)) {
4216                    return ERROR_IO;
4217                }
4218                drmoffset += 4;
4219                smpl->clearsizes.add(numclear);
4220                smpl->encryptedsizes.add(numencrypted);
4221            }
4222        } else {
4223            smpl->clearsizes.add(0);
4224            smpl->encryptedsizes.add(smpl->size);
4225        }
4226    }
4227
4228
4229    return OK;
4230}
4231
4232status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
4233
4234    if (size < 8) {
4235        return -EINVAL;
4236    }
4237
4238    uint32_t flags;
4239    if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
4240        return ERROR_MALFORMED;
4241    }
4242
4243    if (flags & 0xff000000) {
4244        return -EINVAL;
4245    }
4246
4247    if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
4248        return ERROR_MALFORMED;
4249    }
4250
4251    if (mLastParsedTrackId != mTrackId) {
4252        // this is not the right track, skip it
4253        return OK;
4254    }
4255
4256    mTrackFragmentHeaderInfo.mFlags = flags;
4257    mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
4258    offset += 8;
4259    size -= 8;
4260
4261    ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
4262
4263    if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
4264        if (size < 8) {
4265            return -EINVAL;
4266        }
4267
4268        if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
4269            return ERROR_MALFORMED;
4270        }
4271        offset += 8;
4272        size -= 8;
4273    }
4274
4275    if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
4276        if (size < 4) {
4277            return -EINVAL;
4278        }
4279
4280        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
4281            return ERROR_MALFORMED;
4282        }
4283        offset += 4;
4284        size -= 4;
4285    }
4286
4287    if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
4288        if (size < 4) {
4289            return -EINVAL;
4290        }
4291
4292        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
4293            return ERROR_MALFORMED;
4294        }
4295        offset += 4;
4296        size -= 4;
4297    }
4298
4299    if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
4300        if (size < 4) {
4301            return -EINVAL;
4302        }
4303
4304        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
4305            return ERROR_MALFORMED;
4306        }
4307        offset += 4;
4308        size -= 4;
4309    }
4310
4311    if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
4312        if (size < 4) {
4313            return -EINVAL;
4314        }
4315
4316        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
4317            return ERROR_MALFORMED;
4318        }
4319        offset += 4;
4320        size -= 4;
4321    }
4322
4323    if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
4324        mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
4325    }
4326
4327    mTrackFragmentHeaderInfo.mDataOffset = 0;
4328    return OK;
4329}
4330
4331status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
4332
4333    ALOGV("MPEG4Extractor::parseTrackFragmentRun");
4334    if (size < 8) {
4335        return -EINVAL;
4336    }
4337
4338    enum {
4339        kDataOffsetPresent                  = 0x01,
4340        kFirstSampleFlagsPresent            = 0x04,
4341        kSampleDurationPresent              = 0x100,
4342        kSampleSizePresent                  = 0x200,
4343        kSampleFlagsPresent                 = 0x400,
4344        kSampleCompositionTimeOffsetPresent = 0x800,
4345    };
4346
4347    uint32_t flags;
4348    if (!mDataSource->getUInt32(offset, &flags)) {
4349        return ERROR_MALFORMED;
4350    }
4351    // |version| only affects SampleCompositionTimeOffset field.
4352    // If version == 0, SampleCompositionTimeOffset is uint32_t;
4353    // Otherwise, SampleCompositionTimeOffset is int32_t.
4354    // Sample.compositionOffset is defined as int32_t.
4355    uint8_t version = flags >> 24;
4356    flags &= 0xffffff;
4357    ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags);
4358
4359    if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
4360        // These two shall not be used together.
4361        return -EINVAL;
4362    }
4363
4364    uint32_t sampleCount;
4365    if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
4366        return ERROR_MALFORMED;
4367    }
4368    offset += 8;
4369    size -= 8;
4370
4371    uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
4372
4373    uint32_t firstSampleFlags = 0;
4374
4375    if (flags & kDataOffsetPresent) {
4376        if (size < 4) {
4377            return -EINVAL;
4378        }
4379
4380        int32_t dataOffsetDelta;
4381        if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
4382            return ERROR_MALFORMED;
4383        }
4384
4385        dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
4386
4387        offset += 4;
4388        size -= 4;
4389    }
4390
4391    if (flags & kFirstSampleFlagsPresent) {
4392        if (size < 4) {
4393            return -EINVAL;
4394        }
4395
4396        if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
4397            return ERROR_MALFORMED;
4398        }
4399        offset += 4;
4400        size -= 4;
4401    }
4402
4403    uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
4404             sampleCtsOffset = 0;
4405
4406    size_t bytesPerSample = 0;
4407    if (flags & kSampleDurationPresent) {
4408        bytesPerSample += 4;
4409    } else if (mTrackFragmentHeaderInfo.mFlags
4410            & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
4411        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
4412    } else if (mTrex) {
4413        sampleDuration = mTrex->default_sample_duration;
4414    }
4415
4416    if (flags & kSampleSizePresent) {
4417        bytesPerSample += 4;
4418    } else if (mTrackFragmentHeaderInfo.mFlags
4419            & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
4420        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
4421    } else {
4422        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
4423    }
4424
4425    if (flags & kSampleFlagsPresent) {
4426        bytesPerSample += 4;
4427    } else if (mTrackFragmentHeaderInfo.mFlags
4428            & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
4429        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
4430    } else {
4431        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
4432    }
4433
4434    if (flags & kSampleCompositionTimeOffsetPresent) {
4435        bytesPerSample += 4;
4436    } else {
4437        sampleCtsOffset = 0;
4438    }
4439
4440    if (size < (off64_t)(sampleCount * bytesPerSample)) {
4441        return -EINVAL;
4442    }
4443
4444    Sample tmp;
4445    for (uint32_t i = 0; i < sampleCount; ++i) {
4446        if (flags & kSampleDurationPresent) {
4447            if (!mDataSource->getUInt32(offset, &sampleDuration)) {
4448                return ERROR_MALFORMED;
4449            }
4450            offset += 4;
4451        }
4452
4453        if (flags & kSampleSizePresent) {
4454            if (!mDataSource->getUInt32(offset, &sampleSize)) {
4455                return ERROR_MALFORMED;
4456            }
4457            offset += 4;
4458        }
4459
4460        if (flags & kSampleFlagsPresent) {
4461            if (!mDataSource->getUInt32(offset, &sampleFlags)) {
4462                return ERROR_MALFORMED;
4463            }
4464            offset += 4;
4465        }
4466
4467        if (flags & kSampleCompositionTimeOffsetPresent) {
4468            if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
4469                return ERROR_MALFORMED;
4470            }
4471            offset += 4;
4472        }
4473
4474        ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
4475              " flags 0x%08x", i + 1,
4476                dataOffset, sampleSize, sampleDuration,
4477                (flags & kFirstSampleFlagsPresent) && i == 0
4478                    ? firstSampleFlags : sampleFlags);
4479        tmp.offset = dataOffset;
4480        tmp.size = sampleSize;
4481        tmp.duration = sampleDuration;
4482        tmp.compositionOffset = sampleCtsOffset;
4483        mCurrentSamples.add(tmp);
4484
4485        dataOffset += sampleSize;
4486    }
4487
4488    mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
4489
4490    return OK;
4491}
4492
4493status_t MPEG4Source::getFormat(MetaDataBase &meta) {
4494    Mutex::Autolock autoLock(mLock);
4495    meta = mFormat;
4496    return OK;
4497}
4498
4499size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
4500    switch (mNALLengthSize) {
4501        case 1:
4502            return *data;
4503        case 2:
4504            return U16_AT(data);
4505        case 3:
4506            return ((size_t)data[0] << 16) | U16_AT(&data[1]);
4507        case 4:
4508            return U32_AT(data);
4509    }
4510
4511    // This cannot happen, mNALLengthSize springs to life by adding 1 to
4512    // a 2-bit integer.
4513    CHECK(!"Should not be here.");
4514
4515    return 0;
4516}
4517
4518status_t MPEG4Source::read(
4519        MediaBufferBase **out, const ReadOptions *options) {
4520    Mutex::Autolock autoLock(mLock);
4521
4522    CHECK(mStarted);
4523
4524    if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) {
4525        *out = nullptr;
4526        return WOULD_BLOCK;
4527    }
4528
4529    if (mFirstMoofOffset > 0) {
4530        return fragmentedRead(out, options);
4531    }
4532
4533    *out = NULL;
4534
4535    int64_t targetSampleTimeUs = -1;
4536
4537    int64_t seekTimeUs;
4538    ReadOptions::SeekMode mode;
4539    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
4540        if (mIsHeif) {
4541            CHECK(mSampleTable == NULL);
4542            CHECK(mItemTable != NULL);
4543            int32_t imageIndex;
4544            if (!mFormat.findInt32(kKeyTrackID, &imageIndex)) {
4545                return ERROR_MALFORMED;
4546            }
4547
4548            status_t err;
4549            if (seekTimeUs >= 0) {
4550                err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
4551            } else {
4552                err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
4553            }
4554            if (err != OK) {
4555                return err;
4556            }
4557        } else {
4558            uint32_t findFlags = 0;
4559            switch (mode) {
4560                case ReadOptions::SEEK_PREVIOUS_SYNC:
4561                    findFlags = SampleTable::kFlagBefore;
4562                    break;
4563                case ReadOptions::SEEK_NEXT_SYNC:
4564                    findFlags = SampleTable::kFlagAfter;
4565                    break;
4566                case ReadOptions::SEEK_CLOSEST_SYNC:
4567                case ReadOptions::SEEK_CLOSEST:
4568                    findFlags = SampleTable::kFlagClosest;
4569                    break;
4570                case ReadOptions::SEEK_FRAME_INDEX:
4571                    findFlags = SampleTable::kFlagFrameIndex;
4572                    break;
4573                default:
4574                    CHECK(!"Should not be here.");
4575                    break;
4576            }
4577
4578            uint32_t sampleIndex;
4579            status_t err = mSampleTable->findSampleAtTime(
4580                    seekTimeUs, 1000000, mTimescale,
4581                    &sampleIndex, findFlags);
4582
4583            if (mode == ReadOptions::SEEK_CLOSEST
4584                    || mode == ReadOptions::SEEK_FRAME_INDEX) {
4585                // We found the closest sample already, now we want the sync
4586                // sample preceding it (or the sample itself of course), even
4587                // if the subsequent sync sample is closer.
4588                findFlags = SampleTable::kFlagBefore;
4589            }
4590
4591            uint32_t syncSampleIndex;
4592            if (err == OK) {
4593                err = mSampleTable->findSyncSampleNear(
4594                        sampleIndex, &syncSampleIndex, findFlags);
4595            }
4596
4597            uint32_t sampleTime;
4598            if (err == OK) {
4599                err = mSampleTable->getMetaDataForSample(
4600                        sampleIndex, NULL, NULL, &sampleTime);
4601            }
4602
4603            if (err != OK) {
4604                if (err == ERROR_OUT_OF_RANGE) {
4605                    // An attempt to seek past the end of the stream would
4606                    // normally cause this ERROR_OUT_OF_RANGE error. Propagating
4607                    // this all the way to the MediaPlayer would cause abnormal
4608                    // termination. Legacy behaviour appears to be to behave as if
4609                    // we had seeked to the end of stream, ending normally.
4610                    err = ERROR_END_OF_STREAM;
4611                }
4612                ALOGV("end of stream");
4613                return err;
4614            }
4615
4616            if (mode == ReadOptions::SEEK_CLOSEST
4617                || mode == ReadOptions::SEEK_FRAME_INDEX) {
4618                targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
4619            }
4620
4621#if 0
4622            uint32_t syncSampleTime;
4623            CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
4624                        syncSampleIndex, NULL, NULL, &syncSampleTime));
4625
4626            ALOGI("seek to time %lld us => sample at time %lld us, "
4627                 "sync sample at time %lld us",
4628                 seekTimeUs,
4629                 sampleTime * 1000000ll / mTimescale,
4630                 syncSampleTime * 1000000ll / mTimescale);
4631#endif
4632
4633            mCurrentSampleIndex = syncSampleIndex;
4634        }
4635
4636        if (mBuffer != NULL) {
4637            mBuffer->release();
4638            mBuffer = NULL;
4639        }
4640
4641        // fall through
4642    }
4643
4644    off64_t offset = 0;
4645    size_t size = 0;
4646    uint32_t cts, stts;
4647    bool isSyncSample;
4648    bool newBuffer = false;
4649    if (mBuffer == NULL) {
4650        newBuffer = true;
4651
4652        status_t err;
4653        if (!mIsHeif) {
4654            err = mSampleTable->getMetaDataForSample(
4655                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
4656        } else {
4657            err = mItemTable->getImageOffsetAndSize(
4658                    options && options->getSeekTo(&seekTimeUs, &mode) ?
4659                            &mCurrentSampleIndex : NULL, &offset, &size);
4660
4661            cts = stts = 0;
4662            isSyncSample = 0;
4663            ALOGV("image offset %lld, size %zu", (long long)offset, size);
4664        }
4665
4666        if (err != OK) {
4667            return err;
4668        }
4669
4670        err = mGroup->acquire_buffer(&mBuffer);
4671
4672        if (err != OK) {
4673            CHECK(mBuffer == NULL);
4674            return err;
4675        }
4676        if (size > mBuffer->size()) {
4677            ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
4678            mBuffer->release();
4679            mBuffer = NULL;
4680            return ERROR_BUFFER_TOO_SMALL;
4681        }
4682    }
4683
4684    if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) {
4685        if (newBuffer) {
4686            ssize_t num_bytes_read =
4687                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
4688
4689            if (num_bytes_read < (ssize_t)size) {
4690                mBuffer->release();
4691                mBuffer = NULL;
4692
4693                return ERROR_IO;
4694            }
4695
4696            CHECK(mBuffer != NULL);
4697            mBuffer->set_range(0, size);
4698            mBuffer->meta_data().clear();
4699            mBuffer->meta_data().setInt64(
4700                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4701            mBuffer->meta_data().setInt64(
4702                    kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
4703
4704            if (targetSampleTimeUs >= 0) {
4705                mBuffer->meta_data().setInt64(
4706                        kKeyTargetTime, targetSampleTimeUs);
4707            }
4708
4709            if (isSyncSample) {
4710                mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
4711            }
4712
4713            ++mCurrentSampleIndex;
4714        }
4715
4716        if (!mIsAVC && !mIsHEVC) {
4717            *out = mBuffer;
4718            mBuffer = NULL;
4719
4720            return OK;
4721        }
4722
4723        // Each NAL unit is split up into its constituent fragments and
4724        // each one of them returned in its own buffer.
4725
4726        CHECK(mBuffer->range_length() >= mNALLengthSize);
4727
4728        const uint8_t *src =
4729            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
4730
4731        size_t nal_size = parseNALSize(src);
4732        if (mNALLengthSize > SIZE_MAX - nal_size) {
4733            ALOGE("b/24441553, b/24445122");
4734        }
4735        if (mBuffer->range_length() - mNALLengthSize < nal_size) {
4736            ALOGE("incomplete NAL unit.");
4737
4738            mBuffer->release();
4739            mBuffer = NULL;
4740
4741            return ERROR_MALFORMED;
4742        }
4743
4744        MediaBufferBase *clone = mBuffer->clone();
4745        CHECK(clone != NULL);
4746        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
4747
4748        CHECK(mBuffer != NULL);
4749        mBuffer->set_range(
4750                mBuffer->range_offset() + mNALLengthSize + nal_size,
4751                mBuffer->range_length() - mNALLengthSize - nal_size);
4752
4753        if (mBuffer->range_length() == 0) {
4754            mBuffer->release();
4755            mBuffer = NULL;
4756        }
4757
4758        *out = clone;
4759
4760        return OK;
4761    } else {
4762        // Whole NAL units are returned but each fragment is prefixed by
4763        // the start code (0x00 00 00 01).
4764        ssize_t num_bytes_read = 0;
4765        int32_t drm = 0;
4766        bool usesDRM = (mFormat.findInt32(kKeyIsDRM, &drm) && drm != 0);
4767        if (usesDRM) {
4768            num_bytes_read =
4769                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
4770        } else {
4771            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
4772        }
4773
4774        if (num_bytes_read < (ssize_t)size) {
4775            mBuffer->release();
4776            mBuffer = NULL;
4777
4778            return ERROR_IO;
4779        }
4780
4781        if (usesDRM) {
4782            CHECK(mBuffer != NULL);
4783            mBuffer->set_range(0, size);
4784
4785        } else {
4786            uint8_t *dstData = (uint8_t *)mBuffer->data();
4787            size_t srcOffset = 0;
4788            size_t dstOffset = 0;
4789
4790            while (srcOffset < size) {
4791                bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
4792                size_t nalLength = 0;
4793                if (!isMalFormed) {
4794                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
4795                    srcOffset += mNALLengthSize;
4796                    isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
4797                }
4798
4799                if (isMalFormed) {
4800                    ALOGE("Video is malformed");
4801                    mBuffer->release();
4802                    mBuffer = NULL;
4803                    return ERROR_MALFORMED;
4804                }
4805
4806                if (nalLength == 0) {
4807                    continue;
4808                }
4809
4810                if (dstOffset > SIZE_MAX - 4 ||
4811                        dstOffset + 4 > SIZE_MAX - nalLength ||
4812                        dstOffset + 4 + nalLength > mBuffer->size()) {
4813                    ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
4814                    android_errorWriteLog(0x534e4554, "27208621");
4815                    mBuffer->release();
4816                    mBuffer = NULL;
4817                    return ERROR_MALFORMED;
4818                }
4819
4820                dstData[dstOffset++] = 0;
4821                dstData[dstOffset++] = 0;
4822                dstData[dstOffset++] = 0;
4823                dstData[dstOffset++] = 1;
4824                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
4825                srcOffset += nalLength;
4826                dstOffset += nalLength;
4827            }
4828            CHECK_EQ(srcOffset, size);
4829            CHECK(mBuffer != NULL);
4830            mBuffer->set_range(0, dstOffset);
4831        }
4832
4833        mBuffer->meta_data().clear();
4834        mBuffer->meta_data().setInt64(
4835                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4836        mBuffer->meta_data().setInt64(
4837                kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
4838
4839        if (targetSampleTimeUs >= 0) {
4840            mBuffer->meta_data().setInt64(
4841                    kKeyTargetTime, targetSampleTimeUs);
4842        }
4843
4844        if (mIsAVC) {
4845            uint32_t layerId = FindAVCLayerId(
4846                    (const uint8_t *)mBuffer->data(), mBuffer->range_length());
4847            mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId);
4848        }
4849
4850        if (isSyncSample) {
4851            mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
4852        }
4853
4854        ++mCurrentSampleIndex;
4855
4856        *out = mBuffer;
4857        mBuffer = NULL;
4858
4859        return OK;
4860    }
4861}
4862
4863status_t MPEG4Source::fragmentedRead(
4864        MediaBufferBase **out, const ReadOptions *options) {
4865
4866    ALOGV("MPEG4Source::fragmentedRead");
4867
4868    CHECK(mStarted);
4869
4870    *out = NULL;
4871
4872    int64_t targetSampleTimeUs = -1;
4873
4874    int64_t seekTimeUs;
4875    ReadOptions::SeekMode mode;
4876    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
4877
4878        int numSidxEntries = mSegments.size();
4879        if (numSidxEntries != 0) {
4880            int64_t totalTime = 0;
4881            off64_t totalOffset = mFirstMoofOffset;
4882            for (int i = 0; i < numSidxEntries; i++) {
4883                const SidxEntry *se = &mSegments[i];
4884                if (totalTime + se->mDurationUs > seekTimeUs) {
4885                    // The requested time is somewhere in this segment
4886                    if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
4887                        (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
4888                        (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
4889                        // requested next sync, or closest sync and it was closer to the end of
4890                        // this segment
4891                        totalTime += se->mDurationUs;
4892                        totalOffset += se->mSize;
4893                    }
4894                    break;
4895                }
4896                totalTime += se->mDurationUs;
4897                totalOffset += se->mSize;
4898            }
4899            mCurrentMoofOffset = totalOffset;
4900            mNextMoofOffset = -1;
4901            mCurrentSamples.clear();
4902            mCurrentSampleIndex = 0;
4903            status_t err = parseChunk(&totalOffset);
4904            if (err != OK) {
4905                return err;
4906            }
4907            mCurrentTime = totalTime * mTimescale / 1000000ll;
4908        } else {
4909            // without sidx boxes, we can only seek to 0
4910            mCurrentMoofOffset = mFirstMoofOffset;
4911            mNextMoofOffset = -1;
4912            mCurrentSamples.clear();
4913            mCurrentSampleIndex = 0;
4914            off64_t tmp = mCurrentMoofOffset;
4915            status_t err = parseChunk(&tmp);
4916            if (err != OK) {
4917                return err;
4918            }
4919            mCurrentTime = 0;
4920        }
4921
4922        if (mBuffer != NULL) {
4923            mBuffer->release();
4924            mBuffer = NULL;
4925        }
4926
4927        // fall through
4928    }
4929
4930    off64_t offset = 0;
4931    size_t size = 0;
4932    uint32_t cts = 0;
4933    bool isSyncSample = false;
4934    bool newBuffer = false;
4935    if (mBuffer == NULL) {
4936        newBuffer = true;
4937
4938        if (mCurrentSampleIndex >= mCurrentSamples.size()) {
4939            // move to next fragment if there is one
4940            if (mNextMoofOffset <= mCurrentMoofOffset) {
4941                return ERROR_END_OF_STREAM;
4942            }
4943            off64_t nextMoof = mNextMoofOffset;
4944            mCurrentMoofOffset = nextMoof;
4945            mCurrentSamples.clear();
4946            mCurrentSampleIndex = 0;
4947            status_t err = parseChunk(&nextMoof);
4948            if (err != OK) {
4949                return err;
4950            }
4951            if (mCurrentSampleIndex >= mCurrentSamples.size()) {
4952                return ERROR_END_OF_STREAM;
4953            }
4954        }
4955
4956        const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
4957        offset = smpl->offset;
4958        size = smpl->size;
4959        cts = mCurrentTime + smpl->compositionOffset;
4960        mCurrentTime += smpl->duration;
4961        isSyncSample = (mCurrentSampleIndex == 0); // XXX
4962
4963        status_t err = mGroup->acquire_buffer(&mBuffer);
4964
4965        if (err != OK) {
4966            CHECK(mBuffer == NULL);
4967            ALOGV("acquire_buffer returned %d", err);
4968            return err;
4969        }
4970        if (size > mBuffer->size()) {
4971            ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
4972            mBuffer->release();
4973            mBuffer = NULL;
4974            return ERROR_BUFFER_TOO_SMALL;
4975        }
4976    }
4977
4978    const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
4979    MetaDataBase &bufmeta = mBuffer->meta_data();
4980    bufmeta.clear();
4981    if (smpl->encryptedsizes.size()) {
4982        // store clear/encrypted lengths in metadata
4983        bufmeta.setData(kKeyPlainSizes, 0,
4984                smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
4985        bufmeta.setData(kKeyEncryptedSizes, 0,
4986                smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
4987        bufmeta.setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size?
4988        bufmeta.setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
4989        bufmeta.setInt32(kKeyCryptoMode, mCryptoMode);
4990        bufmeta.setData(kKeyCryptoKey, 0, mCryptoKey, 16);
4991    }
4992
4993    if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) {
4994        if (newBuffer) {
4995            if (!isInRange((size_t)0u, mBuffer->size(), size)) {
4996                mBuffer->release();
4997                mBuffer = NULL;
4998
4999                ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
5000                return ERROR_MALFORMED;
5001            }
5002
5003            ssize_t num_bytes_read =
5004                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
5005
5006            if (num_bytes_read < (ssize_t)size) {
5007                mBuffer->release();
5008                mBuffer = NULL;
5009
5010                ALOGE("i/o error");
5011                return ERROR_IO;
5012            }
5013
5014            CHECK(mBuffer != NULL);
5015            mBuffer->set_range(0, size);
5016            mBuffer->meta_data().setInt64(
5017                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
5018            mBuffer->meta_data().setInt64(
5019                    kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
5020
5021            if (targetSampleTimeUs >= 0) {
5022                mBuffer->meta_data().setInt64(
5023                        kKeyTargetTime, targetSampleTimeUs);
5024            }
5025
5026            if (mIsAVC) {
5027                uint32_t layerId = FindAVCLayerId(
5028                        (const uint8_t *)mBuffer->data(), mBuffer->range_length());
5029                mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId);
5030            }
5031
5032            if (isSyncSample) {
5033                mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
5034            }
5035
5036            ++mCurrentSampleIndex;
5037        }
5038
5039        if (!mIsAVC && !mIsHEVC) {
5040            *out = mBuffer;
5041            mBuffer = NULL;
5042
5043            return OK;
5044        }
5045
5046        // Each NAL unit is split up into its constituent fragments and
5047        // each one of them returned in its own buffer.
5048
5049        CHECK(mBuffer->range_length() >= mNALLengthSize);
5050
5051        const uint8_t *src =
5052            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
5053
5054        size_t nal_size = parseNALSize(src);
5055        if (mNALLengthSize > SIZE_MAX - nal_size) {
5056            ALOGE("b/24441553, b/24445122");
5057        }
5058
5059        if (mBuffer->range_length() - mNALLengthSize < nal_size) {
5060            ALOGE("incomplete NAL unit.");
5061
5062            mBuffer->release();
5063            mBuffer = NULL;
5064
5065            return ERROR_MALFORMED;
5066        }
5067
5068        MediaBufferBase *clone = mBuffer->clone();
5069        CHECK(clone != NULL);
5070        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
5071
5072        CHECK(mBuffer != NULL);
5073        mBuffer->set_range(
5074                mBuffer->range_offset() + mNALLengthSize + nal_size,
5075                mBuffer->range_length() - mNALLengthSize - nal_size);
5076
5077        if (mBuffer->range_length() == 0) {
5078            mBuffer->release();
5079            mBuffer = NULL;
5080        }
5081
5082        *out = clone;
5083
5084        return OK;
5085    } else {
5086        ALOGV("whole NAL");
5087        // Whole NAL units are returned but each fragment is prefixed by
5088        // the start code (0x00 00 00 01).
5089        ssize_t num_bytes_read = 0;
5090        int32_t drm = 0;
5091        bool usesDRM = (mFormat.findInt32(kKeyIsDRM, &drm) && drm != 0);
5092        void *data = NULL;
5093        bool isMalFormed = false;
5094        if (usesDRM) {
5095            if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) {
5096                isMalFormed = true;
5097            } else {
5098                data = mBuffer->data();
5099            }
5100        } else {
5101            int32_t max_size;
5102            if (!mFormat.findInt32(kKeyMaxInputSize, &max_size)
5103                    || !isInRange((size_t)0u, (size_t)max_size, size)) {
5104                isMalFormed = true;
5105            } else {
5106                data = mSrcBuffer;
5107            }
5108        }
5109
5110        if (isMalFormed || data == NULL) {
5111            ALOGE("isMalFormed size %zu", size);
5112            if (mBuffer != NULL) {
5113                mBuffer->release();
5114                mBuffer = NULL;
5115            }
5116            return ERROR_MALFORMED;
5117        }
5118        num_bytes_read = mDataSource->readAt(offset, data, size);
5119
5120        if (num_bytes_read < (ssize_t)size) {
5121            mBuffer->release();
5122            mBuffer = NULL;
5123
5124            ALOGE("i/o error");
5125            return ERROR_IO;
5126        }
5127
5128        if (usesDRM) {
5129            CHECK(mBuffer != NULL);
5130            mBuffer->set_range(0, size);
5131
5132        } else {
5133            uint8_t *dstData = (uint8_t *)mBuffer->data();
5134            size_t srcOffset = 0;
5135            size_t dstOffset = 0;
5136
5137            while (srcOffset < size) {
5138                isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
5139                size_t nalLength = 0;
5140                if (!isMalFormed) {
5141                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
5142                    srcOffset += mNALLengthSize;
5143                    isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
5144                            || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
5145                            || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
5146                }
5147
5148                if (isMalFormed) {
5149                    ALOGE("Video is malformed; nalLength %zu", nalLength);
5150                    mBuffer->release();
5151                    mBuffer = NULL;
5152                    return ERROR_MALFORMED;
5153                }
5154
5155                if (nalLength == 0) {
5156                    continue;
5157                }
5158
5159                if (dstOffset > SIZE_MAX - 4 ||
5160                        dstOffset + 4 > SIZE_MAX - nalLength ||
5161                        dstOffset + 4 + nalLength > mBuffer->size()) {
5162                    ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
5163                    android_errorWriteLog(0x534e4554, "26365349");
5164                    mBuffer->release();
5165                    mBuffer = NULL;
5166                    return ERROR_MALFORMED;
5167                }
5168
5169                dstData[dstOffset++] = 0;
5170                dstData[dstOffset++] = 0;
5171                dstData[dstOffset++] = 0;
5172                dstData[dstOffset++] = 1;
5173                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
5174                srcOffset += nalLength;
5175                dstOffset += nalLength;
5176            }
5177            CHECK_EQ(srcOffset, size);
5178            CHECK(mBuffer != NULL);
5179            mBuffer->set_range(0, dstOffset);
5180        }
5181
5182        mBuffer->meta_data().setInt64(
5183                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
5184        mBuffer->meta_data().setInt64(
5185                kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
5186
5187        if (targetSampleTimeUs >= 0) {
5188            mBuffer->meta_data().setInt64(
5189                    kKeyTargetTime, targetSampleTimeUs);
5190        }
5191
5192        if (isSyncSample) {
5193            mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
5194        }
5195
5196        ++mCurrentSampleIndex;
5197
5198        *out = mBuffer;
5199        mBuffer = NULL;
5200
5201        return OK;
5202    }
5203}
5204
5205MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
5206        const char *mimePrefix) {
5207    for (Track *track = mFirstTrack; track != NULL; track = track->next) {
5208        const char *mime;
5209        if (track->meta.findCString(kKeyMIMEType, &mime)
5210                && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
5211            return track;
5212        }
5213    }
5214
5215    return NULL;
5216}
5217
5218static bool LegacySniffMPEG4(DataSourceBase *source, float *confidence) {
5219    uint8_t header[8];
5220
5221    ssize_t n = source->readAt(4, header, sizeof(header));
5222    if (n < (ssize_t)sizeof(header)) {
5223        return false;
5224    }
5225
5226    if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
5227        || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
5228        || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
5229        || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
5230        || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
5231        || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
5232        || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
5233        || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)) {
5234        *confidence = 0.4;
5235
5236        return true;
5237    }
5238
5239    return false;
5240}
5241
5242static bool isCompatibleBrand(uint32_t fourcc) {
5243    static const uint32_t kCompatibleBrands[] = {
5244        FOURCC('i', 's', 'o', 'm'),
5245        FOURCC('i', 's', 'o', '2'),
5246        FOURCC('a', 'v', 'c', '1'),
5247        FOURCC('h', 'v', 'c', '1'),
5248        FOURCC('h', 'e', 'v', '1'),
5249        FOURCC('3', 'g', 'p', '4'),
5250        FOURCC('m', 'p', '4', '1'),
5251        FOURCC('m', 'p', '4', '2'),
5252        FOURCC('d', 'a', 's', 'h'),
5253
5254        // Won't promise that the following file types can be played.
5255        // Just give these file types a chance.
5256        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
5257        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
5258
5259        FOURCC('3', 'g', '2', 'a'),  // 3GPP2
5260        FOURCC('3', 'g', '2', 'b'),
5261        FOURCC('m', 'i', 'f', '1'),  // HEIF image
5262        FOURCC('h', 'e', 'i', 'c'),  // HEIF image
5263        FOURCC('m', 's', 'f', '1'),  // HEIF image sequence
5264        FOURCC('h', 'e', 'v', 'c'),  // HEIF image sequence
5265    };
5266
5267    for (size_t i = 0;
5268         i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
5269         ++i) {
5270        if (kCompatibleBrands[i] == fourcc) {
5271            return true;
5272        }
5273    }
5274
5275    return false;
5276}
5277
5278// Attempt to actually parse the 'ftyp' atom and determine if a suitable
5279// compatible brand is present.
5280// Also try to identify where this file's metadata ends
5281// (end of the 'moov' atom) and report it to the caller as part of
5282// the metadata.
5283static bool BetterSniffMPEG4(DataSourceBase *source, float *confidence) {
5284    // We scan up to 128 bytes to identify this file as an MP4.
5285    static const off64_t kMaxScanOffset = 128ll;
5286
5287    off64_t offset = 0ll;
5288    bool foundGoodFileType = false;
5289    off64_t moovAtomEndOffset = -1ll;
5290    bool done = false;
5291
5292    while (!done && offset < kMaxScanOffset) {
5293        uint32_t hdr[2];
5294        if (source->readAt(offset, hdr, 8) < 8) {
5295            return false;
5296        }
5297
5298        uint64_t chunkSize = ntohl(hdr[0]);
5299        uint32_t chunkType = ntohl(hdr[1]);
5300        off64_t chunkDataOffset = offset + 8;
5301
5302        if (chunkSize == 1) {
5303            if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
5304                return false;
5305            }
5306
5307            chunkSize = ntoh64(chunkSize);
5308            chunkDataOffset += 8;
5309
5310            if (chunkSize < 16) {
5311                // The smallest valid chunk is 16 bytes long in this case.
5312                return false;
5313            }
5314
5315        } else if (chunkSize < 8) {
5316            // The smallest valid chunk is 8 bytes long.
5317            return false;
5318        }
5319
5320        // (data_offset - offset) is either 8 or 16
5321        off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
5322        if (chunkDataSize < 0) {
5323            ALOGE("b/23540914");
5324            return false;
5325        }
5326
5327        char chunkstring[5];
5328        MakeFourCCString(chunkType, chunkstring);
5329        ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset);
5330        switch (chunkType) {
5331            case FOURCC('f', 't', 'y', 'p'):
5332            {
5333                if (chunkDataSize < 8) {
5334                    return false;
5335                }
5336
5337                uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
5338                for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
5339                    if (i == 1) {
5340                        // Skip this index, it refers to the minorVersion,
5341                        // not a brand.
5342                        continue;
5343                    }
5344
5345                    uint32_t brand;
5346                    if (source->readAt(
5347                                chunkDataOffset + 4 * i, &brand, 4) < 4) {
5348                        return false;
5349                    }
5350
5351                    brand = ntohl(brand);
5352
5353                    if (isCompatibleBrand(brand)) {
5354                        foundGoodFileType = true;
5355                        break;
5356                    }
5357                }
5358
5359                if (!foundGoodFileType) {
5360                    return false;
5361                }
5362
5363                break;
5364            }
5365
5366            case FOURCC('m', 'o', 'o', 'v'):
5367            {
5368                moovAtomEndOffset = offset + chunkSize;
5369
5370                done = true;
5371                break;
5372            }
5373
5374            default:
5375                break;
5376        }
5377
5378        offset += chunkSize;
5379    }
5380
5381    if (!foundGoodFileType) {
5382        return false;
5383    }
5384
5385    *confidence = 0.4f;
5386
5387    return true;
5388}
5389
5390static MediaExtractor* CreateExtractor(DataSourceBase *source, void *) {
5391    return new MPEG4Extractor(source);
5392}
5393
5394static MediaExtractor::CreatorFunc Sniff(
5395        DataSourceBase *source, float *confidence, void **,
5396        MediaExtractor::FreeMetaFunc *) {
5397    if (BetterSniffMPEG4(source, confidence)) {
5398        return CreateExtractor;
5399    }
5400
5401    if (LegacySniffMPEG4(source, confidence)) {
5402        ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
5403        return CreateExtractor;
5404    }
5405
5406    return NULL;
5407}
5408
5409extern "C" {
5410// This is the only symbol that needs to be exported
5411__attribute__ ((visibility ("default")))
5412MediaExtractor::ExtractorDef GETEXTRACTORDEF() {
5413    return {
5414        MediaExtractor::EXTRACTORDEF_VERSION,
5415        UUID("27575c67-4417-4c54-8d3d-8e626985a164"),
5416        1, // version
5417        "MP4 Extractor",
5418        Sniff
5419    };
5420}
5421
5422} // extern "C"
5423
5424}  // namespace android
5425