MPEG4Extractor.cpp revision 7b78bfd448028aeeb642cb35d46cc96b205f24f9
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MPEG4Extractor"
19
20#include <ctype.h>
21#include <inttypes.h>
22#include <memory>
23#include <stdint.h>
24#include <stdlib.h>
25#include <string.h>
26
27#include <utils/Log.h>
28
29#include "MPEG4Extractor.h"
30#include "SampleTable.h"
31#include "ItemTable.h"
32#include "include/ESDS.h"
33
34#include <media/ExtractorUtils.h>
35#include <media/MediaTrack.h>
36#include <media/stagefright/foundation/ABitReader.h>
37#include <media/stagefright/foundation/ABuffer.h>
38#include <media/stagefright/foundation/ADebug.h>
39#include <media/stagefright/foundation/AMessage.h>
40#include <media/stagefright/foundation/AUtils.h>
41#include <media/stagefright/foundation/ByteUtils.h>
42#include <media/stagefright/foundation/ColorUtils.h>
43#include <media/stagefright/foundation/avc_utils.h>
44#include <media/stagefright/foundation/hexdump.h>
45#include <media/stagefright/MediaBufferBase.h>
46#include <media/stagefright/MediaBufferGroup.h>
47#include <media/stagefright/MediaDefs.h>
48#include <media/stagefright/MetaData.h>
49#include <utils/String8.h>
50
51#include <byteswap.h>
52#include "include/ID3.h"
53
54#ifndef UINT32_MAX
55#define UINT32_MAX       (4294967295U)
56#endif
57
58namespace android {
59
enum {
    // Upper bound, in bytes, on the stream header that getTrackMetaData()
    // copies out of the first sample of a track.
    kMaxTrackHeaderSize = 32,

    // Largest atom payload we accept (64 MiB). The spec allows bigger atoms,
    // but parseChunk() rejects any non-'mdat' atom above this size.
    kMaxAtomSize = 64 << 20,
};
68
69class MPEG4Source : public MediaTrack {
70public:
71    // Caller retains ownership of both "dataSource" and "sampleTable".
72    MPEG4Source(MetaDataBase &format,
73                DataSourceBase *dataSource,
74                int32_t timeScale,
75                const sp<SampleTable> &sampleTable,
76                Vector<SidxEntry> &sidx,
77                const Trex *trex,
78                off64_t firstMoofOffset,
79                const sp<ItemTable> &itemTable);
80    virtual status_t init();
81
82    virtual status_t start(MetaDataBase *params = NULL);
83    virtual status_t stop();
84
85    virtual status_t getFormat(MetaDataBase &);
86
87    virtual status_t read(MediaBufferBase **buffer, const ReadOptions *options = NULL);
88    virtual bool supportNonblockingRead() { return true; }
89    virtual status_t fragmentedRead(MediaBufferBase **buffer, const ReadOptions *options = NULL);
90
91    virtual ~MPEG4Source();
92
93private:
94    Mutex mLock;
95
96    MetaDataBase &mFormat;
97    DataSourceBase *mDataSource;
98    int32_t mTimescale;
99    sp<SampleTable> mSampleTable;
100    uint32_t mCurrentSampleIndex;
101    uint32_t mCurrentFragmentIndex;
102    Vector<SidxEntry> &mSegments;
103    const Trex *mTrex;
104    off64_t mFirstMoofOffset;
105    off64_t mCurrentMoofOffset;
106    off64_t mNextMoofOffset;
107    uint32_t mCurrentTime;
108    int32_t mLastParsedTrackId;
109    int32_t mTrackId;
110
111    int32_t mCryptoMode;    // passed in from extractor
112    int32_t mDefaultIVSize; // passed in from extractor
113    uint8_t mCryptoKey[16]; // passed in from extractor
114    int32_t mDefaultEncryptedByteBlock;
115    int32_t mDefaultSkipByteBlock;
116    uint32_t mCurrentAuxInfoType;
117    uint32_t mCurrentAuxInfoTypeParameter;
118    int32_t mCurrentDefaultSampleInfoSize;
119    uint32_t mCurrentSampleInfoCount;
120    uint32_t mCurrentSampleInfoAllocSize;
121    uint8_t* mCurrentSampleInfoSizes;
122    uint32_t mCurrentSampleInfoOffsetCount;
123    uint32_t mCurrentSampleInfoOffsetsAllocSize;
124    uint64_t* mCurrentSampleInfoOffsets;
125
126    bool mIsAVC;
127    bool mIsHEVC;
128    size_t mNALLengthSize;
129
130    bool mStarted;
131
132    MediaBufferGroup *mGroup;
133
134    MediaBufferBase *mBuffer;
135
136    bool mWantsNALFragments;
137
138    uint8_t *mSrcBuffer;
139
140    bool mIsHeif;
141    sp<ItemTable> mItemTable;
142
143    size_t parseNALSize(const uint8_t *data) const;
144    status_t parseChunk(off64_t *offset);
145    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
146    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
147    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
148    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
149    status_t parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags);
150    status_t parseSampleEncryption(off64_t offset);
151
152    struct TrackFragmentHeaderInfo {
153        enum Flags {
154            kBaseDataOffsetPresent         = 0x01,
155            kSampleDescriptionIndexPresent = 0x02,
156            kDefaultSampleDurationPresent  = 0x08,
157            kDefaultSampleSizePresent      = 0x10,
158            kDefaultSampleFlagsPresent     = 0x20,
159            kDurationIsEmpty               = 0x10000,
160        };
161
162        uint32_t mTrackID;
163        uint32_t mFlags;
164        uint64_t mBaseDataOffset;
165        uint32_t mSampleDescriptionIndex;
166        uint32_t mDefaultSampleDuration;
167        uint32_t mDefaultSampleSize;
168        uint32_t mDefaultSampleFlags;
169
170        uint64_t mDataOffset;
171    };
172    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;
173
174    struct Sample {
175        off64_t offset;
176        size_t size;
177        uint32_t duration;
178        int32_t compositionOffset;
179        uint8_t iv[16];
180        Vector<size_t> clearsizes;
181        Vector<size_t> encryptedsizes;
182    };
183    Vector<Sample> mCurrentSamples;
184
185    MPEG4Source(const MPEG4Source &);
186    MPEG4Source &operator=(const MPEG4Source &);
187};
188
189// This custom data source wraps an existing one and satisfies requests
190// falling entirely within a cached range from the cache while forwarding
191// all remaining requests to the wrapped datasource.
192// This is used to cache the full sampletable metadata for a single track,
193// possibly wrapping multiple times to cover all tracks, i.e.
194// Each CachedRangedDataSource caches the sampletable metadata for a single track.
195
196struct CachedRangedDataSource : public DataSourceBase {
197    explicit CachedRangedDataSource(DataSourceBase *source);
198    virtual ~CachedRangedDataSource();
199
200    virtual status_t initCheck() const;
201    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
202    virtual status_t getSize(off64_t *size);
203    virtual uint32_t flags();
204
205    status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);
206
207
208private:
209    Mutex mLock;
210
211    DataSourceBase *mSource;
212    bool mOwnsDataSource;
213    off64_t mCachedOffset;
214    size_t mCachedSize;
215    uint8_t *mCache;
216
217    void clearCache();
218
219    CachedRangedDataSource(const CachedRangedDataSource &);
220    CachedRangedDataSource &operator=(const CachedRangedDataSource &);
221};
222
223CachedRangedDataSource::CachedRangedDataSource(DataSourceBase *source)
224    : mSource(source),
225      mOwnsDataSource(false),
226      mCachedOffset(0),
227      mCachedSize(0),
228      mCache(NULL) {
229}
230
231CachedRangedDataSource::~CachedRangedDataSource() {
232    clearCache();
233    if (mOwnsDataSource) {
234        delete (CachedRangedDataSource*)mSource;
235    }
236}
237
238void CachedRangedDataSource::clearCache() {
239    if (mCache) {
240        free(mCache);
241        mCache = NULL;
242    }
243
244    mCachedOffset = 0;
245    mCachedSize = 0;
246}
247
248status_t CachedRangedDataSource::initCheck() const {
249    return mSource->initCheck();
250}
251
252ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
253    Mutex::Autolock autoLock(mLock);
254
255    if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
256        memcpy(data, &mCache[offset - mCachedOffset], size);
257        return size;
258    }
259
260    return mSource->readAt(offset, data, size);
261}
262
263status_t CachedRangedDataSource::getSize(off64_t *size) {
264    return mSource->getSize(size);
265}
266
267uint32_t CachedRangedDataSource::flags() {
268    return mSource->flags();
269}
270
271status_t CachedRangedDataSource::setCachedRange(off64_t offset,
272        size_t size,
273        bool assumeSourceOwnershipOnSuccess) {
274    Mutex::Autolock autoLock(mLock);
275
276    clearCache();
277
278    mCache = (uint8_t *)malloc(size);
279
280    if (mCache == NULL) {
281        return -ENOMEM;
282    }
283
284    mCachedOffset = offset;
285    mCachedSize = size;
286
287    ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
288
289    if (err < (ssize_t)size) {
290        clearCache();
291
292        return ERROR_IO;
293    }
294    mOwnsDataSource = assumeSourceOwnershipOnSuccess;
295    return OK;
296}
297
298////////////////////////////////////////////////////////////////////////////////
299
// Debug switch: when true, parseChunk() prints and hex-dumps each chunk.
static constexpr bool kUseHexDump = false;
301
302static const char *FourCC2MIME(uint32_t fourcc) {
303    switch (fourcc) {
304        case FOURCC('m', 'p', '4', 'a'):
305            return MEDIA_MIMETYPE_AUDIO_AAC;
306
307        case FOURCC('s', 'a', 'm', 'r'):
308            return MEDIA_MIMETYPE_AUDIO_AMR_NB;
309
310        case FOURCC('s', 'a', 'w', 'b'):
311            return MEDIA_MIMETYPE_AUDIO_AMR_WB;
312
313        case FOURCC('m', 'p', '4', 'v'):
314            return MEDIA_MIMETYPE_VIDEO_MPEG4;
315
316        case FOURCC('s', '2', '6', '3'):
317        case FOURCC('h', '2', '6', '3'):
318        case FOURCC('H', '2', '6', '3'):
319            return MEDIA_MIMETYPE_VIDEO_H263;
320
321        case FOURCC('a', 'v', 'c', '1'):
322            return MEDIA_MIMETYPE_VIDEO_AVC;
323
324        case FOURCC('h', 'v', 'c', '1'):
325        case FOURCC('h', 'e', 'v', '1'):
326            return MEDIA_MIMETYPE_VIDEO_HEVC;
327        default:
328            ALOGW("Unknown fourcc: %c%c%c%c",
329                   (fourcc >> 24) & 0xff,
330                   (fourcc >> 16) & 0xff,
331                   (fourcc >> 8) & 0xff,
332                   fourcc & 0xff
333                   );
334            return "application/octet-stream";
335    }
336}
337
338static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
339    if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
340        // AMR NB audio is always mono, 8kHz
341        *channels = 1;
342        *rate = 8000;
343        return true;
344    } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
345        // AMR WB audio is always mono, 16kHz
346        *channels = 1;
347        *rate = 16000;
348        return true;
349    }
350    return false;
351}
352
353MPEG4Extractor::MPEG4Extractor(DataSourceBase *source, const char *mime)
354    : mMoofOffset(0),
355      mMoofFound(false),
356      mMdatFound(false),
357      mDataSource(source),
358      mCachedSource(NULL),
359      mInitCheck(NO_INIT),
360      mHeaderTimescale(0),
361      mIsQT(false),
362      mIsHeif(false),
363      mHasMoovBox(false),
364      mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
365      mFirstTrack(NULL),
366      mLastTrack(NULL) {
367    ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
368}
369
370MPEG4Extractor::~MPEG4Extractor() {
371    Track *track = mFirstTrack;
372    while (track) {
373        Track *next = track->next;
374
375        delete track;
376        track = next;
377    }
378    mFirstTrack = mLastTrack = NULL;
379
380    for (size_t i = 0; i < mPssh.size(); i++) {
381        delete [] mPssh[i].data;
382    }
383    mPssh.clear();
384
385    delete mCachedSource;
386}
387
388uint32_t MPEG4Extractor::flags() const {
389    return CAN_PAUSE |
390            ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
391                    (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
392}
393
394status_t MPEG4Extractor::getMetaData(MetaDataBase &meta) {
395    status_t err;
396    if ((err = readMetaData()) != OK) {
397        return UNKNOWN_ERROR;
398    }
399    meta = mFileMetaData;
400    return OK;
401}
402
403size_t MPEG4Extractor::countTracks() {
404    status_t err;
405    if ((err = readMetaData()) != OK) {
406        ALOGV("MPEG4Extractor::countTracks: no tracks");
407        return 0;
408    }
409
410    size_t n = 0;
411    Track *track = mFirstTrack;
412    while (track) {
413        ++n;
414        track = track->next;
415    }
416
417    ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
418    return n;
419}
420
421status_t MPEG4Extractor::getTrackMetaData(
422        MetaDataBase &meta,
423        size_t index, uint32_t flags) {
424    status_t err;
425    if ((err = readMetaData()) != OK) {
426        return UNKNOWN_ERROR;
427    }
428
429    Track *track = mFirstTrack;
430    while (index > 0) {
431        if (track == NULL) {
432            return UNKNOWN_ERROR;
433        }
434
435        track = track->next;
436        --index;
437    }
438
439    if (track == NULL) {
440        return UNKNOWN_ERROR;
441    }
442
443    [=] {
444        int64_t duration;
445        int32_t samplerate;
446        if (track->has_elst && mHeaderTimescale != 0 &&
447                track->meta.findInt64(kKeyDuration, &duration) &&
448                track->meta.findInt32(kKeySampleRate, &samplerate)) {
449
450            track->has_elst = false;
451
452            if (track->elst_segment_duration > INT64_MAX) {
453                return;
454            }
455            int64_t segment_duration = track->elst_segment_duration;
456            int64_t media_time = track->elst_media_time;
457            int64_t halfscale = mHeaderTimescale / 2;
458            ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
459                  ", halfscale = %" PRId64 ", timescale = %d",
460                  segment_duration,
461                  media_time,
462                  halfscale,
463                  mHeaderTimescale);
464
465            int64_t delay;
466            // delay = ((media_time * samplerate) + halfscale) / mHeaderTimescale;
467            if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
468                    __builtin_add_overflow(delay, halfscale, &delay) ||
469                    (delay /= mHeaderTimescale, false) ||
470                    delay > INT32_MAX ||
471                    delay < INT32_MIN) {
472                return;
473            }
474            ALOGV("delay = %" PRId64, delay);
475            track->meta.setInt32(kKeyEncoderDelay, delay);
476
477            int64_t scaled_duration;
478            // scaled_duration = duration * mHeaderTimescale;
479            if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
480                return;
481            }
482            ALOGV("scaled_duration = %" PRId64, scaled_duration);
483
484            int64_t segment_end;
485            int64_t padding;
486            // padding = scaled_duration - ((segment_duration + media_time) * 1000000);
487            if (__builtin_add_overflow(segment_duration, media_time, &segment_end) ||
488                    __builtin_mul_overflow(segment_end, 1000000, &segment_end) ||
489                    __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
490                return;
491            }
492            ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);
493
494            if (padding < 0) {
495                // track duration from media header (which is what kKeyDuration is) might
496                // be slightly shorter than the segment duration, which would make the
497                // padding negative. Clamp to zero.
498                padding = 0;
499            }
500
501            int64_t paddingsamples;
502            int64_t halfscale_e6;
503            int64_t timescale_e6;
504            // paddingsamples = ((padding * samplerate) + (halfscale * 1000000))
505            //                / (mHeaderTimescale * 1000000);
506            if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
507                    __builtin_mul_overflow(halfscale, 1000000, &halfscale_e6) ||
508                    __builtin_mul_overflow(mHeaderTimescale, 1000000, &timescale_e6) ||
509                    __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
510                    (paddingsamples /= timescale_e6, false) ||
511                    paddingsamples > INT32_MAX) {
512                return;
513            }
514            ALOGV("paddingsamples = %" PRId64, paddingsamples);
515            track->meta.setInt32(kKeyEncoderPadding, paddingsamples);
516        }
517    }();
518
519    if ((flags & kIncludeExtensiveMetaData)
520            && !track->includes_expensive_metadata) {
521        track->includes_expensive_metadata = true;
522
523        const char *mime;
524        CHECK(track->meta.findCString(kKeyMIMEType, &mime));
525        if (!strncasecmp("video/", mime, 6)) {
526            // MPEG2 tracks do not provide CSD, so read the stream header
527            if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
528                off64_t offset;
529                size_t size;
530                if (track->sampleTable->getMetaDataForSample(
531                            0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
532                    if (size > kMaxTrackHeaderSize) {
533                        size = kMaxTrackHeaderSize;
534                    }
535                    uint8_t header[kMaxTrackHeaderSize];
536                    if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
537                        track->meta.setData(kKeyStreamHeader, 'mdat', header, size);
538                    }
539                }
540            }
541
542            if (mMoofOffset > 0) {
543                int64_t duration;
544                if (track->meta.findInt64(kKeyDuration, &duration)) {
545                    // nothing fancy, just pick a frame near 1/4th of the duration
546                    track->meta.setInt64(
547                            kKeyThumbnailTime, duration / 4);
548                }
549            } else {
550                uint32_t sampleIndex;
551                uint32_t sampleTime;
552                if (track->timescale != 0 &&
553                        track->sampleTable->findThumbnailSample(&sampleIndex) == OK
554                        && track->sampleTable->getMetaDataForSample(
555                            sampleIndex, NULL /* offset */, NULL /* size */,
556                            &sampleTime) == OK) {
557                    track->meta.setInt64(
558                            kKeyThumbnailTime,
559                            ((int64_t)sampleTime * 1000000) / track->timescale);
560                }
561            }
562        }
563    }
564
565    meta = track->meta;
566    return OK;
567}
568
569status_t MPEG4Extractor::readMetaData() {
570    if (mInitCheck != NO_INIT) {
571        return mInitCheck;
572    }
573
574    off64_t offset = 0;
575    status_t err;
576    bool sawMoovOrSidx = false;
577
578    while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
579             (mIsHeif && (mPreferHeif || !mHasMoovBox) &&
580                     (mItemTable != NULL) && mItemTable->isValid()))) {
581        off64_t orig_offset = offset;
582        err = parseChunk(&offset, 0);
583
584        if (err != OK && err != UNKNOWN_ERROR) {
585            break;
586        } else if (offset <= orig_offset) {
587            // only continue parsing if the offset was advanced,
588            // otherwise we might end up in an infinite loop
589            ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
590            err = ERROR_MALFORMED;
591            break;
592        } else if (err == UNKNOWN_ERROR) {
593            sawMoovOrSidx = true;
594        }
595    }
596
597    if (mIsHeif && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
598        off64_t exifOffset;
599        size_t exifSize;
600        if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
601            mFileMetaData.setInt64(kKeyExifOffset, (int64_t)exifOffset);
602            mFileMetaData.setInt64(kKeyExifSize, (int64_t)exifSize);
603        }
604        for (uint32_t imageIndex = 0;
605                imageIndex < mItemTable->countImages(); imageIndex++) {
606            sp<MetaData> meta = mItemTable->getImageMeta(imageIndex);
607            if (meta == NULL) {
608                ALOGE("heif image %u has no meta!", imageIndex);
609                continue;
610            }
611            // Some heif files advertise image sequence brands (eg. 'hevc') in
612            // ftyp box, but don't have any valid tracks in them. Instead of
613            // reporting the entire file as malformed, we override the error
614            // to allow still images to be extracted.
615            if (err != OK) {
616                ALOGW("Extracting still images only");
617                err = OK;
618            }
619            mInitCheck = OK;
620
621            ALOGV("adding HEIF image track %u", imageIndex);
622            Track *track = new Track;
623            track->next = NULL;
624            if (mLastTrack != NULL) {
625                mLastTrack->next = track;
626            } else {
627                mFirstTrack = track;
628            }
629            mLastTrack = track;
630
631            track->meta = *(meta.get());
632            track->meta.setInt32(kKeyTrackID, imageIndex);
633            track->includes_expensive_metadata = false;
634            track->skipTrack = false;
635            track->timescale = 1000000;
636        }
637    }
638
639    if (mInitCheck == OK) {
640        if (findTrackByMimePrefix("video/") != NULL) {
641            mFileMetaData.setCString(
642                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
643        } else if (findTrackByMimePrefix("audio/") != NULL) {
644            mFileMetaData.setCString(kKeyMIMEType, "audio/mp4");
645        } else if (findTrackByMimePrefix(
646                MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
647            mFileMetaData.setCString(
648                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_HEIF);
649        } else {
650            mFileMetaData.setCString(kKeyMIMEType, "application/octet-stream");
651        }
652    } else {
653        mInitCheck = err;
654    }
655
656    CHECK_NE(err, (status_t)NO_INIT);
657
658    // copy pssh data into file metadata
659    uint64_t psshsize = 0;
660    for (size_t i = 0; i < mPssh.size(); i++) {
661        psshsize += 20 + mPssh[i].datalen;
662    }
663    if (psshsize > 0 && psshsize <= UINT32_MAX) {
664        char *buf = (char*)malloc(psshsize);
665        if (!buf) {
666            ALOGE("b/28471206");
667            return NO_MEMORY;
668        }
669        char *ptr = buf;
670        for (size_t i = 0; i < mPssh.size(); i++) {
671            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
672            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
673            ptr += (20 + mPssh[i].datalen);
674        }
675        mFileMetaData.setData(kKeyPssh, 'pssh', buf, psshsize);
676        free(buf);
677    }
678
679    return mInitCheck;
680}
681
682struct PathAdder {
683    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
684        : mPath(path) {
685        mPath->push(chunkType);
686    }
687
688    ~PathAdder() {
689        mPath->pop();
690    }
691
692private:
693    Vector<uint32_t> *mPath;
694
695    PathAdder(const PathAdder &);
696    PathAdder &operator=(const PathAdder &);
697};
698
699static bool underMetaDataPath(const Vector<uint32_t> &path) {
700    return path.size() >= 5
701        && path[0] == FOURCC('m', 'o', 'o', 'v')
702        && path[1] == FOURCC('u', 'd', 't', 'a')
703        && path[2] == FOURCC('m', 'e', 't', 'a')
704        && path[3] == FOURCC('i', 'l', 's', 't');
705}
706
707static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
708    return path.size() >= 2
709            && path[0] == FOURCC('m', 'o', 'o', 'v')
710            && path[1] == FOURCC('m', 'e', 't', 'a')
711            && (depth == 2
712            || (depth == 3
713                    && (path[2] == FOURCC('h', 'd', 'l', 'r')
714                    ||  path[2] == FOURCC('i', 'l', 's', 't')
715                    ||  path[2] == FOURCC('k', 'e', 'y', 's'))));
716}
717
718// Given a time in seconds since Jan 1 1904, produce a human-readable string.
719static bool convertTimeToDate(int64_t time_1904, String8 *s) {
720    // delta between mpeg4 time and unix epoch time
721    static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
722    if (time_1904 < INT64_MIN + delta) {
723        return false;
724    }
725    time_t time_1970 = time_1904 - delta;
726
727    char tmp[32];
728    struct tm* tm = gmtime(&time_1970);
729    if (tm != NULL &&
730            strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
731        s->setTo(tmp);
732        return true;
733    }
734    return false;
735}
736
737status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
738    ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
739
740    if (*offset < 0) {
741        ALOGE("b/23540914");
742        return ERROR_MALFORMED;
743    }
744    if (depth > 100) {
745        ALOGE("b/27456299");
746        return ERROR_MALFORMED;
747    }
748    uint32_t hdr[2];
749    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
750        return ERROR_IO;
751    }
752    uint64_t chunk_size = ntohl(hdr[0]);
753    int32_t chunk_type = ntohl(hdr[1]);
754    off64_t data_offset = *offset + 8;
755
756    if (chunk_size == 1) {
757        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
758            return ERROR_IO;
759        }
760        chunk_size = ntoh64(chunk_size);
761        data_offset += 8;
762
763        if (chunk_size < 16) {
764            // The smallest valid chunk is 16 bytes long in this case.
765            return ERROR_MALFORMED;
766        }
767    } else if (chunk_size == 0) {
768        if (depth == 0) {
769            // atom extends to end of file
770            off64_t sourceSize;
771            if (mDataSource->getSize(&sourceSize) == OK) {
772                chunk_size = (sourceSize - *offset);
773            } else {
774                // XXX could we just pick a "sufficiently large" value here?
775                ALOGE("atom size is 0, and data source has no size");
776                return ERROR_MALFORMED;
777            }
778        } else {
779            // not allowed for non-toplevel atoms, skip it
780            *offset += 4;
781            return OK;
782        }
783    } else if (chunk_size < 8) {
784        // The smallest valid chunk is 8 bytes long.
785        ALOGE("invalid chunk size: %" PRIu64, chunk_size);
786        return ERROR_MALFORMED;
787    }
788
789    char chunk[5];
790    MakeFourCCString(chunk_type, chunk);
791    ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
792
793    if (kUseHexDump) {
794        static const char kWhitespace[] = "                                        ";
795        const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
796        printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
797
798        char buffer[256];
799        size_t n = chunk_size;
800        if (n > sizeof(buffer)) {
801            n = sizeof(buffer);
802        }
803        if (mDataSource->readAt(*offset, buffer, n)
804                < (ssize_t)n) {
805            return ERROR_IO;
806        }
807
808        hexdump(buffer, n);
809    }
810
811    PathAdder autoAdder(&mPath, chunk_type);
812
813    // (data_offset - *offset) is either 8 or 16
814    off64_t chunk_data_size = chunk_size - (data_offset - *offset);
815    if (chunk_data_size < 0) {
816        ALOGE("b/23540914");
817        return ERROR_MALFORMED;
818    }
819    if (chunk_type != FOURCC('m', 'd', 'a', 't') && chunk_data_size > kMaxAtomSize) {
820        char errMsg[100];
821        sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
822        ALOGE("%s (b/28615448)", errMsg);
823        android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
824        return ERROR_MALFORMED;
825    }
826
827    if (chunk_type != FOURCC('c', 'p', 'r', 't')
828            && chunk_type != FOURCC('c', 'o', 'v', 'r')
829            && mPath.size() == 5 && underMetaDataPath(mPath)) {
830        off64_t stop_offset = *offset + chunk_size;
831        *offset = data_offset;
832        while (*offset < stop_offset) {
833            status_t err = parseChunk(offset, depth + 1);
834            if (err != OK) {
835                return err;
836            }
837        }
838
839        if (*offset != stop_offset) {
840            return ERROR_MALFORMED;
841        }
842
843        return OK;
844    }
845
846    switch(chunk_type) {
847        case FOURCC('m', 'o', 'o', 'v'):
848        case FOURCC('t', 'r', 'a', 'k'):
849        case FOURCC('m', 'd', 'i', 'a'):
850        case FOURCC('m', 'i', 'n', 'f'):
851        case FOURCC('d', 'i', 'n', 'f'):
852        case FOURCC('s', 't', 'b', 'l'):
853        case FOURCC('m', 'v', 'e', 'x'):
854        case FOURCC('m', 'o', 'o', 'f'):
855        case FOURCC('t', 'r', 'a', 'f'):
856        case FOURCC('m', 'f', 'r', 'a'):
857        case FOURCC('u', 'd', 't', 'a'):
858        case FOURCC('i', 'l', 's', 't'):
859        case FOURCC('s', 'i', 'n', 'f'):
860        case FOURCC('s', 'c', 'h', 'i'):
861        case FOURCC('e', 'd', 't', 's'):
862        case FOURCC('w', 'a', 'v', 'e'):
863        {
864            if (chunk_type == FOURCC('m', 'o', 'o', 'v') && depth != 0) {
865                ALOGE("moov: depth %d", depth);
866                return ERROR_MALFORMED;
867            }
868
869            if (chunk_type == FOURCC('m', 'o', 'o', 'v') && mInitCheck == OK) {
870                ALOGE("duplicate moov");
871                return ERROR_MALFORMED;
872            }
873
874            if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) {
875                // store the offset of the first segment
876                mMoofFound = true;
877                mMoofOffset = *offset;
878            }
879
880            if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
881                ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
882
883                if (mDataSource->flags()
884                        & (DataSourceBase::kWantsPrefetching
885                            | DataSourceBase::kIsCachingDataSource)) {
886                    CachedRangedDataSource *cachedSource =
887                        new CachedRangedDataSource(mDataSource);
888
889                    if (cachedSource->setCachedRange(
890                            *offset, chunk_size,
891                            mCachedSource != NULL /* assume ownership on success */) == OK) {
892                        mDataSource = mCachedSource = cachedSource;
893                    } else {
894                        delete cachedSource;
895                    }
896                }
897
898                if (mLastTrack == NULL) {
899                    return ERROR_MALFORMED;
900                }
901
902                mLastTrack->sampleTable = new SampleTable(mDataSource);
903            }
904
905            bool isTrack = false;
906            if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
907                if (depth != 1) {
908                    ALOGE("trak: depth %d", depth);
909                    return ERROR_MALFORMED;
910                }
911                isTrack = true;
912
913                ALOGV("adding new track");
914                Track *track = new Track;
915                track->next = NULL;
916                if (mLastTrack) {
917                    mLastTrack->next = track;
918                } else {
919                    mFirstTrack = track;
920                }
921                mLastTrack = track;
922
923                track->includes_expensive_metadata = false;
924                track->skipTrack = false;
925                track->timescale = 0;
926                track->meta.setCString(kKeyMIMEType, "application/octet-stream");
927                track->has_elst = false;
928                track->subsample_encryption = false;
929            }
930
931            off64_t stop_offset = *offset + chunk_size;
932            *offset = data_offset;
933            while (*offset < stop_offset) {
934                status_t err = parseChunk(offset, depth + 1);
935                if (err != OK) {
936                    if (isTrack) {
937                        mLastTrack->skipTrack = true;
938                        break;
939                    }
940                    return err;
941                }
942            }
943
944            if (*offset != stop_offset) {
945                return ERROR_MALFORMED;
946            }
947
948            if (isTrack) {
949                int32_t trackId;
950                // There must be exact one track header per track.
951                if (!mLastTrack->meta.findInt32(kKeyTrackID, &trackId)) {
952                    mLastTrack->skipTrack = true;
953                }
954
955                status_t err = verifyTrack(mLastTrack);
956                if (err != OK) {
957                    mLastTrack->skipTrack = true;
958                }
959
960                if (mLastTrack->skipTrack) {
961                    ALOGV("skipping this track...");
962                    Track *cur = mFirstTrack;
963
964                    if (cur == mLastTrack) {
965                        delete cur;
966                        mFirstTrack = mLastTrack = NULL;
967                    } else {
968                        while (cur && cur->next != mLastTrack) {
969                            cur = cur->next;
970                        }
971                        if (cur) {
972                            cur->next = NULL;
973                        }
974                        delete mLastTrack;
975                        mLastTrack = cur;
976                    }
977
978                    return OK;
979                }
980            } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
981                mInitCheck = OK;
982
983                return UNKNOWN_ERROR;  // Return a dummy error.
984            }
985            break;
986        }
987
988        case FOURCC('s', 'c', 'h', 'm'):
989        {
               // Scheme type box: map the protection scheme's four-character code
               // to a crypto mode and store it on the current track.
990
991            *offset += chunk_size;
992            if (!mLastTrack) {
993                return ERROR_MALFORMED;
994            }
995
996            uint32_t scheme_type;
               // The code is read raw from data_offset + 4 (presumably just past the
               // version/flags word - confirm) and converted from network order below.
997            if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) {
998                return ERROR_IO;
999            }
1000            scheme_type = ntohl(scheme_type);
                // Scheme types without a case below leave the mode as unencrypted.
1001            int32_t mode = kCryptoModeUnencrypted;
1002            switch(scheme_type) {
1003                case FOURCC('c', 'b', 'c', '1'):
1004                {
1005                    mode = kCryptoModeAesCbc;
1006                    break;
1007                }
1008                case FOURCC('c', 'b', 'c', 's'):
1009                {
                        // Same mode as 'cbc1'; additionally sets the track's
                        // subsample_encryption flag.
1010                    mode = kCryptoModeAesCbc;
1011                    mLastTrack->subsample_encryption = true;
1012                    break;
1013                }
1014                case FOURCC('c', 'e', 'n', 'c'):
1015                {
1016                    mode = kCryptoModeAesCtr;
1017                    break;
1018                }
1019                case FOURCC('c', 'e', 'n', 's'):
1020                {
                        // Same mode as 'cenc'; additionally sets the track's
                        // subsample_encryption flag.
1021                    mode = kCryptoModeAesCtr;
1022                    mLastTrack->subsample_encryption = true;
1023                    break;
1024                }
1025            }
1026            mLastTrack->meta.setInt32(kKeyCryptoMode, mode);
1027            break;
1028        }
1029
1030
1031        case FOURCC('e', 'l', 's', 't'):
1032        {
                // Edit list box. Only a list with exactly one entry is used: its media
                // time and segment duration are stored on the current track for later.
                // Lists with any other entry count are logged and ignored.
1033            *offset += chunk_size;
1034
1035            if (!mLastTrack) {
1036                return ERROR_MALFORMED;
1037            }
1038
1039            // See 14496-12 8.6.6
1040            uint8_t version;
1041            if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1042                return ERROR_IO;
1043            }
1044
1045            uint32_t entry_count;
1046            if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1047                return ERROR_IO;
1048            }
1049
1050            if (entry_count != 1) {
1051                // we only support a single entry at the moment, for gapless playback
                    // entry_count is unsigned, so it is printed with %u.
1052                ALOGW("ignoring edit list with %u entries", entry_count);
1053            } else {
1054                off64_t entriesoffset = data_offset + 8;
1055                uint64_t segment_duration;
1056                int64_t media_time;
1057
                    // Version 1 stores both fields as 64-bit values; version 0 stores
                    // them as 32-bit values, which are widened here (media_time keeps
                    // its sign). Any other version is rejected.
1058                if (version == 1) {
1059                    if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1060                            !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1061                        return ERROR_IO;
1062                    }
1063                } else if (version == 0) {
1064                    uint32_t sd;
1065                    int32_t mt;
1066                    if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1067                            !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1068                        return ERROR_IO;
1069                    }
1070                    segment_duration = sd;
1071                    media_time = mt;
1072                } else {
1073                    return ERROR_IO;
1074                }
1075
1076                // save these for later, because the elst atom might precede
1077                // the atoms that actually give us the duration and sample rate
1078                // needed to calculate the padding and delay values
1079                mLastTrack->has_elst = true;
1080                mLastTrack->elst_media_time = media_time;
1081                mLastTrack->elst_segment_duration = segment_duration;
1082            }
1083            break;
1084        }
1085
1086        case FOURCC('f', 'r', 'm', 'a'):
1087        {
                // Original format box: supplies the fourcc that decides the MIME type
                // of the current track (the 'enca'/'encv' sample entries leave the
                // MIME type to this box).
1088            *offset += chunk_size;
1089
1090            uint32_t original_fourcc;
1091            if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1092                return ERROR_IO;
1093            }
1094            original_fourcc = ntohl(original_fourcc);
1095            ALOGV("read original format: %d", original_fourcc);
1096
1097            if (mLastTrack == NULL) {
1098                return ERROR_MALFORMED;
1099            }
1100
1101            mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
                // Channel count and sample rate are only written when
                // AdjustChannelsAndRate() returns true for this fourcc.
1102            uint32_t num_channels = 0;
1103            uint32_t sample_rate = 0;
1104            if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1105                mLastTrack->meta.setInt32(kKeyChannelCount, num_channels);
1106                mLastTrack->meta.setInt32(kKeySampleRate, sample_rate);
1107            }
1108            break;
1109        }
1110
1111        case FOURCC('t', 'e', 'n', 'c'):
1112        {
                // Track encryption box: publishes the track's default crypto parameters
                // (algorithm id, IV size, key id, the version 1 encrypt/skip pattern and
                // an optional constant IV) as metadata on the current track.
1113            *offset += chunk_size;
1114
1115            if (chunk_size < 32) {
1116                return ERROR_MALFORMED;
1117            }
1118
1119            // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1120            // default IV size, 16 bytes default KeyID
1121            // (ISO 23001-7)
1122
1123            uint8_t version;
1124            if (mDataSource->readAt(data_offset, &version, sizeof(version))
1125                    < (ssize_t)sizeof(version)) {
1126                return ERROR_IO;
1127            }
1128
1129            uint8_t buf[4];
1130            memset(buf, 0, 4);
1131            if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1132                return ERROR_IO;
1133            }
1134
1135            if (mLastTrack == NULL) {
1136                return ERROR_MALFORMED;
1137            }
1138
1139            uint8_t defaultEncryptedByteBlock = 0;
1140            uint8_t defaultSkipByteBlock = 0;
                // buf holds the three bytes as the low part of a big-endian word. Decode
                // it with U32_AT() instead of dereferencing the byte array through an
                // int32_t pointer, which is neither alignment- nor aliasing-safe.
1141            uint32_t defaultAlgorithmId = U32_AT(buf);
1142            if (version == 1) {
                    // NOTE(review): on this path the middle byte is consumed as the
                    // pattern but also remains part of defaultAlgorithmId, which is not
                    // range-checked here - confirm this is intended.
1143                uint32_t pattern = buf[2];
1144                defaultEncryptedByteBlock = pattern >> 4;
1145                defaultSkipByteBlock = pattern & 0xf;
1146                if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) {
1147                    // use (1,0) to mean "encrypt everything"
1148                    defaultEncryptedByteBlock = 1;
1149                }
1150            } else if (mLastTrack->subsample_encryption) {
1151                ALOGW("subsample_encryption should be version 1");
1152            } else if (defaultAlgorithmId > 1) {
1153                // only 0 (clear) and 1 (AES-128) are valid
1154                ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId);
1155                defaultAlgorithmId = 1;
1156            }
1157
1158            memset(buf, 0, 4);
1159            if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1160                return ERROR_IO;
1161            }
1162            uint32_t defaultIVSize = U32_AT(buf);
1163
1164            if (defaultAlgorithmId == 0 && defaultIVSize != 0) {
1165                // unencrypted data must have an IV size of 0
1166                return ERROR_MALFORMED;
1167            } else if (defaultIVSize != 0 &&
1168                    defaultIVSize != 8 &&
1169                    defaultIVSize != 16) {
1170                return ERROR_MALFORMED;
1171            }
1172
1173            uint8_t defaultKeyId[16];
1174
1175            if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1176                return ERROR_IO;
1177            }
1178
                // Protected content with a per-sample IV size of 0 carries a constant
                // IV instead: a length byte at payload offset 24 followed by the IV.
1179            sp<ABuffer> defaultConstantIv;
1180            if (defaultAlgorithmId != 0 && defaultIVSize == 0) {
1181
1182                uint8_t ivlength;
1183                if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength))
1184                        < (ssize_t)sizeof(ivlength)) {
1185                    return ERROR_IO;
1186                }
1187
1188                if (ivlength != 8 && ivlength != 16) {
1189                    ALOGW("unsupported IV length: %u", ivlength);
1190                    return ERROR_MALFORMED;
1191                }
1192
                    // The length byte and the IV must lie inside this box; otherwise
                    // the bytes of whatever follows would be taken as the IV.
                    if (chunk_data_size < 25 + (off64_t)ivlength) {
                        return ERROR_MALFORMED;
                    }

1193                defaultConstantIv = new ABuffer(ivlength);
1194                if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength)
1195                        < (ssize_t)ivlength) {
1196                    return ERROR_IO;
1197                }
1198
1199                defaultConstantIv->setRange(0, ivlength);
1200            }
1201
                // A crypto mode that is already present on the track is kept; the
                // algorithm id is only used when none has been set.
1202            int32_t tmpAlgorithmId;
1203            if (!mLastTrack->meta.findInt32(kKeyCryptoMode, &tmpAlgorithmId)) {
1204                mLastTrack->meta.setInt32(kKeyCryptoMode, defaultAlgorithmId);
1205            }
1206
1207            mLastTrack->meta.setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1208            mLastTrack->meta.setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1209            mLastTrack->meta.setInt32(kKeyEncryptedByteBlock, defaultEncryptedByteBlock);
1210            mLastTrack->meta.setInt32(kKeySkipByteBlock, defaultSkipByteBlock);
1211            if (defaultConstantIv != NULL) {
1212                mLastTrack->meta.setData(kKeyCryptoIV, 'dciv', defaultConstantIv->data(), defaultConstantIv->size());
1213            }
1214            break;
1215        }
1216
1217        case FOURCC('t', 'k', 'h', 'd'):
1218        {
1219            // Track header: skip over the box, then let parseTrackHeader()
1220            // interpret its payload; a failure is handed back to the caller.
1221            *offset += chunk_size;
1222            const status_t headerStatus = parseTrackHeader(data_offset, chunk_data_size);
1223            if (headerStatus != OK) {
1224                return headerStatus;
1225            }
1226            break;
1227        }
1228
1229        case FOURCC('t', 'r', 'e', 'f'):
1230        {
1231            // Parse every child box; together they must end exactly at this box's end.
1232            const off64_t boxEnd = *offset + chunk_size;
1233            for (*offset = data_offset; *offset < boxEnd; ) {
1234                const status_t childStatus = parseChunk(offset, depth + 1);
1235                if (childStatus != OK) {
1236                    return childStatus;
1237                }
1238            }
1239            if (*offset == boxEnd) {
1240                break;
1241            }
1242            return ERROR_MALFORMED;
1243        }
1244
1245        case FOURCC('t', 'h', 'm', 'b'):
1246        {
1247            // Thumbnail reference. There is no API for retrieving thumbnails
1248            // yet, so the current track (if any) is marked to be skipped.
1249            // A thumbnail track cannot be addressed by negative index or by
1250            // time, because every timed sample has its own thumbnail in it;
1251            // a dedicated "thumbnail at time" API would be needed instead.
1252
1253            *offset += chunk_size;
1254
1255            if (mLastTrack) {
1256                mLastTrack->skipTrack = true;
1257            }
1258
1259            break;
1260        }
1261
1262        case FOURCC('p', 's', 's', 'h'):
1263        {
                // Protection system specific header: copies the 16-byte system id and
                // the opaque data blob into a PsshInfo appended to mPssh. The buffer
                // allocated here is handed over to mPssh on success.
1264            *offset += chunk_size;
1265
1266            PsshInfo pssh;
1267
                // The fixed part of the payload read below (4 bytes skipped, 16-byte
                // system id, 4-byte data length) is 24 bytes; make sure the box holds
                // it before reading anything.
                if (chunk_data_size < 24) {
                    return ERROR_MALFORMED;
                }

1268            if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1269                return ERROR_IO;
1270            }
1271
1272            uint32_t psshdatalen = 0;
1273            if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1274                return ERROR_IO;
1275            }
1276            pssh.datalen = ntohl(psshdatalen);
1277            ALOGV("pssh data size: %u", pssh.datalen);
                // The data starts 24 bytes into the payload, so that is the amount to
                // subtract when checking that it ends inside the box.
1278            if ((off64_t)pssh.datalen > chunk_data_size - 24) {
1279                // pssh data length exceeds size of containing box
1280                return ERROR_MALFORMED;
1281            }
1282
1283            pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1284            if (pssh.data == NULL) {
1285                return ERROR_MALFORMED;
1286            }
1287            ALOGV("allocated pssh @ %p", pssh.data);
                // Compare as unsigned so that a length above SSIZE_MAX cannot turn a
                // failed or short read into an apparent success.
1288            const ssize_t bytesRead =
1289                    mDataSource->readAt(data_offset + 24, pssh.data, pssh.datalen);
                if (bytesRead < 0 || (size_t)bytesRead < pssh.datalen) {
1290                delete[] pssh.data;
1291                return ERROR_IO;
1292            }
1293            mPssh.push_back(pssh);
1294
1295            break;
1296        }
1297
1298        case FOURCC('m', 'd', 'h', 'd'):
1299        {
                // Media header box: records the track's timescale, its duration in
                // microseconds (when one is given) and its packed language code.
1300            *offset += chunk_size;
1301
1302            if (chunk_data_size < 4 || mLastTrack == NULL) {
1303                return ERROR_MALFORMED;
1304            }
1305
1306            uint8_t version;
1307            if (mDataSource->readAt(
1308                        data_offset, &version, sizeof(version))
1309                    < (ssize_t)sizeof(version)) {
1310                return ERROR_IO;
1311            }
1312
                // The two fields in front of the timescale are 8 bytes each in
                // version 1 and 4 bytes each in version 0.
1313            off64_t timescale_offset;
1314
1315            if (version == 1) {
1316                timescale_offset = data_offset + 4 + 16;
1317            } else if (version == 0) {
1318                timescale_offset = data_offset + 4 + 8;
1319            } else {
1320                return ERROR_IO;
1321            }
1322
1323            uint32_t timescale;
1324            if (mDataSource->readAt(
1325                        timescale_offset, &timescale, sizeof(timescale))
1326                    < (ssize_t)sizeof(timescale)) {
1327                return ERROR_IO;
1328            }
1329
                // Zero is zero in either byte order, so this test is valid before the
                // conversion below.
1330            if (!timescale) {
1331                ALOGE("timescale should not be ZERO.");
1332                return ERROR_MALFORMED;
1333            }
1334
                const uint32_t ticksPerSecond = ntohl(timescale);
1335            mLastTrack->timescale = ticksPerSecond;
1336
1337            // 14496-12 says all ones means indeterminate, but some files seem to use
1338            // 0 instead. We treat both the same.
1339            int64_t duration = 0;
1340            if (version == 1) {
1341                if (mDataSource->readAt(
1342                            timescale_offset + 4, &duration, sizeof(duration))
1343                        < (ssize_t)sizeof(duration)) {
1344                    return ERROR_IO;
1345                }
                    // All ones reads as -1 in either byte order. Map it to 0 so that it
                    // is handled like the 32-bit case instead of being published as a
                    // negative duration.
1346                if (duration != -1) {
1347                    duration = ntoh64(duration);
                    } else {
                        duration = 0;
1348                }
1349            } else {
1350                uint32_t duration32;
1351                if (mDataSource->readAt(
1352                            timescale_offset + 4, &duration32, sizeof(duration32))
1353                        < (ssize_t)sizeof(duration32)) {
1354                    return ERROR_IO;
1355                }
1356                if (duration32 != 0xffffffff) {
1357                    duration = ntohl(duration32);
1358                }
1359            }
                // A 64-bit duration with the top bit set cannot be represented.
                if (duration < 0) {
                    ALOGE("mdhd: negative duration %" PRId64, duration);
                    return ERROR_MALFORMED;
                }
1360            if (duration != 0 && mLastTrack->timescale != 0) {
                    // Convert ticks to microseconds without forming duration * 1000000,
                    // which can overflow int64_t. Whole seconds and the sub-second
                    // remainder are scaled separately; the remainder is below 2^32, so
                    // its product with 1000000 always fits.
                    const int64_t scale = ticksPerSecond;
                    const int64_t wholeSeconds = duration / scale;
                    if (wholeSeconds > (INT64_MAX - 999999) / 1000000) {
                        ALOGE("mdhd: duration %" PRId64 " / %" PRId64 " is too large",
                                duration, scale);
                        return ERROR_MALFORMED;
                    }
                    const int64_t durationUs = wholeSeconds * 1000000
                            + ((duration % scale) * 1000000) / scale;
1361                mLastTrack->meta.setInt64(kKeyDuration, durationUs);
1363            }
1364
1365            uint8_t lang[2];
1366            off64_t lang_offset;
1367            if (version == 1) {
1368                lang_offset = timescale_offset + 4 + 8;
1369            } else if (version == 0) {
1370                lang_offset = timescale_offset + 4 + 4;
1371            } else {
1372                return ERROR_IO;
1373            }
1374
1375            if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1376                    < (ssize_t)sizeof(lang)) {
1377                return ERROR_IO;
1378            }
1379
1380            // To get the ISO-639-2/T three character language code
1381            // 1 bit pad followed by 3 5-bits characters. Each character
1382            // is packed as the difference between its ASCII value and 0x60.
1383            char lang_code[4];
1384            lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1385            lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1386            lang_code[2] = (lang[1] & 0x1f) + 0x60;
1387            lang_code[3] = '\0';
1388
1389            mLastTrack->meta.setCString(
1390                    kKeyMediaLanguage, lang_code);
1391
1392            break;
1393        }
1394
1395        case FOURCC('s', 't', 's', 'd'):
1396        {
                // Sample description box: an 8-byte header (version/flags word, entry
                // count) followed by entry_count sample entries, each parsed as a
                // child chunk.
1397            uint8_t buffer[8];
1398            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1399                return ERROR_MALFORMED;
1400            }
1401
1402            if (mDataSource->readAt(
1403                        data_offset, buffer, 8) < 8) {
1404                return ERROR_IO;
1405            }
1406
1407            if (U32_AT(buffer) != 0) {
1408                // Should be version 0, flags 0.
1409                return ERROR_MALFORMED;
1410            }
1411
1412            uint32_t entry_count = U32_AT(&buffer[4]);
1413
1414            if (entry_count > 1) {
1415                // For 3GPP timed text, there could be multiple tx3g boxes containing
1416                // multiple text display formats. These formats will be used to
1417                // display the timed text.
1418                // For encrypted files, there may also be more than one entry.
1419                const char *mime;
1420
1421                if (mLastTrack == NULL)
1422                    return ERROR_MALFORMED;
1423
1424                CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime));
                    // Several entries are only accepted for 3GPP timed text and for
                    // the "application/octet-stream" MIME type. Any other track is
                    // marked to be skipped and the whole box is stepped over unparsed.
1425                if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1426                        strcasecmp(mime, "application/octet-stream")) {
1427                    // For now we only support a single type of media per track.
1428                    mLastTrack->skipTrack = true;
1429                    *offset += chunk_size;
1430                    break;
1431                }
1432            }
                // *offset still points at the start of this box here, so stop_offset
                // is the end of the box; parsing resumes right after the 8-byte header.
1433            off64_t stop_offset = *offset + chunk_size;
1434            *offset = data_offset + 8;
1435            for (uint32_t i = 0; i < entry_count; ++i) {
1436                status_t err = parseChunk(offset, depth + 1);
1437                if (err != OK) {
1438                    return err;
1439                }
1440            }
1441
                // The entries must end exactly at the end of the box.
1442            if (*offset != stop_offset) {
1443                return ERROR_MALFORMED;
1444            }
1445            break;
1446        }
1447        case FOURCC('m', 'e', 't', 't'):
1448        {
                // Reads the whole payload of the box and stores it as the MIME type
                // of the current track.
1449            *offset += chunk_size;
1450
1451            if (mLastTrack == NULL)
1452                return ERROR_MALFORMED;
1453
1454            auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1455            if (buffer.get() == NULL) {
1456                return NO_MEMORY;
1457            }
1458
1459            if (mDataSource->readAt(
1460                        data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1461                return ERROR_IO;
1462            }
1463
                // The buffer is not necessarily NUL-terminated, so the string is
                // built with an explicit length.
                // NOTE(review): setCString() presumably stops at the first NUL in the
                // payload - confirm.
1464            String8 mimeFormat((const char *)(buffer.get()), chunk_data_size);
1465            mLastTrack->meta.setCString(kKeyMIMEType, mimeFormat.string());
1466
1467            break;
1468        }
1469
1470        case FOURCC('m', 'p', '4', 'a'):
1471        case FOURCC('e', 'n', 'c', 'a'):
1472        case FOURCC('s', 'a', 'm', 'r'):
1473        case FOURCC('s', 'a', 'w', 'b'):
1474        {
                // Audio sample entries: read the fixed 28-byte header, record channel
                // count and sample rate on the current track, then parse the child
                // boxes up to the end of the entry.
1475            if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')
1476                    && depth >= 1 && mPath[depth - 1] == FOURCC('w', 'a', 'v', 'e')) {
1477                // Ignore mp4a embedded in QT wave atom
1478                *offset += chunk_size;
1479                break;
1480            }
1481
1482            uint8_t buffer[8 + 20];
1483            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1484                // Basic AudioSampleEntry size.
1485                return ERROR_MALFORMED;
1486            }
1487
1488            if (mDataSource->readAt(
1489                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1490                return ERROR_IO;
1491            }
1492
1493            uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1494            uint16_t version = U16_AT(&buffer[8]);
1495            uint32_t num_channels = U16_AT(&buffer[16]);
1496
1497            uint16_t sample_size = U16_AT(&buffer[18]);
                // Only the upper 16 bits of the 32-bit field are kept (presumably a
                // 16.16 fixed-point value - confirm).
1498            uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1499
1500            if (mLastTrack == NULL)
1501                return ERROR_MALFORMED;
1502
                // *offset still points at the start of this entry, so stop_offset is
                // its end; child parsing starts right after the fixed header.
1503            off64_t stop_offset = *offset + chunk_size;
1504            *offset = data_offset + sizeof(buffer);
1505
                // QuickTime 'mp4a' entries of version 1 and 2 carry 16 and 36 extra
                // header bytes respectively. They are read (so a truncated file is
                // reported) and stepped over, but their fields are not interpreted:
                // the decoding code below is compiled out.
1506            if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')) {
1507                if (version == 1) {
1508                    if (mDataSource->readAt(*offset, buffer, 16) < 16) {
1509                        return ERROR_IO;
1510                    }
1511
1512#if 0
1513                    U32_AT(buffer);  // samples per packet
1514                    U32_AT(&buffer[4]);  // bytes per packet
1515                    U32_AT(&buffer[8]);  // bytes per frame
1516                    U32_AT(&buffer[12]);  // bytes per sample
1517#endif
1518                    *offset += 16;
1519                } else if (version == 2) {
1520                    uint8_t v2buffer[36];
1521                    if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
1522                        return ERROR_IO;
1523                    }
1524
1525#if 0
1526                    U32_AT(v2buffer);  // size of struct only
1527                    sample_rate = (uint32_t)U64_AT(&v2buffer[4]);  // audio sample rate
1528                    num_channels = U32_AT(&v2buffer[12]);  // num audio channels
1529                    U32_AT(&v2buffer[16]);  // always 0x7f000000
1530                    sample_size = (uint16_t)U32_AT(&v2buffer[20]);  // const bits per channel
1531                    U32_AT(&v2buffer[24]);  // format specific flags
1532                    U32_AT(&v2buffer[28]);  // const bytes per audio packet
1533                    U32_AT(&v2buffer[32]);  // const LPCM frames per audio packet
1534#endif
1535                    *offset += 36;
1536                }
1537            }
1538
1539            if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1540                // if the chunk type is enca, we'll get the type from the frma box later
1541                mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1542                AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1543            }
1544            ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1545                   chunk, num_channels, sample_size, sample_rate);
1546            mLastTrack->meta.setInt32(kKeyChannelCount, num_channels);
1547            mLastTrack->meta.setInt32(kKeySampleRate, sample_rate);
1548
                // Parse the child boxes; they must end exactly at the end of the entry.
1549            while (*offset < stop_offset) {
1550                status_t err = parseChunk(offset, depth + 1);
1551                if (err != OK) {
1552                    return err;
1553                }
1554            }
1555
1556            if (*offset != stop_offset) {
1557                return ERROR_MALFORMED;
1558            }
1559            break;
1560        }
1561
1562        case FOURCC('m', 'p', '4', 'v'):
1563        case FOURCC('e', 'n', 'c', 'v'):
1564        case FOURCC('s', '2', '6', '3'):
1565        case FOURCC('H', '2', '6', '3'):
1566        case FOURCC('h', '2', '6', '3'):
1567        case FOURCC('a', 'v', 'c', '1'):
1568        case FOURCC('h', 'v', 'c', '1'):
1569        case FOURCC('h', 'e', 'v', '1'):
1570        {
                // Video sample entries: read the fixed 78-byte header, record width
                // and height on the current track, then parse the child boxes up to
                // the end of the entry.
1571            uint8_t buffer[78];
1572            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1573                // Basic VideoSampleEntry size.
1574                return ERROR_MALFORMED;
1575            }
1576
1577            if (mDataSource->readAt(
1578                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1579                return ERROR_IO;
1580            }
1581
1582            uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1583            uint16_t width = U16_AT(&buffer[6 + 18]);
1584            uint16_t height = U16_AT(&buffer[6 + 20]);
1585
1586            // The video sample is not standard-compliant if it has invalid dimension.
1587            // Use some default width and height value, and
1588            // let the decoder figure out the actual width and height (and thus
1589            // be prepared for INFO_FORMAT_CHANGED event).
1590            if (width == 0)  width  = 352;
1591            if (height == 0) height = 288;
1592
1593            // printf("*** coding='%s' width=%d height=%d\n",
1594            //        chunk, width, height);
1595
1596            if (mLastTrack == NULL)
1597                return ERROR_MALFORMED;
1598
1599            if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1600                // if the chunk type is encv, we'll get the type from the frma box later
1601                mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1602            }
1603            mLastTrack->meta.setInt32(kKeyWidth, width);
1604            mLastTrack->meta.setInt32(kKeyHeight, height);
1605
                // *offset still points at the start of this entry, so stop_offset is
                // its end; the children start right after the fixed header and must
                // end exactly at stop_offset.
1606            off64_t stop_offset = *offset + chunk_size;
1607            *offset = data_offset + sizeof(buffer);
1608            while (*offset < stop_offset) {
1609                status_t err = parseChunk(offset, depth + 1);
1610                if (err != OK) {
1611                    return err;
1612                }
1613            }
1614
1615            if (*offset != stop_offset) {
1616                return ERROR_MALFORMED;
1617            }
1618            break;
1619        }
1620
1621        case FOURCC('s', 't', 'c', 'o'):
1622        case FOURCC('c', 'o', '6', '4'):
1623        {
1624            // Chunk offset table ('stco' or 'co64'). The current track must
1625            // already own a SampleTable to receive it.
1626            if (!mLastTrack || mLastTrack->sampleTable == NULL) {
1627                return ERROR_MALFORMED;
1628            }
1629
1630            const status_t tableStatus = mLastTrack->sampleTable->setChunkOffsetParams(
1631                    chunk_type, data_offset, chunk_data_size);
1632
1633            // Step over the box before reporting any failure.
1634            *offset += chunk_size;
1635            if (tableStatus != OK) {
1636                return tableStatus;
1637            }
1638            break;
1639        }
1640
1641        case FOURCC('s', 't', 's', 'c'):
1642        {
1643            // Sample-to-chunk table. The current track must already own a
1644            // SampleTable to receive it.
1645            if (!mLastTrack || mLastTrack->sampleTable == NULL) {
1646                return ERROR_MALFORMED;
1647            }
1648
1649            const status_t tableStatus = mLastTrack->sampleTable->setSampleToChunkParams(
1650                    data_offset, chunk_data_size);
1651            // Step over the box before reporting any failure.
1652            *offset += chunk_size;
1653            if (tableStatus != OK) {
1654                return tableStatus;
1655            }
1656            break;
1657        }
1658
1659        case FOURCC('s', 't', 's', 'z'):
1660        case FOURCC('s', 't', 'z', '2'):
1661        {
1662            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
1663                return ERROR_MALFORMED;
1664            }
1665
1666            status_t err =
1667                mLastTrack->sampleTable->setSampleSizeParams(
1668                        chunk_type, data_offset, chunk_data_size);
1669
1670            *offset += chunk_size;
1671
1672            if (err != OK) {
1673                return err;
1674            }
1675
1676            size_t max_size;
1677            err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1678
1679            if (err != OK) {
1680                return err;
1681            }
1682
1683            if (max_size != 0) {
1684                // Assume that a given buffer only contains at most 10 chunks,
1685                // each chunk originally prefixed with a 2 byte length will
1686                // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1687                // and thus will grow by 2 bytes per chunk.
1688                if (max_size > SIZE_MAX - 10 * 2) {
1689                    ALOGE("max sample size too big: %zu", max_size);
1690                    return ERROR_MALFORMED;
1691                }
1692                mLastTrack->meta.setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1693            } else {
1694                // No size was specified. Pick a conservatively large size.
1695                uint32_t width, height;
1696                if (!mLastTrack->meta.findInt32(kKeyWidth, (int32_t*)&width) ||
1697                    !mLastTrack->meta.findInt32(kKeyHeight,(int32_t*) &height)) {
1698                    ALOGE("No width or height, assuming worst case 1080p");
1699                    width = 1920;
1700                    height = 1080;
1701                } else {
1702                    // A resolution was specified, check that it's not too big. The values below
1703                    // were chosen so that the calculations below don't cause overflows, they're
1704                    // not indicating that resolutions up to 32kx32k are actually supported.
1705                    if (width > 32768 || height > 32768) {
1706                        ALOGE("can't support %u x %u video", width, height);
1707                        return ERROR_MALFORMED;
1708                    }
1709                }
1710
1711                const char *mime;
1712                CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime));
1713                if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
1714                        || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
1715                    // AVC & HEVC requires compression ratio of at least 2, and uses
1716                    // macroblocks
1717                    max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1718                } else {
1719                    // For all other formats there is no minimum compression
1720                    // ratio. Use compression ratio of 1.
1721                    max_size = width * height * 3 / 2;
1722                }
1723                // HACK: allow 10% overhead
1724                // TODO: read sample size from traf atom for fragmented MPEG4.
1725                max_size += max_size / 10;
1726                mLastTrack->meta.setInt32(kKeyMaxInputSize, max_size);
1727            }
1728
1729            // NOTE: setting another piece of metadata invalidates any pointers (such as the
1730            // mimetype) previously obtained, so don't cache them.
1731            const char *mime;
1732            CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime));
1733            // Calculate average frame rate.
1734            if (!strncasecmp("video/", mime, 6)) {
1735                size_t nSamples = mLastTrack->sampleTable->countSamples();
1736                if (nSamples == 0) {
1737                    int32_t trackId;
1738                    if (mLastTrack->meta.findInt32(kKeyTrackID, &trackId)) {
1739                        for (size_t i = 0; i < mTrex.size(); i++) {
1740                            Trex *t = &mTrex.editItemAt(i);
1741                            if (t->track_ID == (uint32_t) trackId) {
1742                                if (t->default_sample_duration > 0) {
1743                                    int32_t frameRate =
1744                                            mLastTrack->timescale / t->default_sample_duration;
1745                                    mLastTrack->meta.setInt32(kKeyFrameRate, frameRate);
1746                                }
1747                                break;
1748                            }
1749                        }
1750                    }
1751                } else {
1752                    int64_t durationUs;
1753                    if (mLastTrack->meta.findInt64(kKeyDuration, &durationUs)) {
1754                        if (durationUs > 0) {
1755                            int32_t frameRate = (nSamples * 1000000LL +
1756                                        (durationUs >> 1)) / durationUs;
1757                            mLastTrack->meta.setInt32(kKeyFrameRate, frameRate);
1758                        }
1759                    }
1760                    ALOGV("setting frame count %zu", nSamples);
1761                    mLastTrack->meta.setInt32(kKeyFrameCount, nSamples);
1762                }
1763            }
1764
1765            break;
1766        }
1767
1768        case FOURCC('s', 't', 't', 's'):
1769        {
1770            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1771                return ERROR_MALFORMED;
1772
1773            *offset += chunk_size;
1774
1775            status_t err =
1776                mLastTrack->sampleTable->setTimeToSampleParams(
1777                        data_offset, chunk_data_size);
1778
1779            if (err != OK) {
1780                return err;
1781            }
1782
1783            break;
1784        }
1785
1786        case FOURCC('c', 't', 't', 's'):
1787        {
1788            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1789                return ERROR_MALFORMED;
1790
1791            *offset += chunk_size;
1792
1793            status_t err =
1794                mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1795                        data_offset, chunk_data_size);
1796
1797            if (err != OK) {
1798                return err;
1799            }
1800
1801            break;
1802        }
1803
1804        case FOURCC('s', 't', 's', 's'):
1805        {
1806            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1807                return ERROR_MALFORMED;
1808
1809            *offset += chunk_size;
1810
1811            status_t err =
1812                mLastTrack->sampleTable->setSyncSampleParams(
1813                        data_offset, chunk_data_size);
1814
1815            if (err != OK) {
1816                return err;
1817            }
1818
1819            break;
1820        }
1821
1822        // \xA9xyz
1823        case FOURCC(0xA9, 'x', 'y', 'z'):
1824        {
1825            *offset += chunk_size;
1826
1827            // Best case the total data length inside "\xA9xyz" box would
1828            // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
1829            // where "\x00\x05" is the text string length with value = 5,
1830            // "\0x15\xc7" is the language code = en, and "+0+0/" is a
1831            // location (string) value with longitude = 0 and latitude = 0.
1832            // Since some devices encountered in the wild omit the trailing
1833            // slash, we'll allow that.
1834            if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
1835                return ERROR_MALFORMED;
1836            }
1837
1838            uint16_t len;
1839            if (!mDataSource->getUInt16(data_offset, &len)) {
1840                return ERROR_IO;
1841            }
1842
1843            // allow "+0+0" without trailing slash
1844            if (len < 4 || len > chunk_data_size - 4) {
1845                return ERROR_MALFORMED;
1846            }
1847            // The location string following the language code is formatted
1848            // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
1849            // Allocate 2 extra bytes, in case we need to add a trailing slash,
1850            // and to add a terminating 0.
1851            std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
1852            if (!buffer) {
1853                return NO_MEMORY;
1854            }
1855
1856            if (mDataSource->readAt(
1857                        data_offset + 4, &buffer[0], len) < len) {
1858                return ERROR_IO;
1859            }
1860
1861            len = strlen(&buffer[0]);
1862            if (len < 4) {
1863                return ERROR_MALFORMED;
1864            }
1865            // Add a trailing slash if there wasn't one.
1866            if (buffer[len - 1] != '/') {
1867                buffer[len] = '/';
1868            }
1869            mFileMetaData.setCString(kKeyLocation, &buffer[0]);
1870            break;
1871        }
1872
1873        case FOURCC('e', 's', 'd', 's'):
1874        {
1875            *offset += chunk_size;
1876
1877            if (chunk_data_size < 4) {
1878                return ERROR_MALFORMED;
1879            }
1880
1881            uint8_t buffer[256];
1882            if (chunk_data_size > (off64_t)sizeof(buffer)) {
1883                return ERROR_BUFFER_TOO_SMALL;
1884            }
1885
1886            if (mDataSource->readAt(
1887                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
1888                return ERROR_IO;
1889            }
1890
1891            if (U32_AT(buffer) != 0) {
1892                // Should be version 0, flags 0.
1893                return ERROR_MALFORMED;
1894            }
1895
1896            if (mLastTrack == NULL)
1897                return ERROR_MALFORMED;
1898
1899            mLastTrack->meta.setData(
1900                    kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1901
1902            if (mPath.size() >= 2
1903                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1904                // Information from the ESDS must be relied on for proper
1905                // setup of sample rate and channel count for MPEG4 Audio.
1906                // The generic header appears to only contain generic
1907                // information...
1908
1909                status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1910                        &buffer[4], chunk_data_size - 4);
1911
1912                if (err != OK) {
1913                    return err;
1914                }
1915            }
1916            if (mPath.size() >= 2
1917                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) {
1918                // Check if the video is MPEG2
1919                ESDS esds(&buffer[4], chunk_data_size - 4);
1920
1921                uint8_t objectTypeIndication;
1922                if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
1923                    if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
1924                        mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
1925                    }
1926                }
1927            }
1928            break;
1929        }
1930
1931        case FOURCC('b', 't', 'r', 't'):
1932        {
1933            *offset += chunk_size;
1934            if (mLastTrack == NULL) {
1935                return ERROR_MALFORMED;
1936            }
1937
1938            uint8_t buffer[12];
1939            if (chunk_data_size != sizeof(buffer)) {
1940                return ERROR_MALFORMED;
1941            }
1942
1943            if (mDataSource->readAt(
1944                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1945                return ERROR_IO;
1946            }
1947
1948            uint32_t maxBitrate = U32_AT(&buffer[4]);
1949            uint32_t avgBitrate = U32_AT(&buffer[8]);
1950            if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
1951                mLastTrack->meta.setInt32(kKeyMaxBitRate, (int32_t)maxBitrate);
1952            }
1953            if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
1954                mLastTrack->meta.setInt32(kKeyBitRate, (int32_t)avgBitrate);
1955            }
1956            break;
1957        }
1958
1959        case FOURCC('a', 'v', 'c', 'C'):
1960        {
1961            *offset += chunk_size;
1962
1963            auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1964
1965            if (buffer.get() == NULL) {
1966                ALOGE("b/28471206");
1967                return NO_MEMORY;
1968            }
1969
1970            if (mDataSource->readAt(
1971                        data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1972                return ERROR_IO;
1973            }
1974
1975            if (mLastTrack == NULL)
1976                return ERROR_MALFORMED;
1977
1978            mLastTrack->meta.setData(
1979                    kKeyAVCC, kTypeAVCC, buffer.get(), chunk_data_size);
1980
1981            break;
1982        }
1983        case FOURCC('h', 'v', 'c', 'C'):
1984        {
1985            auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1986
1987            if (buffer.get() == NULL) {
1988                ALOGE("b/28471206");
1989                return NO_MEMORY;
1990            }
1991
1992            if (mDataSource->readAt(
1993                        data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1994                return ERROR_IO;
1995            }
1996
1997            if (mLastTrack == NULL)
1998                return ERROR_MALFORMED;
1999
2000            mLastTrack->meta.setData(
2001                    kKeyHVCC, kTypeHVCC, buffer.get(), chunk_data_size);
2002
2003            *offset += chunk_size;
2004            break;
2005        }
2006
2007        case FOURCC('d', '2', '6', '3'):
2008        {
2009            *offset += chunk_size;
2010            /*
2011             * d263 contains a fixed 7 bytes part:
2012             *   vendor - 4 bytes
2013             *   version - 1 byte
2014             *   level - 1 byte
2015             *   profile - 1 byte
2016             * optionally, "d263" box itself may contain a 16-byte
2017             * bit rate box (bitr)
2018             *   average bit rate - 4 bytes
2019             *   max bit rate - 4 bytes
2020             */
2021            char buffer[23];
2022            if (chunk_data_size != 7 &&
2023                chunk_data_size != 23) {
2024                ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
2025                return ERROR_MALFORMED;
2026            }
2027
2028            if (mDataSource->readAt(
2029                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
2030                return ERROR_IO;
2031            }
2032
2033            if (mLastTrack == NULL)
2034                return ERROR_MALFORMED;
2035
2036            mLastTrack->meta.setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
2037
2038            break;
2039        }
2040
2041        case FOURCC('m', 'e', 't', 'a'):
2042        {
2043            off64_t stop_offset = *offset + chunk_size;
2044            *offset = data_offset;
2045            bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
2046            if (!isParsingMetaKeys) {
2047                uint8_t buffer[4];
2048                if (chunk_data_size < (off64_t)sizeof(buffer)) {
2049                    *offset = stop_offset;
2050                    return ERROR_MALFORMED;
2051                }
2052
2053                if (mDataSource->readAt(
2054                            data_offset, buffer, 4) < 4) {
2055                    *offset = stop_offset;
2056                    return ERROR_IO;
2057                }
2058
2059                if (U32_AT(buffer) != 0) {
2060                    // Should be version 0, flags 0.
2061
2062                    // If it's not, let's assume this is one of those
2063                    // apparently malformed chunks that don't have flags
2064                    // and completely different semantics than what's
2065                    // in the MPEG4 specs and skip it.
2066                    *offset = stop_offset;
2067                    return OK;
2068                }
2069                *offset +=  sizeof(buffer);
2070            }
2071
2072            while (*offset < stop_offset) {
2073                status_t err = parseChunk(offset, depth + 1);
2074                if (err != OK) {
2075                    return err;
2076                }
2077            }
2078
2079            if (*offset != stop_offset) {
2080                return ERROR_MALFORMED;
2081            }
2082            break;
2083        }
2084
2085        case FOURCC('i', 'l', 'o', 'c'):
2086        case FOURCC('i', 'i', 'n', 'f'):
2087        case FOURCC('i', 'p', 'r', 'p'):
2088        case FOURCC('p', 'i', 't', 'm'):
2089        case FOURCC('i', 'd', 'a', 't'):
2090        case FOURCC('i', 'r', 'e', 'f'):
2091        case FOURCC('i', 'p', 'r', 'o'):
2092        {
2093            if (mIsHeif) {
2094                if (mItemTable == NULL) {
2095                    mItemTable = new ItemTable(mDataSource);
2096                }
2097                status_t err = mItemTable->parse(
2098                        chunk_type, data_offset, chunk_data_size);
2099                if (err != OK) {
2100                    return err;
2101                }
2102            }
2103            *offset += chunk_size;
2104            break;
2105        }
2106
2107        case FOURCC('m', 'e', 'a', 'n'):
2108        case FOURCC('n', 'a', 'm', 'e'):
2109        case FOURCC('d', 'a', 't', 'a'):
2110        {
2111            *offset += chunk_size;
2112
2113            if (mPath.size() == 6 && underMetaDataPath(mPath)) {
2114                status_t err = parseITunesMetaData(data_offset, chunk_data_size);
2115
2116                if (err != OK) {
2117                    return err;
2118                }
2119            }
2120
2121            break;
2122        }
2123
2124        case FOURCC('m', 'v', 'h', 'd'):
2125        {
2126            *offset += chunk_size;
2127
2128            if (depth != 1) {
2129                ALOGE("mvhd: depth %d", depth);
2130                return ERROR_MALFORMED;
2131            }
2132            if (chunk_data_size < 32) {
2133                return ERROR_MALFORMED;
2134            }
2135
2136            uint8_t header[32];
2137            if (mDataSource->readAt(
2138                        data_offset, header, sizeof(header))
2139                    < (ssize_t)sizeof(header)) {
2140                return ERROR_IO;
2141            }
2142
2143            uint64_t creationTime;
2144            uint64_t duration = 0;
2145            if (header[0] == 1) {
2146                creationTime = U64_AT(&header[4]);
2147                mHeaderTimescale = U32_AT(&header[20]);
2148                duration = U64_AT(&header[24]);
2149                if (duration == 0xffffffffffffffff) {
2150                    duration = 0;
2151                }
2152            } else if (header[0] != 0) {
2153                return ERROR_MALFORMED;
2154            } else {
2155                creationTime = U32_AT(&header[4]);
2156                mHeaderTimescale = U32_AT(&header[12]);
2157                uint32_t d32 = U32_AT(&header[16]);
2158                if (d32 == 0xffffffff) {
2159                    d32 = 0;
2160                }
2161                duration = d32;
2162            }
2163            if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
2164                mFileMetaData.setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
2165            }
2166
2167            String8 s;
2168            if (convertTimeToDate(creationTime, &s)) {
2169                mFileMetaData.setCString(kKeyDate, s.string());
2170            }
2171
2172
2173            break;
2174        }
2175
2176        case FOURCC('m', 'e', 'h', 'd'):
2177        {
2178            *offset += chunk_size;
2179
2180            if (chunk_data_size < 8) {
2181                return ERROR_MALFORMED;
2182            }
2183
2184            uint8_t flags[4];
2185            if (mDataSource->readAt(
2186                        data_offset, flags, sizeof(flags))
2187                    < (ssize_t)sizeof(flags)) {
2188                return ERROR_IO;
2189            }
2190
2191            uint64_t duration = 0;
2192            if (flags[0] == 1) {
2193                // 64 bit
2194                if (chunk_data_size < 12) {
2195                    return ERROR_MALFORMED;
2196                }
2197                mDataSource->getUInt64(data_offset + 4, &duration);
2198                if (duration == 0xffffffffffffffff) {
2199                    duration = 0;
2200                }
2201            } else if (flags[0] == 0) {
2202                // 32 bit
2203                uint32_t d32;
2204                mDataSource->getUInt32(data_offset + 4, &d32);
2205                if (d32 == 0xffffffff) {
2206                    d32 = 0;
2207                }
2208                duration = d32;
2209            } else {
2210                return ERROR_MALFORMED;
2211            }
2212
2213            if (duration != 0 && mHeaderTimescale != 0) {
2214                mFileMetaData.setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
2215            }
2216
2217            break;
2218        }
2219
2220        case FOURCC('m', 'd', 'a', 't'):
2221        {
2222            mMdatFound = true;
2223
2224            *offset += chunk_size;
2225            break;
2226        }
2227
2228        case FOURCC('h', 'd', 'l', 'r'):
2229        {
2230            *offset += chunk_size;
2231
2232            if (underQTMetaPath(mPath, 3)) {
2233                break;
2234            }
2235
2236            uint32_t buffer;
2237            if (mDataSource->readAt(
2238                        data_offset + 8, &buffer, 4) < 4) {
2239                return ERROR_IO;
2240            }
2241
2242            uint32_t type = ntohl(buffer);
2243            // For the 3GPP file format, the handler-type within the 'hdlr' box
2244            // shall be 'text'. We also want to support 'sbtl' handler type
2245            // for a practical reason as various MPEG4 containers use it.
2246            if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
2247                if (mLastTrack != NULL) {
2248                    mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
2249                }
2250            }
2251
2252            break;
2253        }
2254
2255        case FOURCC('k', 'e', 'y', 's'):
2256        {
2257            *offset += chunk_size;
2258
2259            if (underQTMetaPath(mPath, 3)) {
2260                status_t err = parseQTMetaKey(data_offset, chunk_data_size);
2261                if (err != OK) {
2262                    return err;
2263                }
2264            }
2265            break;
2266        }
2267
2268        case FOURCC('t', 'r', 'e', 'x'):
2269        {
2270            *offset += chunk_size;
2271
2272            if (chunk_data_size < 24) {
2273                return ERROR_IO;
2274            }
2275            Trex trex;
2276            if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2277                !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2278                !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2279                !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2280                !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2281                return ERROR_IO;
2282            }
2283            mTrex.add(trex);
2284            break;
2285        }
2286
2287        case FOURCC('t', 'x', '3', 'g'):
2288        {
2289            if (mLastTrack == NULL)
2290                return ERROR_MALFORMED;
2291
2292            uint32_t type;
2293            const void *data;
2294            size_t size = 0;
2295            if (!mLastTrack->meta.findData(
2296                    kKeyTextFormatData, &type, &data, &size)) {
2297                size = 0;
2298            }
2299
2300            if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) {
2301                return ERROR_MALFORMED;
2302            }
2303
2304            uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size];
2305            if (buffer == NULL) {
2306                return ERROR_MALFORMED;
2307            }
2308
2309            if (size > 0) {
2310                memcpy(buffer, data, size);
2311            }
2312
2313            if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
2314                    < chunk_size) {
2315                delete[] buffer;
2316                buffer = NULL;
2317
2318                // advance read pointer so we don't end up reading this again
2319                *offset += chunk_size;
2320                return ERROR_IO;
2321            }
2322
2323            mLastTrack->meta.setData(
2324                    kKeyTextFormatData, 0, buffer, size + chunk_size);
2325
2326            delete[] buffer;
2327
2328            *offset += chunk_size;
2329            break;
2330        }
2331
2332        case FOURCC('c', 'o', 'v', 'r'):
2333        {
2334            *offset += chunk_size;
2335
2336            ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
2337                  chunk_data_size, data_offset);
2338
2339            if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
2340                return ERROR_MALFORMED;
2341            }
2342            auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2343            if (buffer.get() == NULL) {
2344                ALOGE("b/28471206");
2345                return NO_MEMORY;
2346            }
2347            if (mDataSource->readAt(
2348                data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
2349                return ERROR_IO;
2350            }
2351            const int kSkipBytesOfDataBox = 16;
2352            if (chunk_data_size <= kSkipBytesOfDataBox) {
2353                return ERROR_MALFORMED;
2354            }
2355
2356            mFileMetaData.setData(
2357                kKeyAlbumArt, MetaData::TYPE_NONE,
2358                buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
2359
2360            break;
2361        }
2362
2363        case FOURCC('c', 'o', 'l', 'r'):
2364        {
2365            *offset += chunk_size;
2366            // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
2367            // ignore otherwise
2368            if (depth >= 2 && mPath[depth - 2] == FOURCC('s', 't', 's', 'd')) {
2369                status_t err = parseColorInfo(data_offset, chunk_data_size);
2370                if (err != OK) {
2371                    return err;
2372                }
2373            }
2374
2375            break;
2376        }
2377
2378        case FOURCC('t', 'i', 't', 'l'):
2379        case FOURCC('p', 'e', 'r', 'f'):
2380        case FOURCC('a', 'u', 't', 'h'):
2381        case FOURCC('g', 'n', 'r', 'e'):
2382        case FOURCC('a', 'l', 'b', 'm'):
2383        case FOURCC('y', 'r', 'r', 'c'):
2384        {
2385            *offset += chunk_size;
2386
2387            status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
2388
2389            if (err != OK) {
2390                return err;
2391            }
2392
2393            break;
2394        }
2395
2396        case FOURCC('I', 'D', '3', '2'):
2397        {
2398            *offset += chunk_size;
2399
2400            if (chunk_data_size < 6) {
2401                return ERROR_MALFORMED;
2402            }
2403
2404            parseID3v2MetaData(data_offset + 6);
2405
2406            break;
2407        }
2408
2409        case FOURCC('-', '-', '-', '-'):
2410        {
2411            mLastCommentMean.clear();
2412            mLastCommentName.clear();
2413            mLastCommentData.clear();
2414            *offset += chunk_size;
2415            break;
2416        }
2417
2418        case FOURCC('s', 'i', 'd', 'x'):
2419        {
2420            status_t err = parseSegmentIndex(data_offset, chunk_data_size);
2421            if (err != OK) {
2422                return err;
2423            }
2424            *offset += chunk_size;
2425            return UNKNOWN_ERROR; // stop parsing after sidx
2426        }
2427
2428        case FOURCC('a', 'c', '-', '3'):
2429        {
2430            *offset += chunk_size;
2431            return parseAC3SampleEntry(data_offset);
2432        }
2433
2434        case FOURCC('f', 't', 'y', 'p'):
2435        {
2436            if (chunk_data_size < 8 || depth != 0) {
2437                return ERROR_MALFORMED;
2438            }
2439
2440            off64_t stop_offset = *offset + chunk_size;
2441            uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
2442            std::set<uint32_t> brandSet;
2443            for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
2444                if (i == 1) {
2445                    // Skip this index, it refers to the minorVersion,
2446                    // not a brand.
2447                    continue;
2448                }
2449
2450                uint32_t brand;
2451                if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
2452                    return ERROR_MALFORMED;
2453                }
2454
2455                brand = ntohl(brand);
2456                brandSet.insert(brand);
2457            }
2458
2459            if (brandSet.count(FOURCC('q', 't', ' ', ' ')) > 0) {
2460                mIsQT = true;
2461            } else {
2462                if (brandSet.count(FOURCC('m', 'i', 'f', '1')) > 0
2463                 && brandSet.count(FOURCC('h', 'e', 'i', 'c')) > 0) {
2464                    ALOGV("identified HEIF image");
2465
2466                    mIsHeif = true;
2467                    brandSet.erase(FOURCC('m', 'i', 'f', '1'));
2468                    brandSet.erase(FOURCC('h', 'e', 'i', 'c'));
2469                }
2470
2471                if (!brandSet.empty()) {
2472                    // This means that the file should have moov box.
2473                    // It could be any iso files (mp4, heifs, etc.)
2474                    mHasMoovBox = true;
2475                    if (mIsHeif) {
2476                        ALOGV("identified HEIF image with other tracks");
2477                    }
2478                }
2479            }
2480
2481            *offset = stop_offset;
2482
2483            break;
2484        }
2485
2486        default:
2487        {
2488            // check if we're parsing 'ilst' for meta keys
2489            // if so, treat type as a number (key-id).
2490            if (underQTMetaPath(mPath, 3)) {
2491                status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
2492                if (err != OK) {
2493                    return err;
2494                }
2495            }
2496
2497            *offset += chunk_size;
2498            break;
2499        }
2500    }
2501
2502    return OK;
2503}
2504
2505status_t MPEG4Extractor::parseAC3SampleEntry(off64_t offset) {
2506    // skip 16 bytes:
2507    //  + 6-byte reserved,
2508    //  + 2-byte data reference index,
2509    //  + 8-byte reserved
2510    offset += 16;
2511    uint16_t channelCount;
2512    if (!mDataSource->getUInt16(offset, &channelCount)) {
2513        return ERROR_MALFORMED;
2514    }
2515    // skip 8 bytes:
2516    //  + 2-byte channelCount,
2517    //  + 2-byte sample size,
2518    //  + 4-byte reserved
2519    offset += 8;
2520    uint16_t sampleRate;
2521    if (!mDataSource->getUInt16(offset, &sampleRate)) {
2522        ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read sample rate");
2523        return ERROR_MALFORMED;
2524    }
2525
2526    // skip 4 bytes:
2527    //  + 2-byte sampleRate,
2528    //  + 2-byte reserved
2529    offset += 4;
2530    return parseAC3SpecificBox(offset, sampleRate);
2531}
2532
2533status_t MPEG4Extractor::parseAC3SpecificBox(
2534        off64_t offset, uint16_t sampleRate) {
2535    uint32_t size;
2536    // + 4-byte size
2537    // + 4-byte type
2538    // + 3-byte payload
2539    const uint32_t kAC3SpecificBoxSize = 11;
2540    if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) {
2541        ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
2542        return ERROR_MALFORMED;
2543    }
2544
2545    offset += 4;
2546    uint32_t type;
2547    if (!mDataSource->getUInt32(offset, &type) || type != FOURCC('d', 'a', 'c', '3')) {
2548        ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
2549        return ERROR_MALFORMED;
2550    }
2551
2552    offset += 4;
2553    const uint32_t kAC3SpecificBoxPayloadSize = 3;
2554    uint8_t chunk[kAC3SpecificBoxPayloadSize];
2555    if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) {
2556        ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
2557        return ERROR_MALFORMED;
2558    }
2559
2560    ABitReader br(chunk, sizeof(chunk));
2561    static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
2562    static const unsigned sampleRateTable[] = {48000, 44100, 32000};
2563
2564    unsigned fscod = br.getBits(2);
2565    if (fscod == 3) {
2566        ALOGE("Incorrect fscod (3) in AC3 header");
2567        return ERROR_MALFORMED;
2568    }
2569    unsigned boxSampleRate = sampleRateTable[fscod];
2570    if (boxSampleRate != sampleRate) {
2571        ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
2572            boxSampleRate, sampleRate);
2573        return ERROR_MALFORMED;
2574    }
2575
2576    unsigned bsid = br.getBits(5);
2577    if (bsid > 8) {
2578        ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
2579        return ERROR_MALFORMED;
2580    }
2581
2582    // skip
2583    unsigned bsmod __unused = br.getBits(3);
2584
2585    unsigned acmod = br.getBits(3);
2586    unsigned lfeon = br.getBits(1);
2587    unsigned channelCount = channelCountTable[acmod] + lfeon;
2588
2589    if (mLastTrack == NULL) {
2590        return ERROR_MALFORMED;
2591    }
2592    mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3);
2593    mLastTrack->meta.setInt32(kKeyChannelCount, channelCount);
2594    mLastTrack->meta.setInt32(kKeySampleRate, sampleRate);
2595    return OK;
2596}
2597
2598status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
2599  ALOGV("MPEG4Extractor::parseSegmentIndex");
2600
2601    if (size < 12) {
2602      return -EINVAL;
2603    }
2604
2605    uint32_t flags;
2606    if (!mDataSource->getUInt32(offset, &flags)) {
2607        return ERROR_MALFORMED;
2608    }
2609
2610    uint32_t version = flags >> 24;
2611    flags &= 0xffffff;
2612
2613    ALOGV("sidx version %d", version);
2614
2615    uint32_t referenceId;
2616    if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
2617        return ERROR_MALFORMED;
2618    }
2619
2620    uint32_t timeScale;
2621    if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
2622        return ERROR_MALFORMED;
2623    }
2624    ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
2625    if (timeScale == 0)
2626        return ERROR_MALFORMED;
2627
2628    uint64_t earliestPresentationTime;
2629    uint64_t firstOffset;
2630
2631    offset += 12;
2632    size -= 12;
2633
2634    if (version == 0) {
2635        if (size < 8) {
2636            return -EINVAL;
2637        }
2638        uint32_t tmp;
2639        if (!mDataSource->getUInt32(offset, &tmp)) {
2640            return ERROR_MALFORMED;
2641        }
2642        earliestPresentationTime = tmp;
2643        if (!mDataSource->getUInt32(offset + 4, &tmp)) {
2644            return ERROR_MALFORMED;
2645        }
2646        firstOffset = tmp;
2647        offset += 8;
2648        size -= 8;
2649    } else {
2650        if (size < 16) {
2651            return -EINVAL;
2652        }
2653        if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
2654            return ERROR_MALFORMED;
2655        }
2656        if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
2657            return ERROR_MALFORMED;
2658        }
2659        offset += 16;
2660        size -= 16;
2661    }
2662    ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
2663
2664    if (size < 4) {
2665        return -EINVAL;
2666    }
2667
2668    uint16_t referenceCount;
2669    if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
2670        return ERROR_MALFORMED;
2671    }
2672    offset += 4;
2673    size -= 4;
2674    ALOGV("refcount: %d", referenceCount);
2675
2676    if (size < referenceCount * 12) {
2677        return -EINVAL;
2678    }
2679
2680    uint64_t total_duration = 0;
2681    for (unsigned int i = 0; i < referenceCount; i++) {
2682        uint32_t d1, d2, d3;
2683
2684        if (!mDataSource->getUInt32(offset, &d1) ||     // size
2685            !mDataSource->getUInt32(offset + 4, &d2) || // duration
2686            !mDataSource->getUInt32(offset + 8, &d3)) { // flags
2687            return ERROR_MALFORMED;
2688        }
2689
2690        if (d1 & 0x80000000) {
2691            ALOGW("sub-sidx boxes not supported yet");
2692        }
2693        bool sap = d3 & 0x80000000;
2694        uint32_t saptype = (d3 >> 28) & 7;
2695        if (!sap || (saptype != 1 && saptype != 2)) {
2696            // type 1 and 2 are sync samples
2697            ALOGW("not a stream access point, or unsupported type: %08x", d3);
2698        }
2699        total_duration += d2;
2700        offset += 12;
2701        ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
2702        SidxEntry se;
2703        se.mSize = d1 & 0x7fffffff;
2704        se.mDurationUs = 1000000LL * d2 / timeScale;
2705        mSidxEntries.add(se);
2706    }
2707
2708    uint64_t sidxDuration = total_duration * 1000000 / timeScale;
2709
2710    if (mLastTrack == NULL)
2711        return ERROR_MALFORMED;
2712
2713    int64_t metaDuration;
2714    if (!mLastTrack->meta.findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
2715        mLastTrack->meta.setInt64(kKeyDuration, sidxDuration);
2716    }
2717    return OK;
2718}
2719
2720status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
2721    if (size < 8) {
2722        return ERROR_MALFORMED;
2723    }
2724
2725    uint32_t count;
2726    if (!mDataSource->getUInt32(offset + 4, &count)) {
2727        return ERROR_MALFORMED;
2728    }
2729
2730    if (mMetaKeyMap.size() > 0) {
2731        ALOGW("'keys' atom seen again, discarding existing entries");
2732        mMetaKeyMap.clear();
2733    }
2734
2735    off64_t keyOffset = offset + 8;
2736    off64_t stopOffset = offset + size;
2737    for (size_t i = 1; i <= count; i++) {
2738        if (keyOffset + 8 > stopOffset) {
2739            return ERROR_MALFORMED;
2740        }
2741
2742        uint32_t keySize;
2743        if (!mDataSource->getUInt32(keyOffset, &keySize)
2744                || keySize < 8
2745                || keyOffset + keySize > stopOffset) {
2746            return ERROR_MALFORMED;
2747        }
2748
2749        uint32_t type;
2750        if (!mDataSource->getUInt32(keyOffset + 4, &type)
2751                || type != FOURCC('m', 'd', 't', 'a')) {
2752            return ERROR_MALFORMED;
2753        }
2754
2755        keySize -= 8;
2756        keyOffset += 8;
2757
2758        auto keyData = heapbuffer<uint8_t>(keySize);
2759        if (keyData.get() == NULL) {
2760            return ERROR_MALFORMED;
2761        }
2762        if (mDataSource->readAt(
2763                keyOffset, keyData.get(), keySize) < (ssize_t) keySize) {
2764            return ERROR_MALFORMED;
2765        }
2766
2767        AString key((const char *)keyData.get(), keySize);
2768        mMetaKeyMap.add(i, key);
2769
2770        keyOffset += keySize;
2771    }
2772    return OK;
2773}
2774
2775status_t MPEG4Extractor::parseQTMetaVal(
2776        int32_t keyId, off64_t offset, size_t size) {
2777    ssize_t index = mMetaKeyMap.indexOfKey(keyId);
2778    if (index < 0) {
2779        // corresponding key is not present, ignore
2780        return ERROR_MALFORMED;
2781    }
2782
2783    if (size <= 16) {
2784        return ERROR_MALFORMED;
2785    }
2786    uint32_t dataSize;
2787    if (!mDataSource->getUInt32(offset, &dataSize)
2788            || dataSize > size || dataSize <= 16) {
2789        return ERROR_MALFORMED;
2790    }
2791    uint32_t atomFourCC;
2792    if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
2793            || atomFourCC != FOURCC('d', 'a', 't', 'a')) {
2794        return ERROR_MALFORMED;
2795    }
2796    uint32_t dataType;
2797    if (!mDataSource->getUInt32(offset + 8, &dataType)
2798            || ((dataType & 0xff000000) != 0)) {
2799        // not well-known type
2800        return ERROR_MALFORMED;
2801    }
2802
2803    dataSize -= 16;
2804    offset += 16;
2805
2806    if (dataType == 23 && dataSize >= 4) {
2807        // BE Float32
2808        uint32_t val;
2809        if (!mDataSource->getUInt32(offset, &val)) {
2810            return ERROR_MALFORMED;
2811        }
2812        if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
2813            mFileMetaData.setFloat(kKeyCaptureFramerate, *(float *)&val);
2814        }
2815    } else if (dataType == 67 && dataSize >= 4) {
2816        // BE signed int32
2817        uint32_t val;
2818        if (!mDataSource->getUInt32(offset, &val)) {
2819            return ERROR_MALFORMED;
2820        }
2821        if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
2822            mFileMetaData.setInt32(kKeyTemporalLayerCount, val);
2823        }
2824    } else {
2825        // add more keys if needed
2826        ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
2827    }
2828
2829    return OK;
2830}
2831
2832status_t MPEG4Extractor::parseTrackHeader(
2833        off64_t data_offset, off64_t data_size) {
2834    if (data_size < 4) {
2835        return ERROR_MALFORMED;
2836    }
2837
2838    uint8_t version;
2839    if (mDataSource->readAt(data_offset, &version, 1) < 1) {
2840        return ERROR_IO;
2841    }
2842
2843    size_t dynSize = (version == 1) ? 36 : 24;
2844
2845    uint8_t buffer[36 + 60];
2846
2847    if (data_size != (off64_t)dynSize + 60) {
2848        return ERROR_MALFORMED;
2849    }
2850
2851    if (mDataSource->readAt(
2852                data_offset, buffer, data_size) < (ssize_t)data_size) {
2853        return ERROR_IO;
2854    }
2855
2856    uint64_t ctime __unused, mtime __unused, duration __unused;
2857    int32_t id;
2858
2859    if (version == 1) {
2860        ctime = U64_AT(&buffer[4]);
2861        mtime = U64_AT(&buffer[12]);
2862        id = U32_AT(&buffer[20]);
2863        duration = U64_AT(&buffer[28]);
2864    } else if (version == 0) {
2865        ctime = U32_AT(&buffer[4]);
2866        mtime = U32_AT(&buffer[8]);
2867        id = U32_AT(&buffer[12]);
2868        duration = U32_AT(&buffer[20]);
2869    } else {
2870        return ERROR_UNSUPPORTED;
2871    }
2872
2873    if (mLastTrack == NULL)
2874        return ERROR_MALFORMED;
2875
2876    mLastTrack->meta.setInt32(kKeyTrackID, id);
2877
2878    size_t matrixOffset = dynSize + 16;
2879    int32_t a00 = U32_AT(&buffer[matrixOffset]);
2880    int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
2881    int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
2882    int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
2883
2884#if 0
2885    int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
2886    int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
2887
2888    ALOGI("x' = %.2f * x + %.2f * y + %.2f",
2889         a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
2890    ALOGI("y' = %.2f * x + %.2f * y + %.2f",
2891         a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
2892#endif
2893
2894    uint32_t rotationDegrees;
2895
2896    static const int32_t kFixedOne = 0x10000;
2897    if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
2898        // Identity, no rotation
2899        rotationDegrees = 0;
2900    } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2901        rotationDegrees = 90;
2902    } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2903        rotationDegrees = 270;
2904    } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2905        rotationDegrees = 180;
2906    } else {
2907        ALOGW("We only support 0,90,180,270 degree rotation matrices");
2908        rotationDegrees = 0;
2909    }
2910
2911    if (rotationDegrees != 0) {
2912        mLastTrack->meta.setInt32(kKeyRotation, rotationDegrees);
2913    }
2914
2915    // Handle presentation display size, which could be different
2916    // from the image size indicated by kKeyWidth and kKeyHeight.
2917    uint32_t width = U32_AT(&buffer[dynSize + 52]);
2918    uint32_t height = U32_AT(&buffer[dynSize + 56]);
2919    mLastTrack->meta.setInt32(kKeyDisplayWidth, width >> 16);
2920    mLastTrack->meta.setInt32(kKeyDisplayHeight, height >> 16);
2921
2922    return OK;
2923}
2924
2925status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
2926    if (size == 0) {
2927        return OK;
2928    }
2929
2930    if (size < 4 || size == SIZE_MAX) {
2931        return ERROR_MALFORMED;
2932    }
2933
2934    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2935    if (buffer == NULL) {
2936        return ERROR_MALFORMED;
2937    }
2938    if (mDataSource->readAt(
2939                offset, buffer, size) != (ssize_t)size) {
2940        delete[] buffer;
2941        buffer = NULL;
2942
2943        return ERROR_IO;
2944    }
2945
2946    uint32_t flags = U32_AT(buffer);
2947
2948    uint32_t metadataKey = 0;
2949    char chunk[5];
2950    MakeFourCCString(mPath[4], chunk);
2951    ALOGV("meta: %s @ %lld", chunk, (long long)offset);
2952    switch ((int32_t)mPath[4]) {
2953        case FOURCC(0xa9, 'a', 'l', 'b'):
2954        {
2955            metadataKey = kKeyAlbum;
2956            break;
2957        }
2958        case FOURCC(0xa9, 'A', 'R', 'T'):
2959        {
2960            metadataKey = kKeyArtist;
2961            break;
2962        }
2963        case FOURCC('a', 'A', 'R', 'T'):
2964        {
2965            metadataKey = kKeyAlbumArtist;
2966            break;
2967        }
2968        case FOURCC(0xa9, 'd', 'a', 'y'):
2969        {
2970            metadataKey = kKeyYear;
2971            break;
2972        }
2973        case FOURCC(0xa9, 'n', 'a', 'm'):
2974        {
2975            metadataKey = kKeyTitle;
2976            break;
2977        }
2978        case FOURCC(0xa9, 'w', 'r', 't'):
2979        {
2980            metadataKey = kKeyWriter;
2981            break;
2982        }
2983        case FOURCC('c', 'o', 'v', 'r'):
2984        {
2985            metadataKey = kKeyAlbumArt;
2986            break;
2987        }
2988        case FOURCC('g', 'n', 'r', 'e'):
2989        {
2990            metadataKey = kKeyGenre;
2991            break;
2992        }
2993        case FOURCC(0xa9, 'g', 'e', 'n'):
2994        {
2995            metadataKey = kKeyGenre;
2996            break;
2997        }
2998        case FOURCC('c', 'p', 'i', 'l'):
2999        {
3000            if (size == 9 && flags == 21) {
3001                char tmp[16];
3002                sprintf(tmp, "%d",
3003                        (int)buffer[size - 1]);
3004
3005                mFileMetaData.setCString(kKeyCompilation, tmp);
3006            }
3007            break;
3008        }
3009        case FOURCC('t', 'r', 'k', 'n'):
3010        {
3011            if (size == 16 && flags == 0) {
3012                char tmp[16];
3013                uint16_t* pTrack = (uint16_t*)&buffer[10];
3014                uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
3015                sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
3016
3017                mFileMetaData.setCString(kKeyCDTrackNumber, tmp);
3018            }
3019            break;
3020        }
3021        case FOURCC('d', 'i', 's', 'k'):
3022        {
3023            if ((size == 14 || size == 16) && flags == 0) {
3024                char tmp[16];
3025                uint16_t* pDisc = (uint16_t*)&buffer[10];
3026                uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
3027                sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
3028
3029                mFileMetaData.setCString(kKeyDiscNumber, tmp);
3030            }
3031            break;
3032        }
3033        case FOURCC('-', '-', '-', '-'):
3034        {
3035            buffer[size] = '\0';
3036            switch (mPath[5]) {
3037                case FOURCC('m', 'e', 'a', 'n'):
3038                    mLastCommentMean.setTo((const char *)buffer + 4);
3039                    break;
3040                case FOURCC('n', 'a', 'm', 'e'):
3041                    mLastCommentName.setTo((const char *)buffer + 4);
3042                    break;
3043                case FOURCC('d', 'a', 't', 'a'):
3044                    if (size < 8) {
3045                        delete[] buffer;
3046                        buffer = NULL;
3047                        ALOGE("b/24346430");
3048                        return ERROR_MALFORMED;
3049                    }
3050                    mLastCommentData.setTo((const char *)buffer + 8);
3051                    break;
3052            }
3053
3054            // Once we have a set of mean/name/data info, go ahead and process
3055            // it to see if its something we are interested in.  Whether or not
3056            // were are interested in the specific tag, make sure to clear out
3057            // the set so we can be ready to process another tuple should one
3058            // show up later in the file.
3059            if ((mLastCommentMean.length() != 0) &&
3060                (mLastCommentName.length() != 0) &&
3061                (mLastCommentData.length() != 0)) {
3062
3063                if (mLastCommentMean == "com.apple.iTunes"
3064                        && mLastCommentName == "iTunSMPB") {
3065                    int32_t delay, padding;
3066                    if (sscanf(mLastCommentData,
3067                               " %*x %x %x %*x", &delay, &padding) == 2) {
3068                        if (mLastTrack == NULL) {
3069                            delete[] buffer;
3070                            return ERROR_MALFORMED;
3071                        }
3072
3073                        mLastTrack->meta.setInt32(kKeyEncoderDelay, delay);
3074                        mLastTrack->meta.setInt32(kKeyEncoderPadding, padding);
3075                    }
3076                }
3077
3078                mLastCommentMean.clear();
3079                mLastCommentName.clear();
3080                mLastCommentData.clear();
3081            }
3082            break;
3083        }
3084
3085        default:
3086            break;
3087    }
3088
3089    if (size >= 8 && metadataKey && !mFileMetaData.hasData(metadataKey)) {
3090        if (metadataKey == kKeyAlbumArt) {
3091            mFileMetaData.setData(
3092                    kKeyAlbumArt, MetaData::TYPE_NONE,
3093                    buffer + 8, size - 8);
3094        } else if (metadataKey == kKeyGenre) {
3095            if (flags == 0) {
3096                // uint8_t genre code, iTunes genre codes are
3097                // the standard id3 codes, except they start
3098                // at 1 instead of 0 (e.g. Pop is 14, not 13)
3099                // We use standard id3 numbering, so subtract 1.
3100                int genrecode = (int)buffer[size - 1];
3101                genrecode--;
3102                if (genrecode < 0) {
3103                    genrecode = 255; // reserved for 'unknown genre'
3104                }
3105                char genre[10];
3106                sprintf(genre, "%d", genrecode);
3107
3108                mFileMetaData.setCString(metadataKey, genre);
3109            } else if (flags == 1) {
3110                // custom genre string
3111                buffer[size] = '\0';
3112
3113                mFileMetaData.setCString(
3114                        metadataKey, (const char *)buffer + 8);
3115            }
3116        } else {
3117            buffer[size] = '\0';
3118
3119            mFileMetaData.setCString(
3120                    metadataKey, (const char *)buffer + 8);
3121        }
3122    }
3123
3124    delete[] buffer;
3125    buffer = NULL;
3126
3127    return OK;
3128}
3129
3130status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
3131    if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
3132        return ERROR_MALFORMED;
3133    }
3134
3135    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3136    if (buffer == NULL) {
3137        return ERROR_MALFORMED;
3138    }
3139    if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
3140        delete[] buffer;
3141        buffer = NULL;
3142
3143        return ERROR_IO;
3144    }
3145
3146    int32_t type = U32_AT(&buffer[0]);
3147    if ((type == FOURCC('n', 'c', 'l', 'x') && size >= 11)
3148            || (type == FOURCC('n', 'c', 'l', 'c') && size >= 10)) {
3149        int32_t primaries = U16_AT(&buffer[4]);
3150        int32_t transfer = U16_AT(&buffer[6]);
3151        int32_t coeffs = U16_AT(&buffer[8]);
3152        bool fullRange = (type == FOURCC('n', 'c', 'l', 'x')) && (buffer[10] & 128);
3153
3154        ColorAspects aspects;
3155        ColorUtils::convertIsoColorAspectsToCodecAspects(
3156                primaries, transfer, coeffs, fullRange, aspects);
3157
3158        // only store the first color specification
3159        if (!mLastTrack->meta.hasData(kKeyColorPrimaries)) {
3160            mLastTrack->meta.setInt32(kKeyColorPrimaries, aspects.mPrimaries);
3161            mLastTrack->meta.setInt32(kKeyTransferFunction, aspects.mTransfer);
3162            mLastTrack->meta.setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs);
3163            mLastTrack->meta.setInt32(kKeyColorRange, aspects.mRange);
3164        }
3165    }
3166
3167    delete[] buffer;
3168    buffer = NULL;
3169
3170    return OK;
3171}
3172
3173status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
3174    if (size < 4 || size == SIZE_MAX) {
3175        return ERROR_MALFORMED;
3176    }
3177
3178    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3179    if (buffer == NULL) {
3180        return ERROR_MALFORMED;
3181    }
3182    if (mDataSource->readAt(
3183                offset, buffer, size) != (ssize_t)size) {
3184        delete[] buffer;
3185        buffer = NULL;
3186
3187        return ERROR_IO;
3188    }
3189
3190    uint32_t metadataKey = 0;
3191    switch (mPath[depth]) {
3192        case FOURCC('t', 'i', 't', 'l'):
3193        {
3194            metadataKey = kKeyTitle;
3195            break;
3196        }
3197        case FOURCC('p', 'e', 'r', 'f'):
3198        {
3199            metadataKey = kKeyArtist;
3200            break;
3201        }
3202        case FOURCC('a', 'u', 't', 'h'):
3203        {
3204            metadataKey = kKeyWriter;
3205            break;
3206        }
3207        case FOURCC('g', 'n', 'r', 'e'):
3208        {
3209            metadataKey = kKeyGenre;
3210            break;
3211        }
3212        case FOURCC('a', 'l', 'b', 'm'):
3213        {
3214            if (buffer[size - 1] != '\0') {
3215              char tmp[4];
3216              sprintf(tmp, "%u", buffer[size - 1]);
3217
3218              mFileMetaData.setCString(kKeyCDTrackNumber, tmp);
3219            }
3220
3221            metadataKey = kKeyAlbum;
3222            break;
3223        }
3224        case FOURCC('y', 'r', 'r', 'c'):
3225        {
3226            if (size < 6) {
3227                delete[] buffer;
3228                buffer = NULL;
3229                ALOGE("b/62133227");
3230                android_errorWriteLog(0x534e4554, "62133227");
3231                return ERROR_MALFORMED;
3232            }
3233            char tmp[5];
3234            uint16_t year = U16_AT(&buffer[4]);
3235
3236            if (year < 10000) {
3237                sprintf(tmp, "%u", year);
3238
3239                mFileMetaData.setCString(kKeyYear, tmp);
3240            }
3241            break;
3242        }
3243
3244        default:
3245            break;
3246    }
3247
3248    if (metadataKey > 0) {
3249        bool isUTF8 = true; // Common case
3250        char16_t *framedata = NULL;
3251        int len16 = 0; // Number of UTF-16 characters
3252
3253        // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
3254        if (size < 6) {
3255            delete[] buffer;
3256            buffer = NULL;
3257            return ERROR_MALFORMED;
3258        }
3259
3260        if (size - 6 >= 4) {
3261            len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
3262            framedata = (char16_t *)(buffer + 6);
3263            if (0xfffe == *framedata) {
3264                // endianness marker (BOM) doesn't match host endianness
3265                for (int i = 0; i < len16; i++) {
3266                    framedata[i] = bswap_16(framedata[i]);
3267                }
3268                // BOM is now swapped to 0xfeff, we will execute next block too
3269            }
3270
3271            if (0xfeff == *framedata) {
3272                // Remove the BOM
3273                framedata++;
3274                len16--;
3275                isUTF8 = false;
3276            }
3277            // else normal non-zero-length UTF-8 string
3278            // we can't handle UTF-16 without BOM as there is no other
3279            // indication of encoding.
3280        }
3281
3282        if (isUTF8) {
3283            buffer[size] = 0;
3284            mFileMetaData.setCString(metadataKey, (const char *)buffer + 6);
3285        } else {
3286            // Convert from UTF-16 string to UTF-8 string.
3287            String8 tmpUTF8str(framedata, len16);
3288            mFileMetaData.setCString(metadataKey, tmpUTF8str.string());
3289        }
3290    }
3291
3292    delete[] buffer;
3293    buffer = NULL;
3294
3295    return OK;
3296}
3297
3298void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
3299    ID3 id3(mDataSource, true /* ignorev1 */, offset);
3300
3301    if (id3.isValid()) {
3302        struct Map {
3303            int key;
3304            const char *tag1;
3305            const char *tag2;
3306        };
3307        static const Map kMap[] = {
3308            { kKeyAlbum, "TALB", "TAL" },
3309            { kKeyArtist, "TPE1", "TP1" },
3310            { kKeyAlbumArtist, "TPE2", "TP2" },
3311            { kKeyComposer, "TCOM", "TCM" },
3312            { kKeyGenre, "TCON", "TCO" },
3313            { kKeyTitle, "TIT2", "TT2" },
3314            { kKeyYear, "TYE", "TYER" },
3315            { kKeyAuthor, "TXT", "TEXT" },
3316            { kKeyCDTrackNumber, "TRK", "TRCK" },
3317            { kKeyDiscNumber, "TPA", "TPOS" },
3318            { kKeyCompilation, "TCP", "TCMP" },
3319        };
3320        static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
3321
3322        for (size_t i = 0; i < kNumMapEntries; ++i) {
3323            if (!mFileMetaData.hasData(kMap[i].key)) {
3324                ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
3325                if (it->done()) {
3326                    delete it;
3327                    it = new ID3::Iterator(id3, kMap[i].tag2);
3328                }
3329
3330                if (it->done()) {
3331                    delete it;
3332                    continue;
3333                }
3334
3335                String8 s;
3336                it->getString(&s);
3337                delete it;
3338
3339                mFileMetaData.setCString(kMap[i].key, s);
3340            }
3341        }
3342
3343        size_t dataSize;
3344        String8 mime;
3345        const void *data = id3.getAlbumArt(&dataSize, &mime);
3346
3347        if (data) {
3348            mFileMetaData.setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
3349            mFileMetaData.setCString(kKeyAlbumArtMIME, mime.string());
3350        }
3351    }
3352}
3353
3354MediaTrack *MPEG4Extractor::getTrack(size_t index) {
3355    status_t err;
3356    if ((err = readMetaData()) != OK) {
3357        return NULL;
3358    }
3359
3360    Track *track = mFirstTrack;
3361    while (index > 0) {
3362        if (track == NULL) {
3363            return NULL;
3364        }
3365
3366        track = track->next;
3367        --index;
3368    }
3369
3370    if (track == NULL) {
3371        return NULL;
3372    }
3373
3374
3375    Trex *trex = NULL;
3376    int32_t trackId;
3377    if (track->meta.findInt32(kKeyTrackID, &trackId)) {
3378        for (size_t i = 0; i < mTrex.size(); i++) {
3379            Trex *t = &mTrex.editItemAt(i);
3380            if (t->track_ID == (uint32_t) trackId) {
3381                trex = t;
3382                break;
3383            }
3384        }
3385    } else {
3386        ALOGE("b/21657957");
3387        return NULL;
3388    }
3389
3390    ALOGV("getTrack called, pssh: %zu", mPssh.size());
3391
3392    const char *mime;
3393    if (!track->meta.findCString(kKeyMIMEType, &mime)) {
3394        return NULL;
3395    }
3396
3397    sp<ItemTable> itemTable;
3398    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
3399        uint32_t type;
3400        const void *data;
3401        size_t size;
3402        if (!track->meta.findData(kKeyAVCC, &type, &data, &size)) {
3403            return NULL;
3404        }
3405
3406        const uint8_t *ptr = (const uint8_t *)data;
3407
3408        if (size < 7 || ptr[0] != 1) {  // configurationVersion == 1
3409            return NULL;
3410        }
3411    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
3412            || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
3413        uint32_t type;
3414        const void *data;
3415        size_t size;
3416        if (!track->meta.findData(kKeyHVCC, &type, &data, &size)) {
3417            return NULL;
3418        }
3419
3420        const uint8_t *ptr = (const uint8_t *)data;
3421
3422        if (size < 22 || ptr[0] != 1) {  // configurationVersion == 1
3423            return NULL;
3424        }
3425        if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
3426            itemTable = mItemTable;
3427        }
3428    }
3429
3430    MPEG4Source *source =  new MPEG4Source(
3431            track->meta, mDataSource, track->timescale, track->sampleTable,
3432            mSidxEntries, trex, mMoofOffset, itemTable);
3433    if (source->init() != OK) {
3434        delete source;
3435        return NULL;
3436    }
3437    return source;
3438}
3439
3440// static
3441status_t MPEG4Extractor::verifyTrack(Track *track) {
3442    const char *mime;
3443    CHECK(track->meta.findCString(kKeyMIMEType, &mime));
3444
3445    uint32_t type;
3446    const void *data;
3447    size_t size;
3448    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
3449        if (!track->meta.findData(kKeyAVCC, &type, &data, &size)
3450                || type != kTypeAVCC) {
3451            return ERROR_MALFORMED;
3452        }
3453    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
3454        if (!track->meta.findData(kKeyHVCC, &type, &data, &size)
3455                    || type != kTypeHVCC) {
3456            return ERROR_MALFORMED;
3457        }
3458    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
3459            || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
3460            || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
3461        if (!track->meta.findData(kKeyESDS, &type, &data, &size)
3462                || type != kTypeESDS) {
3463            return ERROR_MALFORMED;
3464        }
3465    }
3466
3467    if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
3468        // Make sure we have all the metadata we need.
3469        ALOGE("stbl atom missing/invalid.");
3470        return ERROR_MALFORMED;
3471    }
3472
3473    if (track->timescale == 0) {
3474        ALOGE("timescale invalid.");
3475        return ERROR_MALFORMED;
3476    }
3477
3478    return OK;
3479}
3480
// Audio object type (AOT) codes. Only the enumerators that are not commented
// out are defined; the remaining codes are listed for reference.
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1
    // AOT_NULL_OBJECT      = 0
    // AOT_AAC_MAIN         = 1    Main profile
    AOT_AAC_LC      = 2,        // Low Complexity object
    // AOT_AAC_SSR          = 3
    // AOT_AAC_LTP          = 4
    AOT_SBR         = 5,
    // AOT_AAC_SCAL         = 6
    // AOT_TWIN_VQ          = 7
    // AOT_CELP             = 8
    // AOT_HVXC             = 9
    // AOT_RSVD_10          = 10   (reserved)
    // AOT_RSVD_11          = 11   (reserved)
    // AOT_TTSI             = 12   TTSI Object
    // AOT_MAIN_SYNTH       = 13   Main Synthetic object
    // AOT_WAV_TAB_SYNTH    = 14   Wavetable Synthesis object
    // AOT_GEN_MIDI         = 15   General MIDI object
    // AOT_ALG_SYNTH_AUD_FX = 16   Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC   = 17,       // Error Resilient(ER) AAC Low Complexity
    // AOT_RSVD_18          = 18   (reserved)
    // AOT_ER_AAC_LTP       = 19   Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL = 20,       // Error Resilient(ER) AAC Scalable object
    // AOT_ER_TWIN_VQ       = 21   Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC     = 22,       // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD   = 23,       // Error Resilient(ER) AAC LowDelay object
    // AOT_ER_CELP          = 24   Error Resilient(ER) CELP object
    // AOT_ER_HVXC          = 25   Error Resilient(ER) HVXC object
    // AOT_ER_HILN          = 26   Error Resilient(ER) HILN object
    // AOT_ER_PARA          = 27   Error Resilient(ER) Parametric object
    // AOT_RSVD_28          = 28   might become SSC
    AOT_PS          = 29,       // PS, Parametric Stereo (includes SBR)
    // AOT_MPEGS            = 30   MPEG Surround

    AOT_ESCAPE      = 31,       // Signal AOT uses more than 5 bits

    // AOT_MP3ONMP4_L1      = 32   MPEG-Layer1 in mp4
    // AOT_MP3ONMP4_L2      = 33   MPEG-Layer2 in mp4
    // AOT_MP3ONMP4_L3      = 34   MPEG-Layer3 in mp4
    // AOT_RSVD_35          = 35   might become DST
    // AOT_RSVD_36          = 36   might become ALS
    // AOT_AAC_SLS          = 37   AAC + SLS
    // AOT_SLS              = 38   SLS
    // AOT_ER_AAC_ELD       = 39   AAC Enhanced Low Delay

    // AOT_USAC             = 42   USAC
    // AOT_SAOC             = 43   SAOC
    // AOT_LD_MPEGS         = 44   Low Delay MPEG Surround

    // AOT_RSVD50           = 50   Interim AOT for Rsvd50
};
3532
3533status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
3534        const void *esds_data, size_t esds_size) {
3535    ESDS esds(esds_data, esds_size);
3536
3537    uint8_t objectTypeIndication;
3538    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
3539        return ERROR_MALFORMED;
3540    }
3541
3542    if (objectTypeIndication == 0xe1) {
3543        // This isn't MPEG4 audio at all, it's QCELP 14k...
3544        if (mLastTrack == NULL)
3545            return ERROR_MALFORMED;
3546
3547        mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
3548        return OK;
3549    }
3550
3551    if (objectTypeIndication  == 0x6b) {
3552        // The media subtype is MP3 audio
3553        // Our software MP3 audio decoder may not be able to handle
3554        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
3555        ALOGE("MP3 track in MP4/3GPP file is not supported");
3556        return ERROR_UNSUPPORTED;
3557    }
3558
3559    if (mLastTrack != NULL) {
3560        uint32_t maxBitrate = 0;
3561        uint32_t avgBitrate = 0;
3562        esds.getBitRate(&maxBitrate, &avgBitrate);
3563        if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
3564            mLastTrack->meta.setInt32(kKeyMaxBitRate, (int32_t)maxBitrate);
3565        }
3566        if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
3567            mLastTrack->meta.setInt32(kKeyBitRate, (int32_t)avgBitrate);
3568        }
3569    }
3570
3571    const uint8_t *csd;
3572    size_t csd_size;
3573    if (esds.getCodecSpecificInfo(
3574                (const void **)&csd, &csd_size) != OK) {
3575        return ERROR_MALFORMED;
3576    }
3577
3578    if (kUseHexDump) {
3579        printf("ESD of size %zu\n", csd_size);
3580        hexdump(csd, csd_size);
3581    }
3582
3583    if (csd_size == 0) {
3584        // There's no further information, i.e. no codec specific data
3585        // Let's assume that the information provided in the mpeg4 headers
3586        // is accurate and hope for the best.
3587
3588        return OK;
3589    }
3590
3591    if (csd_size < 2) {
3592        return ERROR_MALFORMED;
3593    }
3594
3595    static uint32_t kSamplingRate[] = {
3596        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
3597        16000, 12000, 11025, 8000, 7350
3598    };
3599
3600    ABitReader br(csd, csd_size);
3601    uint32_t objectType = br.getBits(5);
3602
3603    if (objectType == 31) {  // AAC-ELD => additional 6 bits
3604        objectType = 32 + br.getBits(6);
3605    }
3606
3607    if (mLastTrack == NULL)
3608        return ERROR_MALFORMED;
3609
3610    //keep AOT type
3611    mLastTrack->meta.setInt32(kKeyAACAOT, objectType);
3612
3613    uint32_t freqIndex = br.getBits(4);
3614
3615    int32_t sampleRate = 0;
3616    int32_t numChannels = 0;
3617    if (freqIndex == 15) {
3618        if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
3619        sampleRate = br.getBits(24);
3620        numChannels = br.getBits(4);
3621    } else {
3622        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3623        numChannels = br.getBits(4);
3624
3625        if (freqIndex == 13 || freqIndex == 14) {
3626            return ERROR_MALFORMED;
3627        }
3628
3629        sampleRate = kSamplingRate[freqIndex];
3630    }
3631
3632    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
3633        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3634        uint32_t extFreqIndex = br.getBits(4);
3635        int32_t extSampleRate __unused;
3636        if (extFreqIndex == 15) {
3637            if (csd_size < 8) {
3638                return ERROR_MALFORMED;
3639            }
3640            if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
3641            extSampleRate = br.getBits(24);
3642        } else {
3643            if (extFreqIndex == 13 || extFreqIndex == 14) {
3644                return ERROR_MALFORMED;
3645            }
3646            extSampleRate = kSamplingRate[extFreqIndex];
3647        }
3648        //TODO: save the extension sampling rate value in meta data =>
3649        //      mLastTrack->meta.setInt32(kKeyExtSampleRate, extSampleRate);
3650    }
3651
3652    switch (numChannels) {
3653        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
3654        case 0:
3655        case 1:// FC
3656        case 2:// FL FR
3657        case 3:// FC, FL FR
3658        case 4:// FC, FL FR, RC
3659        case 5:// FC, FL FR, SL SR
3660        case 6:// FC, FL FR, SL SR, LFE
3661            //numChannels already contains the right value
3662            break;
3663        case 11:// FC, FL FR, SL SR, RC, LFE
3664            numChannels = 7;
3665            break;
3666        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
3667        case 12:// FC, FL  FR,  SL SR, RL RR, LFE
3668        case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
3669            numChannels = 8;
3670            break;
3671        default:
3672            return ERROR_UNSUPPORTED;
3673    }
3674
3675    {
3676        if (objectType == AOT_SBR || objectType == AOT_PS) {
3677            if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3678            objectType = br.getBits(5);
3679
3680            if (objectType == AOT_ESCAPE) {
3681                if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
3682                objectType = 32 + br.getBits(6);
3683            }
3684        }
3685        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
3686                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
3687                objectType == AOT_ER_BSAC) {
3688            if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
3689            const int32_t frameLengthFlag __unused = br.getBits(1);
3690
3691            const int32_t dependsOnCoreCoder = br.getBits(1);
3692
3693            if (dependsOnCoreCoder ) {
3694                if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
3695                const int32_t coreCoderDelay __unused = br.getBits(14);
3696            }
3697
3698            int32_t extensionFlag = -1;
3699            if (br.numBitsLeft() > 0) {
3700                extensionFlag = br.getBits(1);
3701            } else {
3702                switch (objectType) {
3703                // 14496-3 4.5.1.1 extensionFlag
3704                case AOT_AAC_LC:
3705                    extensionFlag = 0;
3706                    break;
3707                case AOT_ER_AAC_LC:
3708                case AOT_ER_AAC_SCAL:
3709                case AOT_ER_BSAC:
3710                case AOT_ER_AAC_LD:
3711                    extensionFlag = 1;
3712                    break;
3713                default:
3714                    return ERROR_MALFORMED;
3715                    break;
3716                }
3717                ALOGW("csd missing extension flag; assuming %d for object type %u.",
3718                        extensionFlag, objectType);
3719            }
3720
3721            if (numChannels == 0) {
3722                int32_t channelsEffectiveNum = 0;
3723                int32_t channelsNum = 0;
3724                if (br.numBitsLeft() < 32) {
3725                    return ERROR_MALFORMED;
3726                }
3727                const int32_t ElementInstanceTag __unused = br.getBits(4);
3728                const int32_t Profile __unused = br.getBits(2);
3729                const int32_t SamplingFrequencyIndex __unused = br.getBits(4);
3730                const int32_t NumFrontChannelElements = br.getBits(4);
3731                const int32_t NumSideChannelElements = br.getBits(4);
3732                const int32_t NumBackChannelElements = br.getBits(4);
3733                const int32_t NumLfeChannelElements = br.getBits(2);
3734                const int32_t NumAssocDataElements __unused = br.getBits(3);
3735                const int32_t NumValidCcElements __unused = br.getBits(4);
3736
3737                const int32_t MonoMixdownPresent = br.getBits(1);
3738
3739                if (MonoMixdownPresent != 0) {
3740                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3741                    const int32_t MonoMixdownElementNumber __unused = br.getBits(4);
3742                }
3743
3744                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
3745                const int32_t StereoMixdownPresent = br.getBits(1);
3746                if (StereoMixdownPresent != 0) {
3747                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3748                    const int32_t StereoMixdownElementNumber __unused = br.getBits(4);
3749                }
3750
3751                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
3752                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
3753                if (MatrixMixdownIndexPresent != 0) {
3754                    if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
3755                    const int32_t MatrixMixdownIndex __unused = br.getBits(2);
3756                    const int32_t PseudoSurroundEnable __unused = br.getBits(1);
3757                }
3758
3759                int i;
3760                for (i=0; i < NumFrontChannelElements; i++) {
3761                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3762                    const int32_t FrontElementIsCpe = br.getBits(1);
3763                    const int32_t FrontElementTagSelect __unused = br.getBits(4);
3764                    channelsNum += FrontElementIsCpe ? 2 : 1;
3765                }
3766
3767                for (i=0; i < NumSideChannelElements; i++) {
3768                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3769                    const int32_t SideElementIsCpe = br.getBits(1);
3770                    const int32_t SideElementTagSelect __unused = br.getBits(4);
3771                    channelsNum += SideElementIsCpe ? 2 : 1;
3772                }
3773
3774                for (i=0; i < NumBackChannelElements; i++) {
3775                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3776                    const int32_t BackElementIsCpe = br.getBits(1);
3777                    const int32_t BackElementTagSelect __unused = br.getBits(4);
3778                    channelsNum += BackElementIsCpe ? 2 : 1;
3779                }
3780                channelsEffectiveNum = channelsNum;
3781
3782                for (i=0; i < NumLfeChannelElements; i++) {
3783                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3784                    const int32_t LfeElementTagSelect __unused = br.getBits(4);
3785                    channelsNum += 1;
3786                }
3787                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
3788                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
3789                numChannels = channelsNum;
3790            }
3791        }
3792    }
3793
3794    if (numChannels == 0) {
3795        return ERROR_UNSUPPORTED;
3796    }
3797
3798    if (mLastTrack == NULL)
3799        return ERROR_MALFORMED;
3800
3801    int32_t prevSampleRate;
3802    CHECK(mLastTrack->meta.findInt32(kKeySampleRate, &prevSampleRate));
3803
3804    if (prevSampleRate != sampleRate) {
3805        ALOGV("mpeg4 audio sample rate different from previous setting. "
3806             "was: %d, now: %d", prevSampleRate, sampleRate);
3807    }
3808
3809    mLastTrack->meta.setInt32(kKeySampleRate, sampleRate);
3810
3811    int32_t prevChannelCount;
3812    CHECK(mLastTrack->meta.findInt32(kKeyChannelCount, &prevChannelCount));
3813
3814    if (prevChannelCount != numChannels) {
3815        ALOGV("mpeg4 audio channel count different from previous setting. "
3816             "was: %d, now: %d", prevChannelCount, numChannels);
3817    }
3818
3819    mLastTrack->meta.setInt32(kKeyChannelCount, numChannels);
3820
3821    return OK;
3822}
3823
3824////////////////////////////////////////////////////////////////////////////////
3825
3826MPEG4Source::MPEG4Source(
3827        MetaDataBase &format,
3828        DataSourceBase *dataSource,
3829        int32_t timeScale,
3830        const sp<SampleTable> &sampleTable,
3831        Vector<SidxEntry> &sidx,
3832        const Trex *trex,
3833        off64_t firstMoofOffset,
3834        const sp<ItemTable> &itemTable)
3835    : mFormat(format),
3836      mDataSource(dataSource),
3837      mTimescale(timeScale),
3838      mSampleTable(sampleTable),
3839      mCurrentSampleIndex(0),
3840      mCurrentFragmentIndex(0),
3841      mSegments(sidx),
3842      mTrex(trex),
3843      mFirstMoofOffset(firstMoofOffset),
3844      mCurrentMoofOffset(firstMoofOffset),
3845      mNextMoofOffset(-1),
3846      mCurrentTime(0),
3847      mDefaultEncryptedByteBlock(0),
3848      mDefaultSkipByteBlock(0),
3849      mCurrentSampleInfoAllocSize(0),
3850      mCurrentSampleInfoSizes(NULL),
3851      mCurrentSampleInfoOffsetsAllocSize(0),
3852      mCurrentSampleInfoOffsets(NULL),
3853      mIsAVC(false),
3854      mIsHEVC(false),
3855      mNALLengthSize(0),
3856      mStarted(false),
3857      mGroup(NULL),
3858      mBuffer(NULL),
3859      mWantsNALFragments(false),
3860      mSrcBuffer(NULL),
3861      mIsHeif(itemTable != NULL),
3862      mItemTable(itemTable) {
3863
3864    memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
3865
3866    mFormat.findInt32(kKeyCryptoMode, &mCryptoMode);
3867    mDefaultIVSize = 0;
3868    mFormat.findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
3869    uint32_t keytype;
3870    const void *key;
3871    size_t keysize;
3872    if (mFormat.findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
3873        CHECK(keysize <= 16);
3874        memset(mCryptoKey, 0, 16);
3875        memcpy(mCryptoKey, key, keysize);
3876    }
3877
3878    mFormat.findInt32(kKeyEncryptedByteBlock, &mDefaultEncryptedByteBlock);
3879    mFormat.findInt32(kKeySkipByteBlock, &mDefaultSkipByteBlock);
3880
3881    const char *mime;
3882    bool success = mFormat.findCString(kKeyMIMEType, &mime);
3883    CHECK(success);
3884
3885    mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
3886    mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
3887              !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
3888
3889    if (mIsAVC) {
3890        uint32_t type;
3891        const void *data;
3892        size_t size;
3893        CHECK(format.findData(kKeyAVCC, &type, &data, &size));
3894
3895        const uint8_t *ptr = (const uint8_t *)data;
3896
3897        CHECK(size >= 7);
3898        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
3899
3900        // The number of bytes used to encode the length of a NAL unit.
3901        mNALLengthSize = 1 + (ptr[4] & 3);
3902    } else if (mIsHEVC) {
3903        uint32_t type;
3904        const void *data;
3905        size_t size;
3906        CHECK(format.findData(kKeyHVCC, &type, &data, &size));
3907
3908        const uint8_t *ptr = (const uint8_t *)data;
3909
3910        CHECK(size >= 22);
3911        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
3912
3913        mNALLengthSize = 1 + (ptr[14 + 7] & 3);
3914    }
3915
3916    CHECK(format.findInt32(kKeyTrackID, &mTrackId));
3917
3918}
3919
3920status_t MPEG4Source::init() {
3921    if (mFirstMoofOffset != 0) {
3922        off64_t offset = mFirstMoofOffset;
3923        return parseChunk(&offset);
3924    }
3925    return OK;
3926}
3927
3928MPEG4Source::~MPEG4Source() {
3929    if (mStarted) {
3930        stop();
3931    }
3932    free(mCurrentSampleInfoSizes);
3933    free(mCurrentSampleInfoOffsets);
3934}
3935
3936status_t MPEG4Source::start(MetaDataBase *params) {
3937    Mutex::Autolock autoLock(mLock);
3938
3939    CHECK(!mStarted);
3940
3941    int32_t val;
3942    if (params && params->findInt32(kKeyWantsNALFragments, &val)
3943        && val != 0) {
3944        mWantsNALFragments = true;
3945    } else {
3946        mWantsNALFragments = false;
3947    }
3948
3949    int32_t tmp;
3950    CHECK(mFormat.findInt32(kKeyMaxInputSize, &tmp));
3951    size_t max_size = tmp;
3952
3953    // A somewhat arbitrary limit that should be sufficient for 8k video frames
3954    // If you see the message below for a valid input stream: increase the limit
3955    const size_t kMaxBufferSize = 64 * 1024 * 1024;
3956    if (max_size > kMaxBufferSize) {
3957        ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize);
3958        return ERROR_MALFORMED;
3959    }
3960    if (max_size == 0) {
3961        ALOGE("zero max input size");
3962        return ERROR_MALFORMED;
3963    }
3964
3965    // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize.
3966    const size_t kMaxBuffers = 8;
3967    const size_t buffers = min(kMaxBufferSize / max_size, kMaxBuffers);
3968    mGroup = new MediaBufferGroup(buffers, max_size);
3969    mSrcBuffer = new (std::nothrow) uint8_t[max_size];
3970    if (mSrcBuffer == NULL) {
3971        // file probably specified a bad max size
3972        delete mGroup;
3973        mGroup = NULL;
3974        return ERROR_MALFORMED;
3975    }
3976
3977    mStarted = true;
3978
3979    return OK;
3980}
3981
3982status_t MPEG4Source::stop() {
3983    Mutex::Autolock autoLock(mLock);
3984
3985    CHECK(mStarted);
3986
3987    if (mBuffer != NULL) {
3988        mBuffer->release();
3989        mBuffer = NULL;
3990    }
3991
3992    delete[] mSrcBuffer;
3993    mSrcBuffer = NULL;
3994
3995    delete mGroup;
3996    mGroup = NULL;
3997
3998    mStarted = false;
3999    mCurrentSampleIndex = 0;
4000
4001    return OK;
4002}
4003
4004status_t MPEG4Source::parseChunk(off64_t *offset) {
4005    uint32_t hdr[2];
4006    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
4007        return ERROR_IO;
4008    }
4009    uint64_t chunk_size = ntohl(hdr[0]);
4010    uint32_t chunk_type = ntohl(hdr[1]);
4011    off64_t data_offset = *offset + 8;
4012
4013    if (chunk_size == 1) {
4014        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
4015            return ERROR_IO;
4016        }
4017        chunk_size = ntoh64(chunk_size);
4018        data_offset += 8;
4019
4020        if (chunk_size < 16) {
4021            // The smallest valid chunk is 16 bytes long in this case.
4022            return ERROR_MALFORMED;
4023        }
4024    } else if (chunk_size < 8) {
4025        // The smallest valid chunk is 8 bytes long.
4026        return ERROR_MALFORMED;
4027    }
4028
4029    char chunk[5];
4030    MakeFourCCString(chunk_type, chunk);
4031    ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
4032
4033    off64_t chunk_data_size = *offset + chunk_size - data_offset;
4034
4035    switch(chunk_type) {
4036
4037        case FOURCC('t', 'r', 'a', 'f'):
4038        case FOURCC('m', 'o', 'o', 'f'): {
4039            off64_t stop_offset = *offset + chunk_size;
4040            *offset = data_offset;
4041            while (*offset < stop_offset) {
4042                status_t err = parseChunk(offset);
4043                if (err != OK) {
4044                    return err;
4045                }
4046            }
4047            if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
4048                // *offset points to the box following this moof. Find the next moof from there.
4049
4050                while (true) {
4051                    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
4052                        // no more box to the end of file.
4053                        break;
4054                    }
4055                    chunk_size = ntohl(hdr[0]);
4056                    chunk_type = ntohl(hdr[1]);
4057                    if (chunk_size == 1) {
4058                        // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box
4059                        // which is defined in 4.2 Object Structure.
4060                        // When chunk_size==1, 8 bytes follows as "largesize".
4061                        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
4062                            return ERROR_IO;
4063                        }
4064                        chunk_size = ntoh64(chunk_size);
4065                        if (chunk_size < 16) {
4066                            // The smallest valid chunk is 16 bytes long in this case.
4067                            return ERROR_MALFORMED;
4068                        }
4069                    } else if (chunk_size == 0) {
4070                        // next box extends to end of file.
4071                    } else if (chunk_size < 8) {
4072                        // The smallest valid chunk is 8 bytes long in this case.
4073                        return ERROR_MALFORMED;
4074                    }
4075
4076                    if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
4077                        mNextMoofOffset = *offset;
4078                        break;
4079                    } else if (chunk_size == 0) {
4080                        break;
4081                    }
4082                    *offset += chunk_size;
4083                }
4084            }
4085            break;
4086        }
4087
4088        case FOURCC('t', 'f', 'h', 'd'): {
4089                status_t err;
4090                if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
4091                    return err;
4092                }
4093                *offset += chunk_size;
4094                break;
4095        }
4096
4097        case FOURCC('t', 'r', 'u', 'n'): {
4098                status_t err;
4099                if (mLastParsedTrackId == mTrackId) {
4100                    if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
4101                        return err;
4102                    }
4103                }
4104
4105                *offset += chunk_size;
4106                break;
4107        }
4108
4109        case FOURCC('s', 'a', 'i', 'z'): {
4110            status_t err;
4111            if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
4112                return err;
4113            }
4114            *offset += chunk_size;
4115            break;
4116        }
4117        case FOURCC('s', 'a', 'i', 'o'): {
4118            status_t err;
4119            if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
4120                return err;
4121            }
4122            *offset += chunk_size;
4123            break;
4124        }
4125
4126        case FOURCC('s', 'e', 'n', 'c'): {
4127            status_t err;
4128            if ((err = parseSampleEncryption(data_offset)) != OK) {
4129                return err;
4130            }
4131            *offset += chunk_size;
4132            break;
4133        }
4134
4135        case FOURCC('m', 'd', 'a', 't'): {
4136            // parse DRM info if present
4137            ALOGV("MPEG4Source::parseChunk mdat");
4138            // if saiz/saoi was previously observed, do something with the sampleinfos
4139            *offset += chunk_size;
4140            break;
4141        }
4142
4143        default: {
4144            *offset += chunk_size;
4145            break;
4146        }
4147    }
4148    return OK;
4149}
4150
4151status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
4152        off64_t offset, off64_t /* size */) {
4153    ALOGV("parseSampleAuxiliaryInformationSizes");
4154    // 14496-12 8.7.12
4155    uint8_t version;
4156    if (mDataSource->readAt(
4157            offset, &version, sizeof(version))
4158            < (ssize_t)sizeof(version)) {
4159        return ERROR_IO;
4160    }
4161
4162    if (version != 0) {
4163        return ERROR_UNSUPPORTED;
4164    }
4165    offset++;
4166
4167    uint32_t flags;
4168    if (!mDataSource->getUInt24(offset, &flags)) {
4169        return ERROR_IO;
4170    }
4171    offset += 3;
4172
4173    if (flags & 1) {
4174        uint32_t tmp;
4175        if (!mDataSource->getUInt32(offset, &tmp)) {
4176            return ERROR_MALFORMED;
4177        }
4178        mCurrentAuxInfoType = tmp;
4179        offset += 4;
4180        if (!mDataSource->getUInt32(offset, &tmp)) {
4181            return ERROR_MALFORMED;
4182        }
4183        mCurrentAuxInfoTypeParameter = tmp;
4184        offset += 4;
4185    }
4186
4187    uint8_t defsize;
4188    if (mDataSource->readAt(offset, &defsize, 1) != 1) {
4189        return ERROR_MALFORMED;
4190    }
4191    mCurrentDefaultSampleInfoSize = defsize;
4192    offset++;
4193
4194    uint32_t smplcnt;
4195    if (!mDataSource->getUInt32(offset, &smplcnt)) {
4196        return ERROR_MALFORMED;
4197    }
4198    mCurrentSampleInfoCount = smplcnt;
4199    offset += 4;
4200
4201    if (mCurrentDefaultSampleInfoSize != 0) {
4202        ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
4203        return OK;
4204    }
4205    if (smplcnt > mCurrentSampleInfoAllocSize) {
4206        uint8_t * newPtr =  (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
4207        if (newPtr == NULL) {
4208            ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
4209            return NO_MEMORY;
4210        }
4211        mCurrentSampleInfoSizes = newPtr;
4212        mCurrentSampleInfoAllocSize = smplcnt;
4213    }
4214
4215    mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
4216    return OK;
4217}
4218
4219status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
4220        off64_t offset, off64_t /* size */) {
4221    ALOGV("parseSampleAuxiliaryInformationOffsets");
4222    // 14496-12 8.7.13
4223    uint8_t version;
4224    if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
4225        return ERROR_IO;
4226    }
4227    offset++;
4228
4229    uint32_t flags;
4230    if (!mDataSource->getUInt24(offset, &flags)) {
4231        return ERROR_IO;
4232    }
4233    offset += 3;
4234
4235    uint32_t entrycount;
4236    if (!mDataSource->getUInt32(offset, &entrycount)) {
4237        return ERROR_IO;
4238    }
4239    offset += 4;
4240    if (entrycount == 0) {
4241        return OK;
4242    }
4243    if (entrycount > UINT32_MAX / 8) {
4244        return ERROR_MALFORMED;
4245    }
4246
4247    if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
4248        uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
4249        if (newPtr == NULL) {
4250            ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoOffsetsAllocSize, entrycount * 8);
4251            return NO_MEMORY;
4252        }
4253        mCurrentSampleInfoOffsets = newPtr;
4254        mCurrentSampleInfoOffsetsAllocSize = entrycount;
4255    }
4256    mCurrentSampleInfoOffsetCount = entrycount;
4257
4258    if (mCurrentSampleInfoOffsets == NULL) {
4259        return OK;
4260    }
4261
4262    for (size_t i = 0; i < entrycount; i++) {
4263        if (version == 0) {
4264            uint32_t tmp;
4265            if (!mDataSource->getUInt32(offset, &tmp)) {
4266                return ERROR_IO;
4267            }
4268            mCurrentSampleInfoOffsets[i] = tmp;
4269            offset += 4;
4270        } else {
4271            uint64_t tmp;
4272            if (!mDataSource->getUInt64(offset, &tmp)) {
4273                return ERROR_IO;
4274            }
4275            mCurrentSampleInfoOffsets[i] = tmp;
4276            offset += 8;
4277        }
4278    }
4279
4280    // parse clear/encrypted data
4281
4282    off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
4283
4284    drmoffset += mCurrentMoofOffset;
4285
4286    return parseClearEncryptedSizes(drmoffset, false, 0);
4287}
4288
4289status_t MPEG4Source::parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags) {
4290
4291    int ivlength;
4292    CHECK(mFormat.findInt32(kKeyCryptoDefaultIVSize, &ivlength));
4293
4294    // only 0, 8 and 16 byte initialization vectors are supported
4295    if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
4296        ALOGW("unsupported IV length: %d", ivlength);
4297        return ERROR_MALFORMED;
4298    }
4299
4300    uint32_t sampleCount = mCurrentSampleInfoCount;
4301    if (isSubsampleEncryption) {
4302        if (!mDataSource->getUInt32(offset, &sampleCount)) {
4303            return ERROR_IO;
4304        }
4305        offset += 4;
4306    }
4307
4308    // read CencSampleAuxiliaryDataFormats
4309    for (size_t i = 0; i < sampleCount; i++) {
4310        if (i >= mCurrentSamples.size()) {
4311            ALOGW("too few samples");
4312            break;
4313        }
4314        Sample *smpl = &mCurrentSamples.editItemAt(i);
4315        if (!smpl->clearsizes.isEmpty()) {
4316            continue;
4317        }
4318
4319        memset(smpl->iv, 0, 16);
4320        if (mDataSource->readAt(offset, smpl->iv, ivlength) != ivlength) {
4321            return ERROR_IO;
4322        }
4323
4324        offset += ivlength;
4325
4326        bool readSubsamples;
4327        if (isSubsampleEncryption) {
4328            readSubsamples = flags & 2;
4329        } else {
4330            int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
4331            if (smplinfosize == 0) {
4332                smplinfosize = mCurrentSampleInfoSizes[i];
4333            }
4334            readSubsamples = smplinfosize > ivlength;
4335        }
4336
4337        if (readSubsamples) {
4338            uint16_t numsubsamples;
4339            if (!mDataSource->getUInt16(offset, &numsubsamples)) {
4340                return ERROR_IO;
4341            }
4342            offset += 2;
4343            for (size_t j = 0; j < numsubsamples; j++) {
4344                uint16_t numclear;
4345                uint32_t numencrypted;
4346                if (!mDataSource->getUInt16(offset, &numclear)) {
4347                    return ERROR_IO;
4348                }
4349                offset += 2;
4350                if (!mDataSource->getUInt32(offset, &numencrypted)) {
4351                    return ERROR_IO;
4352                }
4353                offset += 4;
4354                smpl->clearsizes.add(numclear);
4355                smpl->encryptedsizes.add(numencrypted);
4356            }
4357        } else {
4358            smpl->clearsizes.add(0);
4359            smpl->encryptedsizes.add(smpl->size);
4360        }
4361    }
4362
4363    return OK;
4364}
4365
4366status_t MPEG4Source::parseSampleEncryption(off64_t offset) {
4367    uint32_t flags;
4368    if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
4369        return ERROR_MALFORMED;
4370    }
4371    return parseClearEncryptedSizes(offset + 4, true, flags);
4372}
4373
4374status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
4375
4376    if (size < 8) {
4377        return -EINVAL;
4378    }
4379
4380    uint32_t flags;
4381    if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
4382        return ERROR_MALFORMED;
4383    }
4384
4385    if (flags & 0xff000000) {
4386        return -EINVAL;
4387    }
4388
4389    if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
4390        return ERROR_MALFORMED;
4391    }
4392
4393    if (mLastParsedTrackId != mTrackId) {
4394        // this is not the right track, skip it
4395        return OK;
4396    }
4397
4398    mTrackFragmentHeaderInfo.mFlags = flags;
4399    mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
4400    offset += 8;
4401    size -= 8;
4402
4403    ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
4404
4405    if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
4406        if (size < 8) {
4407            return -EINVAL;
4408        }
4409
4410        if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
4411            return ERROR_MALFORMED;
4412        }
4413        offset += 8;
4414        size -= 8;
4415    }
4416
4417    if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
4418        if (size < 4) {
4419            return -EINVAL;
4420        }
4421
4422        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
4423            return ERROR_MALFORMED;
4424        }
4425        offset += 4;
4426        size -= 4;
4427    }
4428
4429    if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
4430        if (size < 4) {
4431            return -EINVAL;
4432        }
4433
4434        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
4435            return ERROR_MALFORMED;
4436        }
4437        offset += 4;
4438        size -= 4;
4439    }
4440
4441    if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
4442        if (size < 4) {
4443            return -EINVAL;
4444        }
4445
4446        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
4447            return ERROR_MALFORMED;
4448        }
4449        offset += 4;
4450        size -= 4;
4451    }
4452
4453    if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
4454        if (size < 4) {
4455            return -EINVAL;
4456        }
4457
4458        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
4459            return ERROR_MALFORMED;
4460        }
4461        offset += 4;
4462        size -= 4;
4463    }
4464
4465    if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
4466        mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
4467    }
4468
4469    mTrackFragmentHeaderInfo.mDataOffset = 0;
4470    return OK;
4471}
4472
4473status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
4474
4475    ALOGV("MPEG4Extractor::parseTrackFragmentRun");
4476    if (size < 8) {
4477        return -EINVAL;
4478    }
4479
4480    enum {
4481        kDataOffsetPresent                  = 0x01,
4482        kFirstSampleFlagsPresent            = 0x04,
4483        kSampleDurationPresent              = 0x100,
4484        kSampleSizePresent                  = 0x200,
4485        kSampleFlagsPresent                 = 0x400,
4486        kSampleCompositionTimeOffsetPresent = 0x800,
4487    };
4488
4489    uint32_t flags;
4490    if (!mDataSource->getUInt32(offset, &flags)) {
4491        return ERROR_MALFORMED;
4492    }
4493    // |version| only affects SampleCompositionTimeOffset field.
4494    // If version == 0, SampleCompositionTimeOffset is uint32_t;
4495    // Otherwise, SampleCompositionTimeOffset is int32_t.
4496    // Sample.compositionOffset is defined as int32_t.
4497    uint8_t version = flags >> 24;
4498    flags &= 0xffffff;
4499    ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags);
4500
4501    if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
4502        // These two shall not be used together.
4503        return -EINVAL;
4504    }
4505
4506    uint32_t sampleCount;
4507    if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
4508        return ERROR_MALFORMED;
4509    }
4510    offset += 8;
4511    size -= 8;
4512
4513    uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
4514
4515    uint32_t firstSampleFlags = 0;
4516
4517    if (flags & kDataOffsetPresent) {
4518        if (size < 4) {
4519            return -EINVAL;
4520        }
4521
4522        int32_t dataOffsetDelta;
4523        if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
4524            return ERROR_MALFORMED;
4525        }
4526
4527        dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
4528
4529        offset += 4;
4530        size -= 4;
4531    }
4532
4533    if (flags & kFirstSampleFlagsPresent) {
4534        if (size < 4) {
4535            return -EINVAL;
4536        }
4537
4538        if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
4539            return ERROR_MALFORMED;
4540        }
4541        offset += 4;
4542        size -= 4;
4543    }
4544
4545    uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
4546             sampleCtsOffset = 0;
4547
4548    size_t bytesPerSample = 0;
4549    if (flags & kSampleDurationPresent) {
4550        bytesPerSample += 4;
4551    } else if (mTrackFragmentHeaderInfo.mFlags
4552            & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
4553        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
4554    } else if (mTrex) {
4555        sampleDuration = mTrex->default_sample_duration;
4556    }
4557
4558    if (flags & kSampleSizePresent) {
4559        bytesPerSample += 4;
4560    } else if (mTrackFragmentHeaderInfo.mFlags
4561            & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
4562        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
4563    } else {
4564        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
4565    }
4566
4567    if (flags & kSampleFlagsPresent) {
4568        bytesPerSample += 4;
4569    } else if (mTrackFragmentHeaderInfo.mFlags
4570            & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
4571        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
4572    } else {
4573        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
4574    }
4575
4576    if (flags & kSampleCompositionTimeOffsetPresent) {
4577        bytesPerSample += 4;
4578    } else {
4579        sampleCtsOffset = 0;
4580    }
4581
4582    if (size < (off64_t)(sampleCount * bytesPerSample)) {
4583        return -EINVAL;
4584    }
4585
4586    Sample tmp;
4587    for (uint32_t i = 0; i < sampleCount; ++i) {
4588        if (flags & kSampleDurationPresent) {
4589            if (!mDataSource->getUInt32(offset, &sampleDuration)) {
4590                return ERROR_MALFORMED;
4591            }
4592            offset += 4;
4593        }
4594
4595        if (flags & kSampleSizePresent) {
4596            if (!mDataSource->getUInt32(offset, &sampleSize)) {
4597                return ERROR_MALFORMED;
4598            }
4599            offset += 4;
4600        }
4601
4602        if (flags & kSampleFlagsPresent) {
4603            if (!mDataSource->getUInt32(offset, &sampleFlags)) {
4604                return ERROR_MALFORMED;
4605            }
4606            offset += 4;
4607        }
4608
4609        if (flags & kSampleCompositionTimeOffsetPresent) {
4610            if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
4611                return ERROR_MALFORMED;
4612            }
4613            offset += 4;
4614        }
4615
4616        ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
4617              " flags 0x%08x", i + 1,
4618                dataOffset, sampleSize, sampleDuration,
4619                (flags & kFirstSampleFlagsPresent) && i == 0
4620                    ? firstSampleFlags : sampleFlags);
4621        tmp.offset = dataOffset;
4622        tmp.size = sampleSize;
4623        tmp.duration = sampleDuration;
4624        tmp.compositionOffset = sampleCtsOffset;
4625        memset(tmp.iv, 0, sizeof(tmp.iv));
4626        mCurrentSamples.add(tmp);
4627
4628        dataOffset += sampleSize;
4629    }
4630
4631    mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
4632
4633    return OK;
4634}
4635
4636status_t MPEG4Source::getFormat(MetaDataBase &meta) {
4637    Mutex::Autolock autoLock(mLock);
4638    meta = mFormat;
4639    return OK;
4640}
4641
4642size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
4643    switch (mNALLengthSize) {
4644        case 1:
4645            return *data;
4646        case 2:
4647            return U16_AT(data);
4648        case 3:
4649            return ((size_t)data[0] << 16) | U16_AT(&data[1]);
4650        case 4:
4651            return U32_AT(data);
4652    }
4653
4654    // This cannot happen, mNALLengthSize springs to life by adding 1 to
4655    // a 2-bit integer.
4656    CHECK(!"Should not be here.");
4657
4658    return 0;
4659}
4660
4661status_t MPEG4Source::read(
4662        MediaBufferBase **out, const ReadOptions *options) {
4663    Mutex::Autolock autoLock(mLock);
4664
4665    CHECK(mStarted);
4666
4667    if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) {
4668        *out = nullptr;
4669        return WOULD_BLOCK;
4670    }
4671
4672    if (mFirstMoofOffset > 0) {
4673        return fragmentedRead(out, options);
4674    }
4675
4676    *out = NULL;
4677
4678    int64_t targetSampleTimeUs = -1;
4679
4680    int64_t seekTimeUs;
4681    ReadOptions::SeekMode mode;
4682    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
4683        if (mIsHeif) {
4684            CHECK(mSampleTable == NULL);
4685            CHECK(mItemTable != NULL);
4686            int32_t imageIndex;
4687            if (!mFormat.findInt32(kKeyTrackID, &imageIndex)) {
4688                return ERROR_MALFORMED;
4689            }
4690
4691            status_t err;
4692            if (seekTimeUs >= 0) {
4693                err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
4694            } else {
4695                err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
4696            }
4697            if (err != OK) {
4698                return err;
4699            }
4700        } else {
4701            uint32_t findFlags = 0;
4702            switch (mode) {
4703                case ReadOptions::SEEK_PREVIOUS_SYNC:
4704                    findFlags = SampleTable::kFlagBefore;
4705                    break;
4706                case ReadOptions::SEEK_NEXT_SYNC:
4707                    findFlags = SampleTable::kFlagAfter;
4708                    break;
4709                case ReadOptions::SEEK_CLOSEST_SYNC:
4710                case ReadOptions::SEEK_CLOSEST:
4711                    findFlags = SampleTable::kFlagClosest;
4712                    break;
4713                case ReadOptions::SEEK_FRAME_INDEX:
4714                    findFlags = SampleTable::kFlagFrameIndex;
4715                    break;
4716                default:
4717                    CHECK(!"Should not be here.");
4718                    break;
4719            }
4720
4721            uint32_t sampleIndex;
4722            status_t err = mSampleTable->findSampleAtTime(
4723                    seekTimeUs, 1000000, mTimescale,
4724                    &sampleIndex, findFlags);
4725
4726            if (mode == ReadOptions::SEEK_CLOSEST
4727                    || mode == ReadOptions::SEEK_FRAME_INDEX) {
4728                // We found the closest sample already, now we want the sync
4729                // sample preceding it (or the sample itself of course), even
4730                // if the subsequent sync sample is closer.
4731                findFlags = SampleTable::kFlagBefore;
4732            }
4733
4734            uint32_t syncSampleIndex;
4735            if (err == OK) {
4736                err = mSampleTable->findSyncSampleNear(
4737                        sampleIndex, &syncSampleIndex, findFlags);
4738            }
4739
4740            uint32_t sampleTime;
4741            if (err == OK) {
4742                err = mSampleTable->getMetaDataForSample(
4743                        sampleIndex, NULL, NULL, &sampleTime);
4744            }
4745
4746            if (err != OK) {
4747                if (err == ERROR_OUT_OF_RANGE) {
4748                    // An attempt to seek past the end of the stream would
4749                    // normally cause this ERROR_OUT_OF_RANGE error. Propagating
4750                    // this all the way to the MediaPlayer would cause abnormal
4751                    // termination. Legacy behaviour appears to be to behave as if
4752                    // we had seeked to the end of stream, ending normally.
4753                    err = ERROR_END_OF_STREAM;
4754                }
4755                ALOGV("end of stream");
4756                return err;
4757            }
4758
4759            if (mode == ReadOptions::SEEK_CLOSEST
4760                || mode == ReadOptions::SEEK_FRAME_INDEX) {
4761                targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
4762            }
4763
4764#if 0
4765            uint32_t syncSampleTime;
4766            CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
4767                        syncSampleIndex, NULL, NULL, &syncSampleTime));
4768
4769            ALOGI("seek to time %lld us => sample at time %lld us, "
4770                 "sync sample at time %lld us",
4771                 seekTimeUs,
4772                 sampleTime * 1000000ll / mTimescale,
4773                 syncSampleTime * 1000000ll / mTimescale);
4774#endif
4775
4776            mCurrentSampleIndex = syncSampleIndex;
4777        }
4778
4779        if (mBuffer != NULL) {
4780            mBuffer->release();
4781            mBuffer = NULL;
4782        }
4783
4784        // fall through
4785    }
4786
4787    off64_t offset = 0;
4788    size_t size = 0;
4789    uint32_t cts, stts;
4790    bool isSyncSample;
4791    bool newBuffer = false;
4792    if (mBuffer == NULL) {
4793        newBuffer = true;
4794
4795        status_t err;
4796        if (!mIsHeif) {
4797            err = mSampleTable->getMetaDataForSample(
4798                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
4799        } else {
4800            err = mItemTable->getImageOffsetAndSize(
4801                    options && options->getSeekTo(&seekTimeUs, &mode) ?
4802                            &mCurrentSampleIndex : NULL, &offset, &size);
4803
4804            cts = stts = 0;
4805            isSyncSample = 0;
4806            ALOGV("image offset %lld, size %zu", (long long)offset, size);
4807        }
4808
4809        if (err != OK) {
4810            return err;
4811        }
4812
4813        err = mGroup->acquire_buffer(&mBuffer);
4814
4815        if (err != OK) {
4816            CHECK(mBuffer == NULL);
4817            return err;
4818        }
4819        if (size > mBuffer->size()) {
4820            ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
4821            mBuffer->release();
4822            mBuffer = NULL;
4823            return ERROR_BUFFER_TOO_SMALL;
4824        }
4825    }
4826
4827    if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) {
4828        if (newBuffer) {
4829            ssize_t num_bytes_read =
4830                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
4831
4832            if (num_bytes_read < (ssize_t)size) {
4833                mBuffer->release();
4834                mBuffer = NULL;
4835
4836                return ERROR_IO;
4837            }
4838
4839            CHECK(mBuffer != NULL);
4840            mBuffer->set_range(0, size);
4841            mBuffer->meta_data().clear();
4842            mBuffer->meta_data().setInt64(
4843                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4844            mBuffer->meta_data().setInt64(
4845                    kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
4846
4847            if (targetSampleTimeUs >= 0) {
4848                mBuffer->meta_data().setInt64(
4849                        kKeyTargetTime, targetSampleTimeUs);
4850            }
4851
4852            if (isSyncSample) {
4853                mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
4854            }
4855
4856            ++mCurrentSampleIndex;
4857        }
4858
4859        if (!mIsAVC && !mIsHEVC) {
4860            *out = mBuffer;
4861            mBuffer = NULL;
4862
4863            return OK;
4864        }
4865
4866        // Each NAL unit is split up into its constituent fragments and
4867        // each one of them returned in its own buffer.
4868
4869        CHECK(mBuffer->range_length() >= mNALLengthSize);
4870
4871        const uint8_t *src =
4872            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
4873
4874        size_t nal_size = parseNALSize(src);
4875        if (mNALLengthSize > SIZE_MAX - nal_size) {
4876            ALOGE("b/24441553, b/24445122");
4877        }
4878        if (mBuffer->range_length() - mNALLengthSize < nal_size) {
4879            ALOGE("incomplete NAL unit.");
4880
4881            mBuffer->release();
4882            mBuffer = NULL;
4883
4884            return ERROR_MALFORMED;
4885        }
4886
4887        MediaBufferBase *clone = mBuffer->clone();
4888        CHECK(clone != NULL);
4889        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
4890
4891        CHECK(mBuffer != NULL);
4892        mBuffer->set_range(
4893                mBuffer->range_offset() + mNALLengthSize + nal_size,
4894                mBuffer->range_length() - mNALLengthSize - nal_size);
4895
4896        if (mBuffer->range_length() == 0) {
4897            mBuffer->release();
4898            mBuffer = NULL;
4899        }
4900
4901        *out = clone;
4902
4903        return OK;
4904    } else {
4905        // Whole NAL units are returned but each fragment is prefixed by
4906        // the start code (0x00 00 00 01).
4907        ssize_t num_bytes_read = 0;
4908        int32_t drm = 0;
4909        bool usesDRM = (mFormat.findInt32(kKeyIsDRM, &drm) && drm != 0);
4910        if (usesDRM) {
4911            num_bytes_read =
4912                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
4913        } else {
4914            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
4915        }
4916
4917        if (num_bytes_read < (ssize_t)size) {
4918            mBuffer->release();
4919            mBuffer = NULL;
4920
4921            return ERROR_IO;
4922        }
4923
4924        if (usesDRM) {
4925            CHECK(mBuffer != NULL);
4926            mBuffer->set_range(0, size);
4927
4928        } else {
4929            uint8_t *dstData = (uint8_t *)mBuffer->data();
4930            size_t srcOffset = 0;
4931            size_t dstOffset = 0;
4932
4933            while (srcOffset < size) {
4934                bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
4935                size_t nalLength = 0;
4936                if (!isMalFormed) {
4937                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
4938                    srcOffset += mNALLengthSize;
4939                    isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
4940                }
4941
4942                if (isMalFormed) {
4943                    ALOGE("Video is malformed");
4944                    mBuffer->release();
4945                    mBuffer = NULL;
4946                    return ERROR_MALFORMED;
4947                }
4948
4949                if (nalLength == 0) {
4950                    continue;
4951                }
4952
4953                if (dstOffset > SIZE_MAX - 4 ||
4954                        dstOffset + 4 > SIZE_MAX - nalLength ||
4955                        dstOffset + 4 + nalLength > mBuffer->size()) {
4956                    ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
4957                    android_errorWriteLog(0x534e4554, "27208621");
4958                    mBuffer->release();
4959                    mBuffer = NULL;
4960                    return ERROR_MALFORMED;
4961                }
4962
4963                dstData[dstOffset++] = 0;
4964                dstData[dstOffset++] = 0;
4965                dstData[dstOffset++] = 0;
4966                dstData[dstOffset++] = 1;
4967                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
4968                srcOffset += nalLength;
4969                dstOffset += nalLength;
4970            }
4971            CHECK_EQ(srcOffset, size);
4972            CHECK(mBuffer != NULL);
4973            mBuffer->set_range(0, dstOffset);
4974        }
4975
4976        mBuffer->meta_data().clear();
4977        mBuffer->meta_data().setInt64(
4978                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4979        mBuffer->meta_data().setInt64(
4980                kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
4981
4982        if (targetSampleTimeUs >= 0) {
4983            mBuffer->meta_data().setInt64(
4984                    kKeyTargetTime, targetSampleTimeUs);
4985        }
4986
4987        if (mIsAVC) {
4988            uint32_t layerId = FindAVCLayerId(
4989                    (const uint8_t *)mBuffer->data(), mBuffer->range_length());
4990            mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId);
4991        }
4992
4993        if (isSyncSample) {
4994            mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
4995        }
4996
4997        ++mCurrentSampleIndex;
4998
4999        *out = mBuffer;
5000        mBuffer = NULL;
5001
5002        return OK;
5003    }
5004}
5005
5006status_t MPEG4Source::fragmentedRead(
5007        MediaBufferBase **out, const ReadOptions *options) {
5008
5009    ALOGV("MPEG4Source::fragmentedRead");
5010
5011    CHECK(mStarted);
5012
5013    *out = NULL;
5014
5015    int64_t targetSampleTimeUs = -1;
5016
5017    int64_t seekTimeUs;
5018    ReadOptions::SeekMode mode;
5019    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
5020
5021        int numSidxEntries = mSegments.size();
5022        if (numSidxEntries != 0) {
5023            int64_t totalTime = 0;
5024            off64_t totalOffset = mFirstMoofOffset;
5025            for (int i = 0; i < numSidxEntries; i++) {
5026                const SidxEntry *se = &mSegments[i];
5027                if (totalTime + se->mDurationUs > seekTimeUs) {
5028                    // The requested time is somewhere in this segment
5029                    if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
5030                        (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
5031                        (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
5032                        // requested next sync, or closest sync and it was closer to the end of
5033                        // this segment
5034                        totalTime += se->mDurationUs;
5035                        totalOffset += se->mSize;
5036                    }
5037                    break;
5038                }
5039                totalTime += se->mDurationUs;
5040                totalOffset += se->mSize;
5041            }
5042            mCurrentMoofOffset = totalOffset;
5043            mNextMoofOffset = -1;
5044            mCurrentSamples.clear();
5045            mCurrentSampleIndex = 0;
5046            status_t err = parseChunk(&totalOffset);
5047            if (err != OK) {
5048                return err;
5049            }
5050            mCurrentTime = totalTime * mTimescale / 1000000ll;
5051        } else {
5052            // without sidx boxes, we can only seek to 0
5053            mCurrentMoofOffset = mFirstMoofOffset;
5054            mNextMoofOffset = -1;
5055            mCurrentSamples.clear();
5056            mCurrentSampleIndex = 0;
5057            off64_t tmp = mCurrentMoofOffset;
5058            status_t err = parseChunk(&tmp);
5059            if (err != OK) {
5060                return err;
5061            }
5062            mCurrentTime = 0;
5063        }
5064
5065        if (mBuffer != NULL) {
5066            mBuffer->release();
5067            mBuffer = NULL;
5068        }
5069
5070        // fall through
5071    }
5072
5073    off64_t offset = 0;
5074    size_t size = 0;
5075    uint32_t cts = 0;
5076    bool isSyncSample = false;
5077    bool newBuffer = false;
5078    if (mBuffer == NULL) {
5079        newBuffer = true;
5080
5081        if (mCurrentSampleIndex >= mCurrentSamples.size()) {
5082            // move to next fragment if there is one
5083            if (mNextMoofOffset <= mCurrentMoofOffset) {
5084                return ERROR_END_OF_STREAM;
5085            }
5086            off64_t nextMoof = mNextMoofOffset;
5087            mCurrentMoofOffset = nextMoof;
5088            mCurrentSamples.clear();
5089            mCurrentSampleIndex = 0;
5090            status_t err = parseChunk(&nextMoof);
5091            if (err != OK) {
5092                return err;
5093            }
5094            if (mCurrentSampleIndex >= mCurrentSamples.size()) {
5095                return ERROR_END_OF_STREAM;
5096            }
5097        }
5098
5099        const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
5100        offset = smpl->offset;
5101        size = smpl->size;
5102        cts = mCurrentTime + smpl->compositionOffset;
5103        mCurrentTime += smpl->duration;
5104        isSyncSample = (mCurrentSampleIndex == 0); // XXX
5105
5106        status_t err = mGroup->acquire_buffer(&mBuffer);
5107
5108        if (err != OK) {
5109            CHECK(mBuffer == NULL);
5110            ALOGV("acquire_buffer returned %d", err);
5111            return err;
5112        }
5113        if (size > mBuffer->size()) {
5114            ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
5115            mBuffer->release();
5116            mBuffer = NULL;
5117            return ERROR_BUFFER_TOO_SMALL;
5118        }
5119    }
5120
5121    const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
5122    MetaDataBase &bufmeta = mBuffer->meta_data();
5123    bufmeta.clear();
5124    if (smpl->encryptedsizes.size()) {
5125        // store clear/encrypted lengths in metadata
5126        bufmeta.setData(kKeyPlainSizes, 0,
5127                smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
5128        bufmeta.setData(kKeyEncryptedSizes, 0,
5129                smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
5130        bufmeta.setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
5131        bufmeta.setInt32(kKeyCryptoMode, mCryptoMode);
5132        bufmeta.setData(kKeyCryptoKey, 0, mCryptoKey, 16);
5133        bufmeta.setInt32(kKeyEncryptedByteBlock, mDefaultEncryptedByteBlock);
5134        bufmeta.setInt32(kKeySkipByteBlock, mDefaultSkipByteBlock);
5135
5136        uint32_t type = 0;
5137        const void *iv = NULL;
5138        size_t ivlength = 0;
5139        if (!mFormat.findData(
5140                kKeyCryptoIV, &type, &iv, &ivlength)) {
5141            iv = smpl->iv;
5142            ivlength = 16; // use 16 or the actual size?
5143        }
5144        bufmeta.setData(kKeyCryptoIV, 0, iv, ivlength);
5145
5146    }
5147
5148    if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) {
5149        if (newBuffer) {
5150            if (!isInRange((size_t)0u, mBuffer->size(), size)) {
5151                mBuffer->release();
5152                mBuffer = NULL;
5153
5154                ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
5155                return ERROR_MALFORMED;
5156            }
5157
5158            ssize_t num_bytes_read =
5159                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
5160
5161            if (num_bytes_read < (ssize_t)size) {
5162                mBuffer->release();
5163                mBuffer = NULL;
5164
5165                ALOGE("i/o error");
5166                return ERROR_IO;
5167            }
5168
5169            CHECK(mBuffer != NULL);
5170            mBuffer->set_range(0, size);
5171            mBuffer->meta_data().setInt64(
5172                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
5173            mBuffer->meta_data().setInt64(
5174                    kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
5175
5176            if (targetSampleTimeUs >= 0) {
5177                mBuffer->meta_data().setInt64(
5178                        kKeyTargetTime, targetSampleTimeUs);
5179            }
5180
5181            if (mIsAVC) {
5182                uint32_t layerId = FindAVCLayerId(
5183                        (const uint8_t *)mBuffer->data(), mBuffer->range_length());
5184                mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId);
5185            }
5186
5187            if (isSyncSample) {
5188                mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
5189            }
5190
5191            ++mCurrentSampleIndex;
5192        }
5193
5194        if (!mIsAVC && !mIsHEVC) {
5195            *out = mBuffer;
5196            mBuffer = NULL;
5197
5198            return OK;
5199        }
5200
5201        // Each NAL unit is split up into its constituent fragments and
5202        // each one of them returned in its own buffer.
5203
5204        CHECK(mBuffer->range_length() >= mNALLengthSize);
5205
5206        const uint8_t *src =
5207            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
5208
5209        size_t nal_size = parseNALSize(src);
5210        if (mNALLengthSize > SIZE_MAX - nal_size) {
5211            ALOGE("b/24441553, b/24445122");
5212        }
5213
5214        if (mBuffer->range_length() - mNALLengthSize < nal_size) {
5215            ALOGE("incomplete NAL unit.");
5216
5217            mBuffer->release();
5218            mBuffer = NULL;
5219
5220            return ERROR_MALFORMED;
5221        }
5222
5223        MediaBufferBase *clone = mBuffer->clone();
5224        CHECK(clone != NULL);
5225        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
5226
5227        CHECK(mBuffer != NULL);
5228        mBuffer->set_range(
5229                mBuffer->range_offset() + mNALLengthSize + nal_size,
5230                mBuffer->range_length() - mNALLengthSize - nal_size);
5231
5232        if (mBuffer->range_length() == 0) {
5233            mBuffer->release();
5234            mBuffer = NULL;
5235        }
5236
5237        *out = clone;
5238
5239        return OK;
5240    } else {
5241        ALOGV("whole NAL");
5242        // Whole NAL units are returned but each fragment is prefixed by
5243        // the start code (0x00 00 00 01).
5244        ssize_t num_bytes_read = 0;
5245        int32_t drm = 0;
5246        bool usesDRM = (mFormat.findInt32(kKeyIsDRM, &drm) && drm != 0);
5247        void *data = NULL;
5248        bool isMalFormed = false;
5249        if (usesDRM) {
5250            if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) {
5251                isMalFormed = true;
5252            } else {
5253                data = mBuffer->data();
5254            }
5255        } else {
5256            int32_t max_size;
5257            if (!mFormat.findInt32(kKeyMaxInputSize, &max_size)
5258                    || !isInRange((size_t)0u, (size_t)max_size, size)) {
5259                isMalFormed = true;
5260            } else {
5261                data = mSrcBuffer;
5262            }
5263        }
5264
5265        if (isMalFormed || data == NULL) {
5266            ALOGE("isMalFormed size %zu", size);
5267            if (mBuffer != NULL) {
5268                mBuffer->release();
5269                mBuffer = NULL;
5270            }
5271            return ERROR_MALFORMED;
5272        }
5273        num_bytes_read = mDataSource->readAt(offset, data, size);
5274
5275        if (num_bytes_read < (ssize_t)size) {
5276            mBuffer->release();
5277            mBuffer = NULL;
5278
5279            ALOGE("i/o error");
5280            return ERROR_IO;
5281        }
5282
5283        if (usesDRM) {
5284            CHECK(mBuffer != NULL);
5285            mBuffer->set_range(0, size);
5286
5287        } else {
5288            uint8_t *dstData = (uint8_t *)mBuffer->data();
5289            size_t srcOffset = 0;
5290            size_t dstOffset = 0;
5291
5292            while (srcOffset < size) {
5293                isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
5294                size_t nalLength = 0;
5295                if (!isMalFormed) {
5296                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
5297                    srcOffset += mNALLengthSize;
5298                    isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
5299                            || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
5300                            || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
5301                }
5302
5303                if (isMalFormed) {
5304                    ALOGE("Video is malformed; nalLength %zu", nalLength);
5305                    mBuffer->release();
5306                    mBuffer = NULL;
5307                    return ERROR_MALFORMED;
5308                }
5309
5310                if (nalLength == 0) {
5311                    continue;
5312                }
5313
5314                if (dstOffset > SIZE_MAX - 4 ||
5315                        dstOffset + 4 > SIZE_MAX - nalLength ||
5316                        dstOffset + 4 + nalLength > mBuffer->size()) {
5317                    ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
5318                    android_errorWriteLog(0x534e4554, "26365349");
5319                    mBuffer->release();
5320                    mBuffer = NULL;
5321                    return ERROR_MALFORMED;
5322                }
5323
5324                dstData[dstOffset++] = 0;
5325                dstData[dstOffset++] = 0;
5326                dstData[dstOffset++] = 0;
5327                dstData[dstOffset++] = 1;
5328                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
5329                srcOffset += nalLength;
5330                dstOffset += nalLength;
5331            }
5332            CHECK_EQ(srcOffset, size);
5333            CHECK(mBuffer != NULL);
5334            mBuffer->set_range(0, dstOffset);
5335        }
5336
5337        mBuffer->meta_data().setInt64(
5338                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
5339        mBuffer->meta_data().setInt64(
5340                kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
5341
5342        if (targetSampleTimeUs >= 0) {
5343            mBuffer->meta_data().setInt64(
5344                    kKeyTargetTime, targetSampleTimeUs);
5345        }
5346
5347        if (isSyncSample) {
5348            mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
5349        }
5350
5351        ++mCurrentSampleIndex;
5352
5353        *out = mBuffer;
5354        mBuffer = NULL;
5355
5356        return OK;
5357    }
5358}
5359
5360MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
5361        const char *mimePrefix) {
5362    for (Track *track = mFirstTrack; track != NULL; track = track->next) {
5363        const char *mime;
5364        if (track->meta.findCString(kKeyMIMEType, &mime)
5365                && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
5366            return track;
5367        }
5368    }
5369
5370    return NULL;
5371}
5372
5373static bool LegacySniffMPEG4(DataSourceBase *source, float *confidence) {
5374    uint8_t header[8];
5375
5376    ssize_t n = source->readAt(4, header, sizeof(header));
5377    if (n < (ssize_t)sizeof(header)) {
5378        return false;
5379    }
5380
5381    if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
5382        || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
5383        || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
5384        || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
5385        || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
5386        || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
5387        || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
5388        || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)) {
5389        *confidence = 0.4;
5390
5391        return true;
5392    }
5393
5394    return false;
5395}
5396
5397static bool isCompatibleBrand(uint32_t fourcc) {
5398    static const uint32_t kCompatibleBrands[] = {
5399        FOURCC('i', 's', 'o', 'm'),
5400        FOURCC('i', 's', 'o', '2'),
5401        FOURCC('a', 'v', 'c', '1'),
5402        FOURCC('h', 'v', 'c', '1'),
5403        FOURCC('h', 'e', 'v', '1'),
5404        FOURCC('3', 'g', 'p', '4'),
5405        FOURCC('m', 'p', '4', '1'),
5406        FOURCC('m', 'p', '4', '2'),
5407        FOURCC('d', 'a', 's', 'h'),
5408
5409        // Won't promise that the following file types can be played.
5410        // Just give these file types a chance.
5411        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
5412        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
5413
5414        FOURCC('3', 'g', '2', 'a'),  // 3GPP2
5415        FOURCC('3', 'g', '2', 'b'),
5416        FOURCC('m', 'i', 'f', '1'),  // HEIF image
5417        FOURCC('h', 'e', 'i', 'c'),  // HEIF image
5418        FOURCC('m', 's', 'f', '1'),  // HEIF image sequence
5419        FOURCC('h', 'e', 'v', 'c'),  // HEIF image sequence
5420    };
5421
5422    for (size_t i = 0;
5423         i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
5424         ++i) {
5425        if (kCompatibleBrands[i] == fourcc) {
5426            return true;
5427        }
5428    }
5429
5430    return false;
5431}
5432
5433// Attempt to actually parse the 'ftyp' atom and determine if a suitable
5434// compatible brand is present.
5435// Also try to identify where this file's metadata ends
5436// (end of the 'moov' atom) and report it to the caller as part of
5437// the metadata.
5438static bool BetterSniffMPEG4(DataSourceBase *source, float *confidence) {
5439    // We scan up to 128 bytes to identify this file as an MP4.
5440    static const off64_t kMaxScanOffset = 128ll;
5441
5442    off64_t offset = 0ll;
5443    bool foundGoodFileType = false;
5444    off64_t moovAtomEndOffset = -1ll;
5445    bool done = false;
5446
5447    while (!done && offset < kMaxScanOffset) {
5448        uint32_t hdr[2];
5449        if (source->readAt(offset, hdr, 8) < 8) {
5450            return false;
5451        }
5452
5453        uint64_t chunkSize = ntohl(hdr[0]);
5454        uint32_t chunkType = ntohl(hdr[1]);
5455        off64_t chunkDataOffset = offset + 8;
5456
5457        if (chunkSize == 1) {
5458            if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
5459                return false;
5460            }
5461
5462            chunkSize = ntoh64(chunkSize);
5463            chunkDataOffset += 8;
5464
5465            if (chunkSize < 16) {
5466                // The smallest valid chunk is 16 bytes long in this case.
5467                return false;
5468            }
5469
5470        } else if (chunkSize < 8) {
5471            // The smallest valid chunk is 8 bytes long.
5472            return false;
5473        }
5474
5475        // (data_offset - offset) is either 8 or 16
5476        off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
5477        if (chunkDataSize < 0) {
5478            ALOGE("b/23540914");
5479            return false;
5480        }
5481
5482        char chunkstring[5];
5483        MakeFourCCString(chunkType, chunkstring);
5484        ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset);
5485        switch (chunkType) {
5486            case FOURCC('f', 't', 'y', 'p'):
5487            {
5488                if (chunkDataSize < 8) {
5489                    return false;
5490                }
5491
5492                uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
5493                for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
5494                    if (i == 1) {
5495                        // Skip this index, it refers to the minorVersion,
5496                        // not a brand.
5497                        continue;
5498                    }
5499
5500                    uint32_t brand;
5501                    if (source->readAt(
5502                                chunkDataOffset + 4 * i, &brand, 4) < 4) {
5503                        return false;
5504                    }
5505
5506                    brand = ntohl(brand);
5507
5508                    if (isCompatibleBrand(brand)) {
5509                        foundGoodFileType = true;
5510                        break;
5511                    }
5512                }
5513
5514                if (!foundGoodFileType) {
5515                    return false;
5516                }
5517
5518                break;
5519            }
5520
5521            case FOURCC('m', 'o', 'o', 'v'):
5522            {
5523                moovAtomEndOffset = offset + chunkSize;
5524
5525                done = true;
5526                break;
5527            }
5528
5529            default:
5530                break;
5531        }
5532
5533        offset += chunkSize;
5534    }
5535
5536    if (!foundGoodFileType) {
5537        return false;
5538    }
5539
5540    *confidence = 0.4f;
5541
5542    return true;
5543}
5544
5545static MediaExtractor* CreateExtractor(DataSourceBase *source, void *) {
5546    return new MPEG4Extractor(source);
5547}
5548
5549static MediaExtractor::CreatorFunc Sniff(
5550        DataSourceBase *source, float *confidence, void **,
5551        MediaExtractor::FreeMetaFunc *) {
5552    if (BetterSniffMPEG4(source, confidence)) {
5553        return CreateExtractor;
5554    }
5555
5556    if (LegacySniffMPEG4(source, confidence)) {
5557        ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
5558        return CreateExtractor;
5559    }
5560
5561    return NULL;
5562}
5563
5564extern "C" {
5565// This is the only symbol that needs to be exported
5566__attribute__ ((visibility ("default")))
5567MediaExtractor::ExtractorDef GETEXTRACTORDEF() {
5568    return {
5569        MediaExtractor::EXTRACTORDEF_VERSION,
5570        UUID("27575c67-4417-4c54-8d3d-8e626985a164"),
5571        1, // version
5572        "MP4 Extractor",
5573        Sniff
5574    };
5575}
5576
5577} // extern "C"
5578
5579}  // namespace android
5580