MPEG4Extractor.cpp revision 8c66cf58054d705c3b169eaf830f793adb8d4019
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MPEG4Extractor"
19
20#include <ctype.h>
21#include <inttypes.h>
22#include <memory>
23#include <stdint.h>
24#include <stdlib.h>
25#include <string.h>
26
27#include <utils/Log.h>
28
29#include "MPEG4Extractor.h"
30#include "SampleTable.h"
31#include "ItemTable.h"
32#include "include/ESDS.h"
33
34#include <media/ExtractorUtils.h>
35#include <media/MediaTrack.h>
36#include <media/stagefright/foundation/ABitReader.h>
37#include <media/stagefright/foundation/ABuffer.h>
38#include <media/stagefright/foundation/ADebug.h>
39#include <media/stagefright/foundation/AMessage.h>
40#include <media/stagefright/foundation/AUtils.h>
41#include <media/stagefright/foundation/ByteUtils.h>
42#include <media/stagefright/foundation/ColorUtils.h>
43#include <media/stagefright/foundation/avc_utils.h>
44#include <media/stagefright/foundation/hexdump.h>
45#include <media/stagefright/MediaBufferBase.h>
46#include <media/stagefright/MediaBufferGroup.h>
47#include <media/stagefright/MediaDefs.h>
48#include <media/stagefright/MetaData.h>
49#include <utils/String8.h>
50
51#include <byteswap.h>
52#include "include/ID3.h"
53
54#ifndef UINT32_MAX
55#define UINT32_MAX       (4294967295U)
56#endif
57
58namespace android {
59
// File-local size limits used while parsing.
enum {
    // max track header chunk to return
    // (getTrackMetaData() clamps the MPEG2 stream header it copies into
    // kKeyStreamHeader to this many bytes).
    kMaxTrackHeaderSize = 32,

    // maximum size of an atom. Some atoms can be bigger according to the spec,
    // but we only allow up to this size.
    // (64 MiB; parseChunk() rejects any atom other than 'mdat' whose payload
    // exceeds it.)
    kMaxAtomSize = 64 * 1024 * 1024,
};
68
// MediaTrack implementation for a single track of an MPEG-4 file.
// Only the declaration lives here; the member functions are defined elsewhere
// in this file.
class MPEG4Source : public MediaTrack {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(MetaDataBase &format,
                DataSourceBase *dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset,
                const sp<ItemTable> &itemTable);
    virtual status_t init();

    virtual status_t start(MetaDataBase *params = NULL);
    virtual status_t stop();

    virtual status_t getFormat(MetaDataBase &);

    virtual status_t read(MediaBufferBase **buffer, const ReadOptions *options = NULL);
    virtual bool supportNonblockingRead() { return true; }
    virtual status_t fragmentedRead(MediaBufferBase **buffer, const ReadOptions *options = NULL);

    virtual ~MPEG4Source();

private:
    Mutex mLock;

    // Held by reference / raw pointer: these objects are not owned by the
    // source (see the constructor comment above).
    MetaDataBase &mFormat;
    DataSourceBase *mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    int32_t mDefaultEncryptedByteBlock;
    int32_t mDefaultSkipByteBlock;
    // State for the sample auxiliary information currently being parsed
    // (see parseSampleAuxiliaryInformationSizes/Offsets below).
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBufferBase *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    bool mIsHeif;
    sp<ItemTable> mItemTable;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
    status_t parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags);
    status_t parseSampleEncryption(off64_t offset);

    // Values read by parseTrackFragmentHeader().
    // NOTE(review): the flag bits presumably mirror the tf_flags of the
    // ISO BMFF 'tfhd' box -- confirm against the parser.
    struct TrackFragmentHeaderInfo {
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record kept in mCurrentSamples: location and size of the
    // sample data, timing, and the encryption details (IV plus the
    // clear/encrypted byte counts).
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Declared private so the class cannot be copied from outside.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
188
// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache while forwarding
// all remaining requests to the wrapped datasource.
// This is used to cache the full sampletable metadata for a single track,
// possibly wrapping multiple times to cover all tracks, i.e.
// each CachedRangedDataSource caches the sampletable metadata for a single track.

struct CachedRangedDataSource : public DataSourceBase {
    // Wraps |source|; the wrapper does not own it until setCachedRange()
    // succeeds with assumeSourceOwnershipOnSuccess == true.
    explicit CachedRangedDataSource(DataSourceBase *source);
    virtual ~CachedRangedDataSource();

    // initCheck(), getSize() and flags() forward directly to the wrapped source.
    virtual status_t initCheck() const;
    // Served from the cache when [offset, offset + size) lies within the
    // cached range, otherwise forwarded to the wrapped source.
    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
    virtual status_t getSize(off64_t *size);
    virtual uint32_t flags();

    // Drops any existing cache, then reads |size| bytes at |offset| from the
    // wrapped source into a freshly malloc'ed buffer.
    // Returns OK on success, -ENOMEM if the allocation fails and ERROR_IO on a
    // short read (in which case the cache is left empty).
    status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);


private:
    // Taken by readAt() and setCachedRange().
    Mutex mLock;

    DataSourceBase *mSource;
    // When true the destructor deletes mSource.
    bool mOwnsDataSource;
    // File offset and length of the cached range; both 0 when nothing is cached.
    off64_t mCachedOffset;
    size_t mCachedSize;
    // malloc'ed copy of the cached range, or NULL.
    uint8_t *mCache;

    // Frees mCache and resets the cached range to empty.
    void clearCache();

    // Declared private so the type cannot be copied from outside.
    CachedRangedDataSource(const CachedRangedDataSource &);
    CachedRangedDataSource &operator=(const CachedRangedDataSource &);
};
222
223CachedRangedDataSource::CachedRangedDataSource(DataSourceBase *source)
224    : mSource(source),
225      mOwnsDataSource(false),
226      mCachedOffset(0),
227      mCachedSize(0),
228      mCache(NULL) {
229}
230
231CachedRangedDataSource::~CachedRangedDataSource() {
232    clearCache();
233    if (mOwnsDataSource) {
234        delete (CachedRangedDataSource*)mSource;
235    }
236}
237
238void CachedRangedDataSource::clearCache() {
239    if (mCache) {
240        free(mCache);
241        mCache = NULL;
242    }
243
244    mCachedOffset = 0;
245    mCachedSize = 0;
246}
247
248status_t CachedRangedDataSource::initCheck() const {
249    return mSource->initCheck();
250}
251
252ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
253    Mutex::Autolock autoLock(mLock);
254
255    if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
256        memcpy(data, &mCache[offset - mCachedOffset], size);
257        return size;
258    }
259
260    return mSource->readAt(offset, data, size);
261}
262
263status_t CachedRangedDataSource::getSize(off64_t *size) {
264    return mSource->getSize(size);
265}
266
267uint32_t CachedRangedDataSource::flags() {
268    return mSource->flags();
269}
270
271status_t CachedRangedDataSource::setCachedRange(off64_t offset,
272        size_t size,
273        bool assumeSourceOwnershipOnSuccess) {
274    Mutex::Autolock autoLock(mLock);
275
276    clearCache();
277
278    mCache = (uint8_t *)malloc(size);
279
280    if (mCache == NULL) {
281        return -ENOMEM;
282    }
283
284    mCachedOffset = offset;
285    mCachedSize = size;
286
287    ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
288
289    if (err < (ssize_t)size) {
290        clearCache();
291
292        return ERROR_IO;
293    }
294    mOwnsDataSource = assumeSourceOwnershipOnSuccess;
295    return OK;
296}
297
298////////////////////////////////////////////////////////////////////////////////
299
// Debug switch: when true, parseChunk() prints each chunk's type and size and
// hexdumps up to the first 256 bytes of it.
static const bool kUseHexDump = false;
301
302static const char *FourCC2MIME(uint32_t fourcc) {
303    switch (fourcc) {
304        case FOURCC('m', 'p', '4', 'a'):
305            return MEDIA_MIMETYPE_AUDIO_AAC;
306
307        case FOURCC('s', 'a', 'm', 'r'):
308            return MEDIA_MIMETYPE_AUDIO_AMR_NB;
309
310        case FOURCC('s', 'a', 'w', 'b'):
311            return MEDIA_MIMETYPE_AUDIO_AMR_WB;
312
313        case FOURCC('m', 'p', '4', 'v'):
314            return MEDIA_MIMETYPE_VIDEO_MPEG4;
315
316        case FOURCC('s', '2', '6', '3'):
317        case FOURCC('h', '2', '6', '3'):
318        case FOURCC('H', '2', '6', '3'):
319            return MEDIA_MIMETYPE_VIDEO_H263;
320
321        case FOURCC('a', 'v', 'c', '1'):
322            return MEDIA_MIMETYPE_VIDEO_AVC;
323
324        case FOURCC('h', 'v', 'c', '1'):
325        case FOURCC('h', 'e', 'v', '1'):
326            return MEDIA_MIMETYPE_VIDEO_HEVC;
327        default:
328            ALOGW("Unknown fourcc: %c%c%c%c",
329                   (fourcc >> 24) & 0xff,
330                   (fourcc >> 16) & 0xff,
331                   (fourcc >> 8) & 0xff,
332                   fourcc & 0xff
333                   );
334            return "application/octet-stream";
335    }
336}
337
338static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
339    if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
340        // AMR NB audio is always mono, 8kHz
341        *channels = 1;
342        *rate = 8000;
343        return true;
344    } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
345        // AMR WB audio is always mono, 16kHz
346        *channels = 1;
347        *rate = 16000;
348        return true;
349    }
350    return false;
351}
352
353MPEG4Extractor::MPEG4Extractor(DataSourceBase *source, const char *mime)
354    : mMoofOffset(0),
355      mMoofFound(false),
356      mMdatFound(false),
357      mDataSource(source),
358      mCachedSource(NULL),
359      mInitCheck(NO_INIT),
360      mHeaderTimescale(0),
361      mIsQT(false),
362      mIsHeif(false),
363      mHasMoovBox(false),
364      mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
365      mFirstTrack(NULL),
366      mLastTrack(NULL) {
367    ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
368}
369
370MPEG4Extractor::~MPEG4Extractor() {
371    Track *track = mFirstTrack;
372    while (track) {
373        Track *next = track->next;
374
375        delete track;
376        track = next;
377    }
378    mFirstTrack = mLastTrack = NULL;
379
380    for (size_t i = 0; i < mPssh.size(); i++) {
381        delete [] mPssh[i].data;
382    }
383    mPssh.clear();
384
385    delete mCachedSource;
386}
387
388uint32_t MPEG4Extractor::flags() const {
389    return CAN_PAUSE |
390            ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
391                    (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
392}
393
394status_t MPEG4Extractor::getMetaData(MetaDataBase &meta) {
395    status_t err;
396    if ((err = readMetaData()) != OK) {
397        return UNKNOWN_ERROR;
398    }
399    meta = mFileMetaData;
400    return OK;
401}
402
403size_t MPEG4Extractor::countTracks() {
404    status_t err;
405    if ((err = readMetaData()) != OK) {
406        ALOGV("MPEG4Extractor::countTracks: no tracks");
407        return 0;
408    }
409
410    size_t n = 0;
411    Track *track = mFirstTrack;
412    while (track) {
413        ++n;
414        track = track->next;
415    }
416
417    ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
418    return n;
419}
420
// Copies the metadata of the track at position |index| into |meta|.
//
// Before copying, two lazy, one-time updates may be applied to the track's
// own metadata:
//   * if the track has an edit list, kKeyEncoderDelay / kKeyEncoderPadding are
//     derived from it (has_elst is cleared so this is attempted only once);
//   * if |flags| contains kIncludeExtensiveMetaData, video tracks get a stream
//     header (MPEG2 only) and a thumbnail time.
//
// Returns UNKNOWN_ERROR if the file could not be parsed or |index| is out of
// range, OK otherwise.
status_t MPEG4Extractor::getTrackMetaData(
        MetaDataBase &meta,
        size_t index, uint32_t flags) {
    status_t err;
    if ((err = readMetaData()) != OK) {
        return UNKNOWN_ERROR;
    }

    // Walk the singly linked track list to the requested position.
    Track *track = mFirstTrack;
    while (index > 0) {
        if (track == NULL) {
            return UNKNOWN_ERROR;
        }

        track = track->next;
        --index;
    }

    if (track == NULL) {
        return UNKNOWN_ERROR;
    }

    // Immediately-invoked lambda: lets the overflow checks below bail out with
    // a plain "return" without leaving getTrackMetaData() itself.
    [=] {
        int64_t duration;
        int32_t samplerate;
        if (track->has_elst && mHeaderTimescale != 0 &&
                track->meta.findInt64(kKeyDuration, &duration) &&
                track->meta.findInt32(kKeySampleRate, &samplerate)) {

            // Cleared up front so the computation runs at most once per track,
            // even when one of the checks below gives up early.
            track->has_elst = false;

            // elst_segment_duration must fit in the signed 64-bit arithmetic
            // used below.
            if (track->elst_segment_duration > INT64_MAX) {
                return;
            }
            int64_t segment_duration = track->elst_segment_duration;
            int64_t media_time = track->elst_media_time;
            int64_t halfscale = mHeaderTimescale / 2;
            ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
                  ", halfscale = %" PRId64 ", timescale = %d",
                  segment_duration,
                  media_time,
                  halfscale,
                  mHeaderTimescale);

            int64_t delay;
            // delay = ((media_time * samplerate) + halfscale) / mHeaderTimescale;
            // The "(x /= y, false)" term performs the division inside the
            // short-circuit chain and always evaluates to false, so the range
            // checks that follow it see the divided value.
            if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
                    __builtin_add_overflow(delay, halfscale, &delay) ||
                    (delay /= mHeaderTimescale, false) ||
                    delay > INT32_MAX ||
                    delay < INT32_MIN) {
                return;
            }
            ALOGV("delay = %" PRId64, delay);
            track->meta.setInt32(kKeyEncoderDelay, delay);

            int64_t scaled_duration;
            // scaled_duration = duration * mHeaderTimescale;
            if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
                return;
            }
            ALOGV("scaled_duration = %" PRId64, scaled_duration);

            int64_t segment_end;
            int64_t padding;
            // padding = scaled_duration - ((segment_duration + media_time) * 1000000);
            if (__builtin_add_overflow(segment_duration, media_time, &segment_end) ||
                    __builtin_mul_overflow(segment_end, 1000000, &segment_end) ||
                    __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
                return;
            }
            ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);

            if (padding < 0) {
                // track duration from media header (which is what kKeyDuration is) might
                // be slightly shorter than the segment duration, which would make the
                // padding negative. Clamp to zero.
                padding = 0;
            }

            int64_t paddingsamples;
            int64_t halfscale_e6;
            int64_t timescale_e6;
            // paddingsamples = ((padding * samplerate) + (halfscale * 1000000))
            //                / (mHeaderTimescale * 1000000);
            if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
                    __builtin_mul_overflow(halfscale, 1000000, &halfscale_e6) ||
                    __builtin_mul_overflow(mHeaderTimescale, 1000000, &timescale_e6) ||
                    __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
                    (paddingsamples /= timescale_e6, false) ||
                    paddingsamples > INT32_MAX) {
                return;
            }
            ALOGV("paddingsamples = %" PRId64, paddingsamples);
            track->meta.setInt32(kKeyEncoderPadding, paddingsamples);
        }
    }();

    // The expensive metadata is computed on first request only and then cached
    // in the track's metadata.
    if ((flags & kIncludeExtensiveMetaData)
            && !track->includes_expensive_metadata) {
        track->includes_expensive_metadata = true;

        const char *mime;
        CHECK(track->meta.findCString(kKeyMIMEType, &mime));
        if (!strncasecmp("video/", mime, 6)) {
            // MPEG2 tracks do not provide CSD, so read the stream header
            if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
                off64_t offset;
                size_t size;
                if (track->sampleTable->getMetaDataForSample(
                            0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
                    // Copy at most kMaxTrackHeaderSize bytes of the first sample.
                    if (size > kMaxTrackHeaderSize) {
                        size = kMaxTrackHeaderSize;
                    }
                    uint8_t header[kMaxTrackHeaderSize];
                    if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
                        track->meta.setData(kKeyStreamHeader, 'mdat', header, size);
                    }
                }
            }

            if (mMoofOffset > 0) {
                int64_t duration;
                if (track->meta.findInt64(kKeyDuration, &duration)) {
                    // nothing fancy, just pick a frame near 1/4th of the duration
                    track->meta.setInt64(
                            kKeyThumbnailTime, duration / 4);
                }
            } else {
                uint32_t sampleIndex;
                uint32_t sampleTime;
                // The timescale check comes first so the division below can
                // never be by zero.
                if (track->timescale != 0 &&
                        track->sampleTable->findThumbnailSample(&sampleIndex) == OK
                        && track->sampleTable->getMetaDataForSample(
                            sampleIndex, NULL /* offset */, NULL /* size */,
                            &sampleTime) == OK) {
                    // Convert from track timescale units to microseconds.
                    track->meta.setInt64(
                            kKeyThumbnailTime,
                            ((int64_t)sampleTime * 1000000) / track->timescale);
                }
            }
        }
    }

    meta = track->meta;
    return OK;
}
568
569status_t MPEG4Extractor::readMetaData() {
570    if (mInitCheck != NO_INIT) {
571        return mInitCheck;
572    }
573
574    off64_t offset = 0;
575    status_t err;
576    bool sawMoovOrSidx = false;
577
578    while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
579             (mIsHeif && (mPreferHeif || !mHasMoovBox) &&
580                     (mItemTable != NULL) && mItemTable->isValid()))) {
581        off64_t orig_offset = offset;
582        err = parseChunk(&offset, 0);
583
584        if (err != OK && err != UNKNOWN_ERROR) {
585            break;
586        } else if (offset <= orig_offset) {
587            // only continue parsing if the offset was advanced,
588            // otherwise we might end up in an infinite loop
589            ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
590            err = ERROR_MALFORMED;
591            break;
592        } else if (err == UNKNOWN_ERROR) {
593            sawMoovOrSidx = true;
594        }
595    }
596
597    if (mIsHeif && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
598        off64_t exifOffset;
599        size_t exifSize;
600        if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
601            mFileMetaData.setInt64(kKeyExifOffset, (int64_t)exifOffset);
602            mFileMetaData.setInt64(kKeyExifSize, (int64_t)exifSize);
603        }
604        for (uint32_t imageIndex = 0;
605                imageIndex < mItemTable->countImages(); imageIndex++) {
606            sp<MetaData> meta = mItemTable->getImageMeta(imageIndex);
607            if (meta == NULL) {
608                ALOGE("heif image %u has no meta!", imageIndex);
609                continue;
610            }
611            // Some heif files advertise image sequence brands (eg. 'hevc') in
612            // ftyp box, but don't have any valid tracks in them. Instead of
613            // reporting the entire file as malformed, we override the error
614            // to allow still images to be extracted.
615            if (err != OK) {
616                ALOGW("Extracting still images only");
617                err = OK;
618            }
619            mInitCheck = OK;
620
621            ALOGV("adding HEIF image track %u", imageIndex);
622            Track *track = new Track;
623            track->next = NULL;
624            if (mLastTrack != NULL) {
625                mLastTrack->next = track;
626            } else {
627                mFirstTrack = track;
628            }
629            mLastTrack = track;
630
631            track->meta = *(meta.get());
632            track->meta.setInt32(kKeyTrackID, imageIndex);
633            track->includes_expensive_metadata = false;
634            track->skipTrack = false;
635            track->timescale = 1000000;
636        }
637    }
638
639    if (mInitCheck == OK) {
640        if (findTrackByMimePrefix("video/") != NULL) {
641            mFileMetaData.setCString(
642                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
643        } else if (findTrackByMimePrefix("audio/") != NULL) {
644            mFileMetaData.setCString(kKeyMIMEType, "audio/mp4");
645        } else if (findTrackByMimePrefix(
646                MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
647            mFileMetaData.setCString(
648                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_HEIF);
649        } else {
650            mFileMetaData.setCString(kKeyMIMEType, "application/octet-stream");
651        }
652    } else {
653        mInitCheck = err;
654    }
655
656    CHECK_NE(err, (status_t)NO_INIT);
657
658    // copy pssh data into file metadata
659    uint64_t psshsize = 0;
660    for (size_t i = 0; i < mPssh.size(); i++) {
661        psshsize += 20 + mPssh[i].datalen;
662    }
663    if (psshsize > 0 && psshsize <= UINT32_MAX) {
664        char *buf = (char*)malloc(psshsize);
665        if (!buf) {
666            ALOGE("b/28471206");
667            return NO_MEMORY;
668        }
669        char *ptr = buf;
670        for (size_t i = 0; i < mPssh.size(); i++) {
671            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
672            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
673            ptr += (20 + mPssh[i].datalen);
674        }
675        mFileMetaData.setData(kKeyPssh, 'pssh', buf, psshsize);
676        free(buf);
677    }
678
679    return mInitCheck;
680}
681
682struct PathAdder {
683    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
684        : mPath(path) {
685        mPath->push(chunkType);
686    }
687
688    ~PathAdder() {
689        mPath->pop();
690    }
691
692private:
693    Vector<uint32_t> *mPath;
694
695    PathAdder(const PathAdder &);
696    PathAdder &operator=(const PathAdder &);
697};
698
699static bool underMetaDataPath(const Vector<uint32_t> &path) {
700    return path.size() >= 5
701        && path[0] == FOURCC('m', 'o', 'o', 'v')
702        && path[1] == FOURCC('u', 'd', 't', 'a')
703        && path[2] == FOURCC('m', 'e', 't', 'a')
704        && path[3] == FOURCC('i', 'l', 's', 't');
705}
706
707static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
708    return path.size() >= 2
709            && path[0] == FOURCC('m', 'o', 'o', 'v')
710            && path[1] == FOURCC('m', 'e', 't', 'a')
711            && (depth == 2
712            || (depth == 3
713                    && (path[2] == FOURCC('h', 'd', 'l', 'r')
714                    ||  path[2] == FOURCC('i', 'l', 's', 't')
715                    ||  path[2] == FOURCC('k', 'e', 'y', 's'))));
716}
717
718// Given a time in seconds since Jan 1 1904, produce a human-readable string.
719static bool convertTimeToDate(int64_t time_1904, String8 *s) {
720    // delta between mpeg4 time and unix epoch time
721    static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
722    if (time_1904 < INT64_MIN + delta) {
723        return false;
724    }
725    time_t time_1970 = time_1904 - delta;
726
727    char tmp[32];
728    struct tm* tm = gmtime(&time_1970);
729    if (tm != NULL &&
730            strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
731        s->setTo(tmp);
732        return true;
733    }
734    return false;
735}
736
737status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
738    ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
739
740    if (*offset < 0) {
741        ALOGE("b/23540914");
742        return ERROR_MALFORMED;
743    }
744    if (depth > 100) {
745        ALOGE("b/27456299");
746        return ERROR_MALFORMED;
747    }
748    uint32_t hdr[2];
749    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
750        return ERROR_IO;
751    }
752    uint64_t chunk_size = ntohl(hdr[0]);
753    int32_t chunk_type = ntohl(hdr[1]);
754    off64_t data_offset = *offset + 8;
755
756    if (chunk_size == 1) {
757        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
758            return ERROR_IO;
759        }
760        chunk_size = ntoh64(chunk_size);
761        data_offset += 8;
762
763        if (chunk_size < 16) {
764            // The smallest valid chunk is 16 bytes long in this case.
765            return ERROR_MALFORMED;
766        }
767    } else if (chunk_size == 0) {
768        if (depth == 0) {
769            // atom extends to end of file
770            off64_t sourceSize;
771            if (mDataSource->getSize(&sourceSize) == OK) {
772                chunk_size = (sourceSize - *offset);
773            } else {
774                // XXX could we just pick a "sufficiently large" value here?
775                ALOGE("atom size is 0, and data source has no size");
776                return ERROR_MALFORMED;
777            }
778        } else {
779            // not allowed for non-toplevel atoms, skip it
780            *offset += 4;
781            return OK;
782        }
783    } else if (chunk_size < 8) {
784        // The smallest valid chunk is 8 bytes long.
785        ALOGE("invalid chunk size: %" PRIu64, chunk_size);
786        return ERROR_MALFORMED;
787    }
788
789    char chunk[5];
790    MakeFourCCString(chunk_type, chunk);
791    ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
792
793    if (kUseHexDump) {
794        static const char kWhitespace[] = "                                        ";
795        const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
796        printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
797
798        char buffer[256];
799        size_t n = chunk_size;
800        if (n > sizeof(buffer)) {
801            n = sizeof(buffer);
802        }
803        if (mDataSource->readAt(*offset, buffer, n)
804                < (ssize_t)n) {
805            return ERROR_IO;
806        }
807
808        hexdump(buffer, n);
809    }
810
811    PathAdder autoAdder(&mPath, chunk_type);
812
813    // (data_offset - *offset) is either 8 or 16
814    off64_t chunk_data_size = chunk_size - (data_offset - *offset);
815    if (chunk_data_size < 0) {
816        ALOGE("b/23540914");
817        return ERROR_MALFORMED;
818    }
819    if (chunk_type != FOURCC('m', 'd', 'a', 't') && chunk_data_size > kMaxAtomSize) {
820        char errMsg[100];
821        sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
822        ALOGE("%s (b/28615448)", errMsg);
823        android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
824        return ERROR_MALFORMED;
825    }
826
827    if (chunk_type != FOURCC('c', 'p', 'r', 't')
828            && chunk_type != FOURCC('c', 'o', 'v', 'r')
829            && mPath.size() == 5 && underMetaDataPath(mPath)) {
830        off64_t stop_offset = *offset + chunk_size;
831        *offset = data_offset;
832        while (*offset < stop_offset) {
833            status_t err = parseChunk(offset, depth + 1);
834            if (err != OK) {
835                return err;
836            }
837        }
838
839        if (*offset != stop_offset) {
840            return ERROR_MALFORMED;
841        }
842
843        return OK;
844    }
845
846    switch(chunk_type) {
847        case FOURCC('m', 'o', 'o', 'v'):
848        case FOURCC('t', 'r', 'a', 'k'):
849        case FOURCC('m', 'd', 'i', 'a'):
850        case FOURCC('m', 'i', 'n', 'f'):
851        case FOURCC('d', 'i', 'n', 'f'):
852        case FOURCC('s', 't', 'b', 'l'):
853        case FOURCC('m', 'v', 'e', 'x'):
854        case FOURCC('m', 'o', 'o', 'f'):
855        case FOURCC('t', 'r', 'a', 'f'):
856        case FOURCC('m', 'f', 'r', 'a'):
857        case FOURCC('u', 'd', 't', 'a'):
858        case FOURCC('i', 'l', 's', 't'):
859        case FOURCC('s', 'i', 'n', 'f'):
860        case FOURCC('s', 'c', 'h', 'i'):
861        case FOURCC('e', 'd', 't', 's'):
862        case FOURCC('w', 'a', 'v', 'e'):
863        {
864            if (chunk_type == FOURCC('m', 'o', 'o', 'v') && depth != 0) {
865                ALOGE("moov: depth %d", depth);
866                return ERROR_MALFORMED;
867            }
868
869            if (chunk_type == FOURCC('m', 'o', 'o', 'v') && mInitCheck == OK) {
870                ALOGE("duplicate moov");
871                return ERROR_MALFORMED;
872            }
873
874            if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) {
875                // store the offset of the first segment
876                mMoofFound = true;
877                mMoofOffset = *offset;
878            }
879
880            if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
881                ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
882
883                if (mDataSource->flags()
884                        & (DataSourceBase::kWantsPrefetching
885                            | DataSourceBase::kIsCachingDataSource)) {
886                    CachedRangedDataSource *cachedSource =
887                        new CachedRangedDataSource(mDataSource);
888
889                    if (cachedSource->setCachedRange(
890                            *offset, chunk_size,
891                            mCachedSource != NULL /* assume ownership on success */) == OK) {
892                        mDataSource = mCachedSource = cachedSource;
893                    } else {
894                        delete cachedSource;
895                    }
896                }
897
898                if (mLastTrack == NULL) {
899                    return ERROR_MALFORMED;
900                }
901
902                mLastTrack->sampleTable = new SampleTable(mDataSource);
903            }
904
905            bool isTrack = false;
906            if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
907                if (depth != 1) {
908                    ALOGE("trak: depth %d", depth);
909                    return ERROR_MALFORMED;
910                }
911                isTrack = true;
912
913                ALOGV("adding new track");
914                Track *track = new Track;
915                track->next = NULL;
916                if (mLastTrack) {
917                    mLastTrack->next = track;
918                } else {
919                    mFirstTrack = track;
920                }
921                mLastTrack = track;
922
923                track->includes_expensive_metadata = false;
924                track->skipTrack = false;
925                track->timescale = 0;
926                track->meta.setCString(kKeyMIMEType, "application/octet-stream");
927                track->has_elst = false;
928                track->subsample_encryption = false;
929            }
930
931            off64_t stop_offset = *offset + chunk_size;
932            *offset = data_offset;
933            while (*offset < stop_offset) {
934                status_t err = parseChunk(offset, depth + 1);
935                if (err != OK) {
936                    if (isTrack) {
937                        mLastTrack->skipTrack = true;
938                        break;
939                    }
940                    return err;
941                }
942            }
943
944            if (*offset != stop_offset) {
945                return ERROR_MALFORMED;
946            }
947
948            if (isTrack) {
949                int32_t trackId;
950                // There must be exactly one track header per track.
951                if (!mLastTrack->meta.findInt32(kKeyTrackID, &trackId)) {
952                    mLastTrack->skipTrack = true;
953                }
954
955                status_t err = verifyTrack(mLastTrack);
956                if (err != OK) {
957                    mLastTrack->skipTrack = true;
958                }
959
960                if (mLastTrack->skipTrack) {
961                    ALOGV("skipping this track...");
962                    Track *cur = mFirstTrack;
963
964                    if (cur == mLastTrack) {
965                        delete cur;
966                        mFirstTrack = mLastTrack = NULL;
967                    } else {
968                        while (cur && cur->next != mLastTrack) {
969                            cur = cur->next;
970                        }
971                        if (cur) {
972                            cur->next = NULL;
973                        }
974                        delete mLastTrack;
975                        mLastTrack = cur;
976                    }
977
978                    return OK;
979                }
980            } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
981                mInitCheck = OK;
982
983                return UNKNOWN_ERROR;  // Return a dummy error.
984            }
985            break;
986        }
987
988        case FOURCC('s', 'c', 'h', 'm'):
989        {
990
991            *offset += chunk_size;
992            if (!mLastTrack) {
993                return ERROR_MALFORMED;
994            }
995
996            uint32_t scheme_type;
997            if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) {
998                return ERROR_IO;
999            }
1000            scheme_type = ntohl(scheme_type);
1001            int32_t mode = kCryptoModeUnencrypted;
1002            switch(scheme_type) {
1003                case FOURCC('c', 'b', 'c', '1'):
1004                {
1005                    mode = kCryptoModeAesCbc;
1006                    break;
1007                }
1008                case FOURCC('c', 'b', 'c', 's'):
1009                {
1010                    mode = kCryptoModeAesCbc;
1011                    mLastTrack->subsample_encryption = true;
1012                    break;
1013                }
1014                case FOURCC('c', 'e', 'n', 'c'):
1015                {
1016                    mode = kCryptoModeAesCtr;
1017                    break;
1018                }
1019                case FOURCC('c', 'e', 'n', 's'):
1020                {
1021                    mode = kCryptoModeAesCtr;
1022                    mLastTrack->subsample_encryption = true;
1023                    break;
1024                }
1025            }
1026            mLastTrack->meta.setInt32(kKeyCryptoMode, mode);
1027            break;
1028        }
1029
1030
1031        case FOURCC('e', 'l', 's', 't'):
1032        {
1033            *offset += chunk_size;
1034
1035            if (!mLastTrack) {
1036                return ERROR_MALFORMED;
1037            }
1038
1039            // See 14496-12 8.6.6
1040            uint8_t version;
1041            if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1042                return ERROR_IO;
1043            }
1044
1045            uint32_t entry_count;
1046            if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1047                return ERROR_IO;
1048            }
1049
1050            if (entry_count != 1) {
1051                // we only support a single entry at the moment, for gapless playback
1052                ALOGW("ignoring edit list with %d entries", entry_count);
1053            } else {
1054                off64_t entriesoffset = data_offset + 8;
1055                uint64_t segment_duration;
1056                int64_t media_time;
1057
1058                if (version == 1) {
1059                    if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1060                            !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1061                        return ERROR_IO;
1062                    }
1063                } else if (version == 0) {
1064                    uint32_t sd;
1065                    int32_t mt;
1066                    if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1067                            !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1068                        return ERROR_IO;
1069                    }
1070                    segment_duration = sd;
1071                    media_time = mt;
1072                } else {
1073                    return ERROR_IO;
1074                }
1075
1076                // save these for later, because the elst atom might precede
1077                // the atoms that actually give us the duration and sample rate
1078                // needed to calculate the padding and delay values
1079                mLastTrack->has_elst = true;
1080                mLastTrack->elst_media_time = media_time;
1081                mLastTrack->elst_segment_duration = segment_duration;
1082            }
1083            break;
1084        }
1085
1086        case FOURCC('f', 'r', 'm', 'a'):
1087        {
1088            *offset += chunk_size;
1089
1090            uint32_t original_fourcc;
1091            if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1092                return ERROR_IO;
1093            }
1094            original_fourcc = ntohl(original_fourcc);
1095            ALOGV("read original format: %d", original_fourcc);
1096
1097            if (mLastTrack == NULL) {
1098                return ERROR_MALFORMED;
1099            }
1100
1101            mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
1102            uint32_t num_channels = 0;
1103            uint32_t sample_rate = 0;
1104            if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1105                mLastTrack->meta.setInt32(kKeyChannelCount, num_channels);
1106                mLastTrack->meta.setInt32(kKeySampleRate, sample_rate);
1107            }
1108            break;
1109        }
1110
1111        case FOURCC('t', 'e', 'n', 'c'):
1112        {
1113            *offset += chunk_size;
1114
1115            if (chunk_size < 32) {
1116                return ERROR_MALFORMED;
1117            }
1118
1119            // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1120            // default IV size, 16 bytes default KeyID
1121            // (ISO 23001-7)
1122
1123            uint8_t version;
1124            if (mDataSource->readAt(data_offset, &version, sizeof(version))
1125                    < (ssize_t)sizeof(version)) {
1126                return ERROR_IO;
1127            }
1128
1129            uint8_t buf[4];
1130            memset(buf, 0, 4);
1131            if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1132                return ERROR_IO;
1133            }
1134
1135            if (mLastTrack == NULL) {
1136                return ERROR_MALFORMED;
1137            }
1138
1139            uint8_t defaultEncryptedByteBlock = 0;
1140            uint8_t defaultSkipByteBlock = 0;
1141            uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1142            if (version == 1) {
1143                uint32_t pattern = buf[2];
1144                defaultEncryptedByteBlock = pattern >> 4;
1145                defaultSkipByteBlock = pattern & 0xf;
1146                if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) {
1147                    // use (1,0) to mean "encrypt everything"
1148                    defaultEncryptedByteBlock = 1;
1149                }
1150            } else if (mLastTrack->subsample_encryption) {
1151                ALOGW("subsample_encryption should be version 1");
1152            } else if (defaultAlgorithmId > 1) {
1153                // only 0 (clear) and 1 (AES-128) are valid
1154                ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId);
1155                defaultAlgorithmId = 1;
1156            }
1157
1158            memset(buf, 0, 4);
1159            if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1160                return ERROR_IO;
1161            }
1162            uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1163
1164            if (defaultAlgorithmId == 0 && defaultIVSize != 0) {
1165                // unencrypted data (algorithm id 0) must have an IV size of 0
1166                return ERROR_MALFORMED;
1167            } else if (defaultIVSize != 0 &&
1168                    defaultIVSize != 8 &&
1169                    defaultIVSize != 16) {
1170                return ERROR_MALFORMED;
1171            }
1172
1173            uint8_t defaultKeyId[16];
1174
1175            if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1176                return ERROR_IO;
1177            }
1178
1179            sp<ABuffer> defaultConstantIv;
1180            if (defaultAlgorithmId != 0 && defaultIVSize == 0) {
1181
1182                uint8_t ivlength;
1183                if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength))
1184                        < (ssize_t)sizeof(ivlength)) {
1185                    return ERROR_IO;
1186                }
1187
1188                if (ivlength != 8 && ivlength != 16) {
1189                    ALOGW("unsupported IV length: %u", ivlength);
1190                    return ERROR_MALFORMED;
1191                }
1192
1193                defaultConstantIv = new ABuffer(ivlength);
1194                if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength)
1195                        < (ssize_t)ivlength) {
1196                    return ERROR_IO;
1197                }
1198
1199                defaultConstantIv->setRange(0, ivlength);
1200            }
1201
1202            int32_t tmpAlgorithmId;
1203            if (!mLastTrack->meta.findInt32(kKeyCryptoMode, &tmpAlgorithmId)) {
1204                mLastTrack->meta.setInt32(kKeyCryptoMode, defaultAlgorithmId);
1205            }
1206
1207            mLastTrack->meta.setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1208            mLastTrack->meta.setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1209            mLastTrack->meta.setInt32(kKeyEncryptedByteBlock, defaultEncryptedByteBlock);
1210            mLastTrack->meta.setInt32(kKeySkipByteBlock, defaultSkipByteBlock);
1211            if (defaultConstantIv != NULL) {
1212                mLastTrack->meta.setData(kKeyCryptoIV, 'dciv', defaultConstantIv->data(), defaultConstantIv->size());
1213            }
1214            break;
1215        }
1216
1217        case FOURCC('t', 'k', 'h', 'd'):
1218        {
1219            *offset += chunk_size;
1220
1221            status_t err;
1222            if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1223                return err;
1224            }
1225
1226            break;
1227        }
1228
1229        case FOURCC('t', 'r', 'e', 'f'):
1230        {
1231            off64_t stop_offset = *offset + chunk_size;
1232            *offset = data_offset;
1233            while (*offset < stop_offset) {
1234                status_t err = parseChunk(offset, depth + 1);
1235                if (err != OK) {
1236                    return err;
1237                }
1238            }
1239            if (*offset != stop_offset) {
1240                return ERROR_MALFORMED;
1241            }
1242            break;
1243        }
1244
1245        case FOURCC('t', 'h', 'm', 'b'):
1246        {
1247            *offset += chunk_size;
1248
1249            if (mLastTrack != NULL) {
1250                // Skip thumbnail track for now since we don't have an
1251                // API to retrieve it yet.
1252                // The thumbnail track can't be accessed by negative index or time,
1253                // because each timed sample has its own corresponding thumbnail
1254                // in the thumbnail track. We'll need a dedicated API to retrieve
1255                // thumbnail at time instead.
1256                mLastTrack->skipTrack = true;
1257            }
1258
1259            break;
1260        }
1261
1262        case FOURCC('p', 's', 's', 'h'):
1263        {
1264            *offset += chunk_size;
1265
1266            PsshInfo pssh;
1267
1268            if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1269                return ERROR_IO;
1270            }
1271
1272            uint32_t psshdatalen = 0;
1273            if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1274                return ERROR_IO;
1275            }
1276            pssh.datalen = ntohl(psshdatalen);
1277            ALOGV("pssh data size: %d", pssh.datalen);
1278            if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1279                // pssh data length exceeds size of containing box
1280                return ERROR_MALFORMED;
1281            }
1282
1283            pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1284            if (pssh.data == NULL) {
1285                return ERROR_MALFORMED;
1286            }
1287            ALOGV("allocated pssh @ %p", pssh.data);
1288            ssize_t requested = (ssize_t) pssh.datalen;
1289            if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1290                delete[] pssh.data;
1291                return ERROR_IO;
1292            }
1293            mPssh.push_back(pssh);
1294
1295            break;
1296        }
1297
1298        case FOURCC('m', 'd', 'h', 'd'):
1299        {
1300            *offset += chunk_size;
1301
1302            if (chunk_data_size < 4 || mLastTrack == NULL) {
1303                return ERROR_MALFORMED;
1304            }
1305
1306            uint8_t version;
1307            if (mDataSource->readAt(
1308                        data_offset, &version, sizeof(version))
1309                    < (ssize_t)sizeof(version)) {
1310                return ERROR_IO;
1311            }
1312
1313            off64_t timescale_offset;
1314
1315            if (version == 1) {
1316                timescale_offset = data_offset + 4 + 16;
1317            } else if (version == 0) {
1318                timescale_offset = data_offset + 4 + 8;
1319            } else {
1320                return ERROR_IO;
1321            }
1322
1323            uint32_t timescale;
1324            if (mDataSource->readAt(
1325                        timescale_offset, &timescale, sizeof(timescale))
1326                    < (ssize_t)sizeof(timescale)) {
1327                return ERROR_IO;
1328            }
1329
1330            if (!timescale) {
1331                ALOGE("timescale should not be ZERO.");
1332                return ERROR_MALFORMED;
1333            }
1334
1335            mLastTrack->timescale = ntohl(timescale);
1336
1337            // 14496-12 says all ones means indeterminate, but some files seem to use
1338            // 0 instead. We treat both the same.
1339            int64_t duration = 0;
1340            if (version == 1) {
1341                if (mDataSource->readAt(
1342                            timescale_offset + 4, &duration, sizeof(duration))
1343                        < (ssize_t)sizeof(duration)) {
1344                    return ERROR_IO;
1345                }
1346                if (duration != -1) {
1347                    duration = ntoh64(duration);
1348                }
1349            } else {
1350                uint32_t duration32;
1351                if (mDataSource->readAt(
1352                            timescale_offset + 4, &duration32, sizeof(duration32))
1353                        < (ssize_t)sizeof(duration32)) {
1354                    return ERROR_IO;
1355                }
1356                if (duration32 != 0xffffffff) {
1357                    duration = ntohl(duration32);
1358                }
1359            }
1360            if (duration != 0 && mLastTrack->timescale != 0) {
1361                mLastTrack->meta.setInt64(
1362                        kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1363            }
1364
1365            uint8_t lang[2];
1366            off64_t lang_offset;
1367            if (version == 1) {
1368                lang_offset = timescale_offset + 4 + 8;
1369            } else if (version == 0) {
1370                lang_offset = timescale_offset + 4 + 4;
1371            } else {
1372                return ERROR_IO;
1373            }
1374
1375            if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1376                    < (ssize_t)sizeof(lang)) {
1377                return ERROR_IO;
1378            }
1379
1380            // To get the ISO-639-2/T three character language code
1381            // 1 bit pad followed by 3 5-bits characters. Each character
1382            // is packed as the difference between its ASCII value and 0x60.
1383            char lang_code[4];
1384            lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1385            lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1386            lang_code[2] = (lang[1] & 0x1f) + 0x60;
1387            lang_code[3] = '\0';
1388
1389            mLastTrack->meta.setCString(
1390                    kKeyMediaLanguage, lang_code);
1391
1392            break;
1393        }
1394
1395        case FOURCC('s', 't', 's', 'd'):
1396        {
1397            uint8_t buffer[8];
1398            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1399                return ERROR_MALFORMED;
1400            }
1401
1402            if (mDataSource->readAt(
1403                        data_offset, buffer, 8) < 8) {
1404                return ERROR_IO;
1405            }
1406
1407            if (U32_AT(buffer) != 0) {
1408                // Should be version 0, flags 0.
1409                return ERROR_MALFORMED;
1410            }
1411
1412            uint32_t entry_count = U32_AT(&buffer[4]);
1413
1414            if (entry_count > 1) {
1415                // For 3GPP timed text, there could be multiple tx3g boxes containing
1416                // multiple text display formats. These formats will be used to
1417                // display the timed text.
1418                // For encrypted files, there may also be more than one entry.
1419                const char *mime;
1420
1421                if (mLastTrack == NULL)
1422                    return ERROR_MALFORMED;
1423
1424                CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime));
1425                if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1426                        strcasecmp(mime, "application/octet-stream")) {
1427                    // For now we only support a single type of media per track.
1428                    mLastTrack->skipTrack = true;
1429                    *offset += chunk_size;
1430                    break;
1431                }
1432            }
1433            off64_t stop_offset = *offset + chunk_size;
1434            *offset = data_offset + 8;
1435            for (uint32_t i = 0; i < entry_count; ++i) {
1436                status_t err = parseChunk(offset, depth + 1);
1437                if (err != OK) {
1438                    return err;
1439                }
1440            }
1441
1442            if (*offset != stop_offset) {
1443                return ERROR_MALFORMED;
1444            }
1445            break;
1446        }
1447        case FOURCC('m', 'e', 't', 't'):
1448        {
1449            *offset += chunk_size;
1450
1451            if (mLastTrack == NULL)
1452                return ERROR_MALFORMED;
1453
1454            auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1455            if (buffer.get() == NULL) {
1456                return NO_MEMORY;
1457            }
1458
1459            if (mDataSource->readAt(
1460                        data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1461                return ERROR_IO;
1462            }
1463
1464            String8 mimeFormat((const char *)(buffer.get()), chunk_data_size);
1465            mLastTrack->meta.setCString(kKeyMIMEType, mimeFormat.string());
1466
1467            break;
1468        }
1469
1470        case FOURCC('m', 'p', '4', 'a'):
1471        case FOURCC('e', 'n', 'c', 'a'):
1472        case FOURCC('s', 'a', 'm', 'r'):
1473        case FOURCC('s', 'a', 'w', 'b'):
1474        {
1475            if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')
1476                    && depth >= 1 && mPath[depth - 1] == FOURCC('w', 'a', 'v', 'e')) {
1477                // Ignore mp4a embedded in QT wave atom
1478                *offset += chunk_size;
1479                break;
1480            }
1481
1482            uint8_t buffer[8 + 20];
1483            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1484                // Basic AudioSampleEntry size.
1485                return ERROR_MALFORMED;
1486            }
1487
1488            if (mDataSource->readAt(
1489                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1490                return ERROR_IO;
1491            }
1492
1493            uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1494            uint16_t version = U16_AT(&buffer[8]);
1495            uint32_t num_channels = U16_AT(&buffer[16]);
1496
1497            uint16_t sample_size = U16_AT(&buffer[18]);
1498            uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1499
1500            if (mLastTrack == NULL)
1501                return ERROR_MALFORMED;
1502
1503            off64_t stop_offset = *offset + chunk_size;
1504            *offset = data_offset + sizeof(buffer);
1505
1506            if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')) {
1507                if (version == 1) {
1508                    if (mDataSource->readAt(*offset, buffer, 16) < 16) {
1509                        return ERROR_IO;
1510                    }
1511
1512#if 0
1513                    U32_AT(buffer);  // samples per packet
1514                    U32_AT(&buffer[4]);  // bytes per packet
1515                    U32_AT(&buffer[8]);  // bytes per frame
1516                    U32_AT(&buffer[12]);  // bytes per sample
1517#endif
1518                    *offset += 16;
1519                } else if (version == 2) {
1520                    uint8_t v2buffer[36];
1521                    if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
1522                        return ERROR_IO;
1523                    }
1524
1525#if 0
1526                    U32_AT(v2buffer);  // size of struct only
1527                    sample_rate = (uint32_t)U64_AT(&v2buffer[4]);  // audio sample rate
1528                    num_channels = U32_AT(&v2buffer[12]);  // num audio channels
1529                    U32_AT(&v2buffer[16]);  // always 0x7f000000
1530                    sample_size = (uint16_t)U32_AT(&v2buffer[20]);  // const bits per channel
1531                    U32_AT(&v2buffer[24]);  // format specifc flags
1532                    U32_AT(&v2buffer[28]);  // const bytes per audio packet
1533                    U32_AT(&v2buffer[32]);  // const LPCM frames per audio packet
1534#endif
1535                    *offset += 36;
1536                }
1537            }
1538
1539            if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1540                // if the chunk type is enca, we'll get the type from the frma box later
1541                mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1542                AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1543            }
1544            ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1545                   chunk, num_channels, sample_size, sample_rate);
1546            mLastTrack->meta.setInt32(kKeyChannelCount, num_channels);
1547            mLastTrack->meta.setInt32(kKeySampleRate, sample_rate);
1548
1549            while (*offset < stop_offset) {
1550                status_t err = parseChunk(offset, depth + 1);
1551                if (err != OK) {
1552                    return err;
1553                }
1554            }
1555
1556            if (*offset != stop_offset) {
1557                return ERROR_MALFORMED;
1558            }
1559            break;
1560        }
1561
1562        case FOURCC('m', 'p', '4', 'v'):
1563        case FOURCC('e', 'n', 'c', 'v'):
1564        case FOURCC('s', '2', '6', '3'):
1565        case FOURCC('H', '2', '6', '3'):
1566        case FOURCC('h', '2', '6', '3'):
1567        case FOURCC('a', 'v', 'c', '1'):
1568        case FOURCC('h', 'v', 'c', '1'):
1569        case FOURCC('h', 'e', 'v', '1'):
1570        {
1571            uint8_t buffer[78];
1572            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1573                // Basic VideoSampleEntry size.
1574                return ERROR_MALFORMED;
1575            }
1576
1577            if (mDataSource->readAt(
1578                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1579                return ERROR_IO;
1580            }
1581
1582            uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1583            uint16_t width = U16_AT(&buffer[6 + 18]);
1584            uint16_t height = U16_AT(&buffer[6 + 20]);
1585
1586            // The video sample is not standard-compliant if it has invalid dimensions.
1587            // Use some default width and height values, and
1588            // let the decoder figure out the actual width and height (and thus
1589            // be prepared for an INFO_FORMAT_CHANGED event).
1590            if (width == 0)  width  = 352;
1591            if (height == 0) height = 288;
1592
1593            // printf("*** coding='%s' width=%d height=%d\n",
1594            //        chunk, width, height);
1595
1596            if (mLastTrack == NULL)
1597                return ERROR_MALFORMED;
1598
1599            if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1600                // if the chunk type is encv, we'll get the type from the frma box later
1601                mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1602            }
1603            mLastTrack->meta.setInt32(kKeyWidth, width);
1604            mLastTrack->meta.setInt32(kKeyHeight, height);
1605
1606            off64_t stop_offset = *offset + chunk_size;
1607            *offset = data_offset + sizeof(buffer);
1608            while (*offset < stop_offset) {
1609                status_t err = parseChunk(offset, depth + 1);
1610                if (err != OK) {
1611                    return err;
1612                }
1613            }
1614
1615            if (*offset != stop_offset) {
1616                return ERROR_MALFORMED;
1617            }
1618            break;
1619        }
1620
1621        case FOURCC('s', 't', 'c', 'o'):
1622        case FOURCC('c', 'o', '6', '4'):
1623        {
1624            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
1625                return ERROR_MALFORMED;
1626            }
1627
1628            status_t err =
1629                mLastTrack->sampleTable->setChunkOffsetParams(
1630                        chunk_type, data_offset, chunk_data_size);
1631
1632            *offset += chunk_size;
1633
1634            if (err != OK) {
1635                return err;
1636            }
1637
1638            break;
1639        }
1640
1641        case FOURCC('s', 't', 's', 'c'):
1642        {
1643            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1644                return ERROR_MALFORMED;
1645
1646            status_t err =
1647                mLastTrack->sampleTable->setSampleToChunkParams(
1648                        data_offset, chunk_data_size);
1649
1650            *offset += chunk_size;
1651
1652            if (err != OK) {
1653                return err;
1654            }
1655
1656            break;
1657        }
1658
1659        case FOURCC('s', 't', 's', 'z'):
1660        case FOURCC('s', 't', 'z', '2'):
1661        {
1662            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
1663                return ERROR_MALFORMED;
1664            }
1665
1666            status_t err =
1667                mLastTrack->sampleTable->setSampleSizeParams(
1668                        chunk_type, data_offset, chunk_data_size);
1669
1670            *offset += chunk_size;
1671
1672            if (err != OK) {
1673                return err;
1674            }
1675
1676            size_t max_size;
1677            err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1678
1679            if (err != OK) {
1680                return err;
1681            }
1682
1683            if (max_size != 0) {
1684                // Assume that a given buffer only contains at most 10 chunks,
1685                // each chunk originally prefixed with a 2 byte length will
1686                // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1687                // and thus will grow by 2 bytes per chunk.
1688                if (max_size > SIZE_MAX - 10 * 2) {
1689                    ALOGE("max sample size too big: %zu", max_size);
1690                    return ERROR_MALFORMED;
1691                }
1692                mLastTrack->meta.setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1693            } else {
1694                // No size was specified. Pick a conservatively large size.
1695                uint32_t width, height;
1696                if (!mLastTrack->meta.findInt32(kKeyWidth, (int32_t*)&width) ||
1697                    !mLastTrack->meta.findInt32(kKeyHeight,(int32_t*) &height)) {
1698                    ALOGE("No width or height, assuming worst case 1080p");
1699                    width = 1920;
1700                    height = 1080;
1701                } else {
1702                    // A resolution was specified, check that it's not too big. The values below
1703                    // were chosen so that the calculations below don't cause overflows, they're
1704                    // not indicating that resolutions up to 32kx32k are actually supported.
1705                    if (width > 32768 || height > 32768) {
1706                        ALOGE("can't support %u x %u video", width, height);
1707                        return ERROR_MALFORMED;
1708                    }
1709                }
1710
1711                const char *mime;
1712                CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime));
1713                if (!strncmp(mime, "audio/", 6)) {
1714                    // for audio, use 128KB
1715                    max_size = 1024 * 128;
1716                } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
1717                        || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
1718                    // AVC & HEVC requires compression ratio of at least 2, and uses
1719                    // macroblocks
1720                    max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1721                } else {
1722                    // For all other formats there is no minimum compression
1723                    // ratio. Use compression ratio of 1.
1724                    max_size = width * height * 3 / 2;
1725                }
1726                // HACK: allow 10% overhead
1727                // TODO: read sample size from traf atom for fragmented MPEG4.
1728                max_size += max_size / 10;
1729                mLastTrack->meta.setInt32(kKeyMaxInputSize, max_size);
1730            }
1731
1732            // NOTE: setting another piece of metadata invalidates any pointers (such as the
1733            // mimetype) previously obtained, so don't cache them.
1734            const char *mime;
1735            CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime));
1736            // Calculate average frame rate.
1737            if (!strncasecmp("video/", mime, 6)) {
1738                size_t nSamples = mLastTrack->sampleTable->countSamples();
1739                if (nSamples == 0) {
1740                    int32_t trackId;
1741                    if (mLastTrack->meta.findInt32(kKeyTrackID, &trackId)) {
1742                        for (size_t i = 0; i < mTrex.size(); i++) {
1743                            Trex *t = &mTrex.editItemAt(i);
1744                            if (t->track_ID == (uint32_t) trackId) {
1745                                if (t->default_sample_duration > 0) {
1746                                    int32_t frameRate =
1747                                            mLastTrack->timescale / t->default_sample_duration;
1748                                    mLastTrack->meta.setInt32(kKeyFrameRate, frameRate);
1749                                }
1750                                break;
1751                            }
1752                        }
1753                    }
1754                } else {
1755                    int64_t durationUs;
1756                    if (mLastTrack->meta.findInt64(kKeyDuration, &durationUs)) {
1757                        if (durationUs > 0) {
1758                            int32_t frameRate = (nSamples * 1000000LL +
1759                                        (durationUs >> 1)) / durationUs;
1760                            mLastTrack->meta.setInt32(kKeyFrameRate, frameRate);
1761                        }
1762                    }
1763                    ALOGV("setting frame count %zu", nSamples);
1764                    mLastTrack->meta.setInt32(kKeyFrameCount, nSamples);
1765                }
1766            }
1767
1768            break;
1769        }
1770
1771        case FOURCC('s', 't', 't', 's'):
1772        {
1773            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1774                return ERROR_MALFORMED;
1775
1776            *offset += chunk_size;
1777
1778            status_t err =
1779                mLastTrack->sampleTable->setTimeToSampleParams(
1780                        data_offset, chunk_data_size);
1781
1782            if (err != OK) {
1783                return err;
1784            }
1785
1786            break;
1787        }
1788
1789        case FOURCC('c', 't', 't', 's'):
1790        {
1791            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1792                return ERROR_MALFORMED;
1793
1794            *offset += chunk_size;
1795
1796            status_t err =
1797                mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1798                        data_offset, chunk_data_size);
1799
1800            if (err != OK) {
1801                return err;
1802            }
1803
1804            break;
1805        }
1806
1807        case FOURCC('s', 't', 's', 's'):
1808        {
1809            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1810                return ERROR_MALFORMED;
1811
1812            *offset += chunk_size;
1813
1814            status_t err =
1815                mLastTrack->sampleTable->setSyncSampleParams(
1816                        data_offset, chunk_data_size);
1817
1818            if (err != OK) {
1819                return err;
1820            }
1821
1822            break;
1823        }
1824
1825        // \xA9xyz
1826        case FOURCC(0xA9, 'x', 'y', 'z'):
1827        {
1828            *offset += chunk_size;
1829
1830            // Best case the total data length inside "\xA9xyz" box would
1831            // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
1832            // where "\x00\x05" is the text string length with value = 5,
1833            // "\0x15\xc7" is the language code = en, and "+0+0/" is a
1834            // location (string) value with longitude = 0 and latitude = 0.
1835            // Since some devices encountered in the wild omit the trailing
1836            // slash, we'll allow that.
1837            if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
1838                return ERROR_MALFORMED;
1839            }
1840
1841            uint16_t len;
1842            if (!mDataSource->getUInt16(data_offset, &len)) {
1843                return ERROR_IO;
1844            }
1845
1846            // allow "+0+0" without trailing slash
1847            if (len < 4 || len > chunk_data_size - 4) {
1848                return ERROR_MALFORMED;
1849            }
1850            // The location string following the language code is formatted
1851            // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
1852            // Allocate 2 extra bytes, in case we need to add a trailing slash,
1853            // and to add a terminating 0.
1854            std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
1855            if (!buffer) {
1856                return NO_MEMORY;
1857            }
1858
1859            if (mDataSource->readAt(
1860                        data_offset + 4, &buffer[0], len) < len) {
1861                return ERROR_IO;
1862            }
1863
1864            len = strlen(&buffer[0]);
1865            if (len < 4) {
1866                return ERROR_MALFORMED;
1867            }
1868            // Add a trailing slash if there wasn't one.
1869            if (buffer[len - 1] != '/') {
1870                buffer[len] = '/';
1871            }
1872            mFileMetaData.setCString(kKeyLocation, &buffer[0]);
1873            break;
1874        }
1875
1876        case FOURCC('e', 's', 'd', 's'):
1877        {
1878            *offset += chunk_size;
1879
1880            if (chunk_data_size < 4) {
1881                return ERROR_MALFORMED;
1882            }
1883
1884            uint8_t buffer[256];
1885            if (chunk_data_size > (off64_t)sizeof(buffer)) {
1886                return ERROR_BUFFER_TOO_SMALL;
1887            }
1888
1889            if (mDataSource->readAt(
1890                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
1891                return ERROR_IO;
1892            }
1893
1894            if (U32_AT(buffer) != 0) {
1895                // Should be version 0, flags 0.
1896                return ERROR_MALFORMED;
1897            }
1898
1899            if (mLastTrack == NULL)
1900                return ERROR_MALFORMED;
1901
1902            mLastTrack->meta.setData(
1903                    kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1904
1905            if (mPath.size() >= 2
1906                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1907                // Information from the ESDS must be relied on for proper
1908                // setup of sample rate and channel count for MPEG4 Audio.
1909                // The generic header appears to only contain generic
1910                // information...
1911
1912                status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1913                        &buffer[4], chunk_data_size - 4);
1914
1915                if (err != OK) {
1916                    return err;
1917                }
1918            }
1919            if (mPath.size() >= 2
1920                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) {
1921                // Check if the video is MPEG2
1922                ESDS esds(&buffer[4], chunk_data_size - 4);
1923
1924                uint8_t objectTypeIndication;
1925                if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
1926                    if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
1927                        mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
1928                    }
1929                }
1930            }
1931            break;
1932        }
1933
1934        case FOURCC('b', 't', 'r', 't'):
1935        {
1936            *offset += chunk_size;
1937            if (mLastTrack == NULL) {
1938                return ERROR_MALFORMED;
1939            }
1940
1941            uint8_t buffer[12];
1942            if (chunk_data_size != sizeof(buffer)) {
1943                return ERROR_MALFORMED;
1944            }
1945
1946            if (mDataSource->readAt(
1947                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1948                return ERROR_IO;
1949            }
1950
1951            uint32_t maxBitrate = U32_AT(&buffer[4]);
1952            uint32_t avgBitrate = U32_AT(&buffer[8]);
1953            if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
1954                mLastTrack->meta.setInt32(kKeyMaxBitRate, (int32_t)maxBitrate);
1955            }
1956            if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
1957                mLastTrack->meta.setInt32(kKeyBitRate, (int32_t)avgBitrate);
1958            }
1959            break;
1960        }
1961
1962        case FOURCC('a', 'v', 'c', 'C'):
1963        {
1964            *offset += chunk_size;
1965
1966            auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1967
1968            if (buffer.get() == NULL) {
1969                ALOGE("b/28471206");
1970                return NO_MEMORY;
1971            }
1972
1973            if (mDataSource->readAt(
1974                        data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1975                return ERROR_IO;
1976            }
1977
1978            if (mLastTrack == NULL)
1979                return ERROR_MALFORMED;
1980
1981            mLastTrack->meta.setData(
1982                    kKeyAVCC, kTypeAVCC, buffer.get(), chunk_data_size);
1983
1984            break;
1985        }
1986        case FOURCC('h', 'v', 'c', 'C'):
1987        {
1988            auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1989
1990            if (buffer.get() == NULL) {
1991                ALOGE("b/28471206");
1992                return NO_MEMORY;
1993            }
1994
1995            if (mDataSource->readAt(
1996                        data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1997                return ERROR_IO;
1998            }
1999
2000            if (mLastTrack == NULL)
2001                return ERROR_MALFORMED;
2002
2003            mLastTrack->meta.setData(
2004                    kKeyHVCC, kTypeHVCC, buffer.get(), chunk_data_size);
2005
2006            *offset += chunk_size;
2007            break;
2008        }
2009
2010        case FOURCC('d', '2', '6', '3'):
2011        {
2012            *offset += chunk_size;
2013            /*
2014             * d263 contains a fixed 7 bytes part:
2015             *   vendor - 4 bytes
2016             *   version - 1 byte
2017             *   level - 1 byte
2018             *   profile - 1 byte
2019             * optionally, "d263" box itself may contain a 16-byte
2020             * bit rate box (bitr)
2021             *   average bit rate - 4 bytes
2022             *   max bit rate - 4 bytes
2023             */
2024            char buffer[23];
2025            if (chunk_data_size != 7 &&
2026                chunk_data_size != 23) {
2027                ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
2028                return ERROR_MALFORMED;
2029            }
2030
2031            if (mDataSource->readAt(
2032                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
2033                return ERROR_IO;
2034            }
2035
2036            if (mLastTrack == NULL)
2037                return ERROR_MALFORMED;
2038
2039            mLastTrack->meta.setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
2040
2041            break;
2042        }
2043
2044        case FOURCC('m', 'e', 't', 'a'):
2045        {
2046            off64_t stop_offset = *offset + chunk_size;
2047            *offset = data_offset;
2048            bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
2049            if (!isParsingMetaKeys) {
2050                uint8_t buffer[4];
2051                if (chunk_data_size < (off64_t)sizeof(buffer)) {
2052                    *offset = stop_offset;
2053                    return ERROR_MALFORMED;
2054                }
2055
2056                if (mDataSource->readAt(
2057                            data_offset, buffer, 4) < 4) {
2058                    *offset = stop_offset;
2059                    return ERROR_IO;
2060                }
2061
2062                if (U32_AT(buffer) != 0) {
2063                    // Should be version 0, flags 0.
2064
2065                    // If it's not, let's assume this is one of those
2066                    // apparently malformed chunks that don't have flags
2067                    // and completely different semantics than what's
2068                    // in the MPEG4 specs and skip it.
2069                    *offset = stop_offset;
2070                    return OK;
2071                }
2072                *offset +=  sizeof(buffer);
2073            }
2074
2075            while (*offset < stop_offset) {
2076                status_t err = parseChunk(offset, depth + 1);
2077                if (err != OK) {
2078                    return err;
2079                }
2080            }
2081
2082            if (*offset != stop_offset) {
2083                return ERROR_MALFORMED;
2084            }
2085            break;
2086        }
2087
2088        case FOURCC('i', 'l', 'o', 'c'):
2089        case FOURCC('i', 'i', 'n', 'f'):
2090        case FOURCC('i', 'p', 'r', 'p'):
2091        case FOURCC('p', 'i', 't', 'm'):
2092        case FOURCC('i', 'd', 'a', 't'):
2093        case FOURCC('i', 'r', 'e', 'f'):
2094        case FOURCC('i', 'p', 'r', 'o'):
2095        {
2096            if (mIsHeif) {
2097                if (mItemTable == NULL) {
2098                    mItemTable = new ItemTable(mDataSource);
2099                }
2100                status_t err = mItemTable->parse(
2101                        chunk_type, data_offset, chunk_data_size);
2102                if (err != OK) {
2103                    return err;
2104                }
2105            }
2106            *offset += chunk_size;
2107            break;
2108        }
2109
2110        case FOURCC('m', 'e', 'a', 'n'):
2111        case FOURCC('n', 'a', 'm', 'e'):
2112        case FOURCC('d', 'a', 't', 'a'):
2113        {
2114            *offset += chunk_size;
2115
2116            if (mPath.size() == 6 && underMetaDataPath(mPath)) {
2117                status_t err = parseITunesMetaData(data_offset, chunk_data_size);
2118
2119                if (err != OK) {
2120                    return err;
2121                }
2122            }
2123
2124            break;
2125        }
2126
2127        case FOURCC('m', 'v', 'h', 'd'):
2128        {
2129            *offset += chunk_size;
2130
2131            if (depth != 1) {
2132                ALOGE("mvhd: depth %d", depth);
2133                return ERROR_MALFORMED;
2134            }
2135            if (chunk_data_size < 32) {
2136                return ERROR_MALFORMED;
2137            }
2138
2139            uint8_t header[32];
2140            if (mDataSource->readAt(
2141                        data_offset, header, sizeof(header))
2142                    < (ssize_t)sizeof(header)) {
2143                return ERROR_IO;
2144            }
2145
2146            uint64_t creationTime;
2147            uint64_t duration = 0;
2148            if (header[0] == 1) {
2149                creationTime = U64_AT(&header[4]);
2150                mHeaderTimescale = U32_AT(&header[20]);
2151                duration = U64_AT(&header[24]);
2152                if (duration == 0xffffffffffffffff) {
2153                    duration = 0;
2154                }
2155            } else if (header[0] != 0) {
2156                return ERROR_MALFORMED;
2157            } else {
2158                creationTime = U32_AT(&header[4]);
2159                mHeaderTimescale = U32_AT(&header[12]);
2160                uint32_t d32 = U32_AT(&header[16]);
2161                if (d32 == 0xffffffff) {
2162                    d32 = 0;
2163                }
2164                duration = d32;
2165            }
2166            if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
2167                mFileMetaData.setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
2168            }
2169
2170            String8 s;
2171            if (convertTimeToDate(creationTime, &s)) {
2172                mFileMetaData.setCString(kKeyDate, s.string());
2173            }
2174
2175
2176            break;
2177        }
2178
2179        case FOURCC('m', 'e', 'h', 'd'):
2180        {
2181            *offset += chunk_size;
2182
2183            if (chunk_data_size < 8) {
2184                return ERROR_MALFORMED;
2185            }
2186
2187            uint8_t flags[4];
2188            if (mDataSource->readAt(
2189                        data_offset, flags, sizeof(flags))
2190                    < (ssize_t)sizeof(flags)) {
2191                return ERROR_IO;
2192            }
2193
2194            uint64_t duration = 0;
2195            if (flags[0] == 1) {
2196                // 64 bit
2197                if (chunk_data_size < 12) {
2198                    return ERROR_MALFORMED;
2199                }
2200                mDataSource->getUInt64(data_offset + 4, &duration);
2201                if (duration == 0xffffffffffffffff) {
2202                    duration = 0;
2203                }
2204            } else if (flags[0] == 0) {
2205                // 32 bit
2206                uint32_t d32;
2207                mDataSource->getUInt32(data_offset + 4, &d32);
2208                if (d32 == 0xffffffff) {
2209                    d32 = 0;
2210                }
2211                duration = d32;
2212            } else {
2213                return ERROR_MALFORMED;
2214            }
2215
2216            if (duration != 0 && mHeaderTimescale != 0) {
2217                mFileMetaData.setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
2218            }
2219
2220            break;
2221        }
2222
2223        case FOURCC('m', 'd', 'a', 't'):
2224        {
2225            mMdatFound = true;
2226
2227            *offset += chunk_size;
2228            break;
2229        }
2230
2231        case FOURCC('h', 'd', 'l', 'r'):
2232        {
2233            *offset += chunk_size;
2234
2235            if (underQTMetaPath(mPath, 3)) {
2236                break;
2237            }
2238
2239            uint32_t buffer;
2240            if (mDataSource->readAt(
2241                        data_offset + 8, &buffer, 4) < 4) {
2242                return ERROR_IO;
2243            }
2244
2245            uint32_t type = ntohl(buffer);
2246            // For the 3GPP file format, the handler-type within the 'hdlr' box
2247            // shall be 'text'. We also want to support 'sbtl' handler type
2248            // for a practical reason as various MPEG4 containers use it.
2249            if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
2250                if (mLastTrack != NULL) {
2251                    mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
2252                }
2253            }
2254
2255            break;
2256        }
2257
2258        case FOURCC('k', 'e', 'y', 's'):
2259        {
2260            *offset += chunk_size;
2261
2262            if (underQTMetaPath(mPath, 3)) {
2263                status_t err = parseQTMetaKey(data_offset, chunk_data_size);
2264                if (err != OK) {
2265                    return err;
2266                }
2267            }
2268            break;
2269        }
2270
2271        case FOURCC('t', 'r', 'e', 'x'):
2272        {
2273            *offset += chunk_size;
2274
2275            if (chunk_data_size < 24) {
2276                return ERROR_IO;
2277            }
2278            Trex trex;
2279            if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2280                !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2281                !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2282                !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2283                !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2284                return ERROR_IO;
2285            }
2286            mTrex.add(trex);
2287            break;
2288        }
2289
2290        case FOURCC('t', 'x', '3', 'g'):
2291        {
2292            if (mLastTrack == NULL)
2293                return ERROR_MALFORMED;
2294
2295            uint32_t type;
2296            const void *data;
2297            size_t size = 0;
2298            if (!mLastTrack->meta.findData(
2299                    kKeyTextFormatData, &type, &data, &size)) {
2300                size = 0;
2301            }
2302
2303            if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) {
2304                return ERROR_MALFORMED;
2305            }
2306
2307            uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size];
2308            if (buffer == NULL) {
2309                return ERROR_MALFORMED;
2310            }
2311
2312            if (size > 0) {
2313                memcpy(buffer, data, size);
2314            }
2315
2316            if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
2317                    < chunk_size) {
2318                delete[] buffer;
2319                buffer = NULL;
2320
2321                // advance read pointer so we don't end up reading this again
2322                *offset += chunk_size;
2323                return ERROR_IO;
2324            }
2325
2326            mLastTrack->meta.setData(
2327                    kKeyTextFormatData, 0, buffer, size + chunk_size);
2328
2329            delete[] buffer;
2330
2331            *offset += chunk_size;
2332            break;
2333        }
2334
2335        case FOURCC('c', 'o', 'v', 'r'):
2336        {
2337            *offset += chunk_size;
2338
2339            ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
2340                  chunk_data_size, data_offset);
2341
2342            if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
2343                return ERROR_MALFORMED;
2344            }
2345            auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2346            if (buffer.get() == NULL) {
2347                ALOGE("b/28471206");
2348                return NO_MEMORY;
2349            }
2350            if (mDataSource->readAt(
2351                data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
2352                return ERROR_IO;
2353            }
2354            const int kSkipBytesOfDataBox = 16;
2355            if (chunk_data_size <= kSkipBytesOfDataBox) {
2356                return ERROR_MALFORMED;
2357            }
2358
2359            mFileMetaData.setData(
2360                kKeyAlbumArt, MetaData::TYPE_NONE,
2361                buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
2362
2363            break;
2364        }
2365
2366        case FOURCC('c', 'o', 'l', 'r'):
2367        {
2368            *offset += chunk_size;
2369            // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
2370            // ignore otherwise
2371            if (depth >= 2 && mPath[depth - 2] == FOURCC('s', 't', 's', 'd')) {
2372                status_t err = parseColorInfo(data_offset, chunk_data_size);
2373                if (err != OK) {
2374                    return err;
2375                }
2376            }
2377
2378            break;
2379        }
2380
2381        case FOURCC('t', 'i', 't', 'l'):
2382        case FOURCC('p', 'e', 'r', 'f'):
2383        case FOURCC('a', 'u', 't', 'h'):
2384        case FOURCC('g', 'n', 'r', 'e'):
2385        case FOURCC('a', 'l', 'b', 'm'):
2386        case FOURCC('y', 'r', 'r', 'c'):
2387        {
2388            *offset += chunk_size;
2389
2390            status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
2391
2392            if (err != OK) {
2393                return err;
2394            }
2395
2396            break;
2397        }
2398
2399        case FOURCC('I', 'D', '3', '2'):
2400        {
2401            *offset += chunk_size;
2402
2403            if (chunk_data_size < 6) {
2404                return ERROR_MALFORMED;
2405            }
2406
2407            parseID3v2MetaData(data_offset + 6);
2408
2409            break;
2410        }
2411
2412        case FOURCC('-', '-', '-', '-'):
2413        {
2414            mLastCommentMean.clear();
2415            mLastCommentName.clear();
2416            mLastCommentData.clear();
2417            *offset += chunk_size;
2418            break;
2419        }
2420
2421        case FOURCC('s', 'i', 'd', 'x'):
2422        {
2423            status_t err = parseSegmentIndex(data_offset, chunk_data_size);
2424            if (err != OK) {
2425                return err;
2426            }
2427            *offset += chunk_size;
2428            return UNKNOWN_ERROR; // stop parsing after sidx
2429        }
2430
2431        case FOURCC('a', 'c', '-', '3'):
2432        {
2433            *offset += chunk_size;
2434            return parseAC3SampleEntry(data_offset);
2435        }
2436
2437        case FOURCC('f', 't', 'y', 'p'):
2438        {
2439            if (chunk_data_size < 8 || depth != 0) {
2440                return ERROR_MALFORMED;
2441            }
2442
2443            off64_t stop_offset = *offset + chunk_size;
2444            uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
2445            std::set<uint32_t> brandSet;
2446            for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
2447                if (i == 1) {
2448                    // Skip this index, it refers to the minorVersion,
2449                    // not a brand.
2450                    continue;
2451                }
2452
2453                uint32_t brand;
2454                if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
2455                    return ERROR_MALFORMED;
2456                }
2457
2458                brand = ntohl(brand);
2459                brandSet.insert(brand);
2460            }
2461
2462            if (brandSet.count(FOURCC('q', 't', ' ', ' ')) > 0) {
2463                mIsQT = true;
2464            } else {
2465                if (brandSet.count(FOURCC('m', 'i', 'f', '1')) > 0
2466                 && brandSet.count(FOURCC('h', 'e', 'i', 'c')) > 0) {
2467                    ALOGV("identified HEIF image");
2468
2469                    mIsHeif = true;
2470                    brandSet.erase(FOURCC('m', 'i', 'f', '1'));
2471                    brandSet.erase(FOURCC('h', 'e', 'i', 'c'));
2472                }
2473
2474                if (!brandSet.empty()) {
2475                    // This means that the file should have moov box.
2476                    // It could be any iso files (mp4, heifs, etc.)
2477                    mHasMoovBox = true;
2478                    if (mIsHeif) {
2479                        ALOGV("identified HEIF image with other tracks");
2480                    }
2481                }
2482            }
2483
2484            *offset = stop_offset;
2485
2486            break;
2487        }
2488
2489        default:
2490        {
2491            // check if we're parsing 'ilst' for meta keys
2492            // if so, treat type as a number (key-id).
2493            if (underQTMetaPath(mPath, 3)) {
2494                status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
2495                if (err != OK) {
2496                    return err;
2497                }
2498            }
2499
2500            *offset += chunk_size;
2501            break;
2502        }
2503    }
2504
2505    return OK;
2506}
2507
2508status_t MPEG4Extractor::parseAC3SampleEntry(off64_t offset) {
2509    // skip 16 bytes:
2510    //  + 6-byte reserved,
2511    //  + 2-byte data reference index,
2512    //  + 8-byte reserved
2513    offset += 16;
2514    uint16_t channelCount;
2515    if (!mDataSource->getUInt16(offset, &channelCount)) {
2516        return ERROR_MALFORMED;
2517    }
2518    // skip 8 bytes:
2519    //  + 2-byte channelCount,
2520    //  + 2-byte sample size,
2521    //  + 4-byte reserved
2522    offset += 8;
2523    uint16_t sampleRate;
2524    if (!mDataSource->getUInt16(offset, &sampleRate)) {
2525        ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read sample rate");
2526        return ERROR_MALFORMED;
2527    }
2528
2529    // skip 4 bytes:
2530    //  + 2-byte sampleRate,
2531    //  + 2-byte reserved
2532    offset += 4;
2533    return parseAC3SpecificBox(offset, sampleRate);
2534}
2535
2536status_t MPEG4Extractor::parseAC3SpecificBox(
2537        off64_t offset, uint16_t sampleRate) {
2538    uint32_t size;
2539    // + 4-byte size
2540    // + 4-byte type
2541    // + 3-byte payload
2542    const uint32_t kAC3SpecificBoxSize = 11;
2543    if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) {
2544        ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
2545        return ERROR_MALFORMED;
2546    }
2547
2548    offset += 4;
2549    uint32_t type;
2550    if (!mDataSource->getUInt32(offset, &type) || type != FOURCC('d', 'a', 'c', '3')) {
2551        ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
2552        return ERROR_MALFORMED;
2553    }
2554
2555    offset += 4;
2556    const uint32_t kAC3SpecificBoxPayloadSize = 3;
2557    uint8_t chunk[kAC3SpecificBoxPayloadSize];
2558    if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) {
2559        ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
2560        return ERROR_MALFORMED;
2561    }
2562
2563    ABitReader br(chunk, sizeof(chunk));
2564    static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
2565    static const unsigned sampleRateTable[] = {48000, 44100, 32000};
2566
2567    unsigned fscod = br.getBits(2);
2568    if (fscod == 3) {
2569        ALOGE("Incorrect fscod (3) in AC3 header");
2570        return ERROR_MALFORMED;
2571    }
2572    unsigned boxSampleRate = sampleRateTable[fscod];
2573    if (boxSampleRate != sampleRate) {
2574        ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
2575            boxSampleRate, sampleRate);
2576        return ERROR_MALFORMED;
2577    }
2578
2579    unsigned bsid = br.getBits(5);
2580    if (bsid > 8) {
2581        ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
2582        return ERROR_MALFORMED;
2583    }
2584
2585    // skip
2586    unsigned bsmod __unused = br.getBits(3);
2587
2588    unsigned acmod = br.getBits(3);
2589    unsigned lfeon = br.getBits(1);
2590    unsigned channelCount = channelCountTable[acmod] + lfeon;
2591
2592    if (mLastTrack == NULL) {
2593        return ERROR_MALFORMED;
2594    }
2595    mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3);
2596    mLastTrack->meta.setInt32(kKeyChannelCount, channelCount);
2597    mLastTrack->meta.setInt32(kKeySampleRate, sampleRate);
2598    return OK;
2599}
2600
2601status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
2602  ALOGV("MPEG4Extractor::parseSegmentIndex");
2603
2604    if (size < 12) {
2605      return -EINVAL;
2606    }
2607
2608    uint32_t flags;
2609    if (!mDataSource->getUInt32(offset, &flags)) {
2610        return ERROR_MALFORMED;
2611    }
2612
2613    uint32_t version = flags >> 24;
2614    flags &= 0xffffff;
2615
2616    ALOGV("sidx version %d", version);
2617
2618    uint32_t referenceId;
2619    if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
2620        return ERROR_MALFORMED;
2621    }
2622
2623    uint32_t timeScale;
2624    if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
2625        return ERROR_MALFORMED;
2626    }
2627    ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
2628    if (timeScale == 0)
2629        return ERROR_MALFORMED;
2630
2631    uint64_t earliestPresentationTime;
2632    uint64_t firstOffset;
2633
2634    offset += 12;
2635    size -= 12;
2636
2637    if (version == 0) {
2638        if (size < 8) {
2639            return -EINVAL;
2640        }
2641        uint32_t tmp;
2642        if (!mDataSource->getUInt32(offset, &tmp)) {
2643            return ERROR_MALFORMED;
2644        }
2645        earliestPresentationTime = tmp;
2646        if (!mDataSource->getUInt32(offset + 4, &tmp)) {
2647            return ERROR_MALFORMED;
2648        }
2649        firstOffset = tmp;
2650        offset += 8;
2651        size -= 8;
2652    } else {
2653        if (size < 16) {
2654            return -EINVAL;
2655        }
2656        if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
2657            return ERROR_MALFORMED;
2658        }
2659        if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
2660            return ERROR_MALFORMED;
2661        }
2662        offset += 16;
2663        size -= 16;
2664    }
2665    ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
2666
2667    if (size < 4) {
2668        return -EINVAL;
2669    }
2670
2671    uint16_t referenceCount;
2672    if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
2673        return ERROR_MALFORMED;
2674    }
2675    offset += 4;
2676    size -= 4;
2677    ALOGV("refcount: %d", referenceCount);
2678
2679    if (size < referenceCount * 12) {
2680        return -EINVAL;
2681    }
2682
2683    uint64_t total_duration = 0;
2684    for (unsigned int i = 0; i < referenceCount; i++) {
2685        uint32_t d1, d2, d3;
2686
2687        if (!mDataSource->getUInt32(offset, &d1) ||     // size
2688            !mDataSource->getUInt32(offset + 4, &d2) || // duration
2689            !mDataSource->getUInt32(offset + 8, &d3)) { // flags
2690            return ERROR_MALFORMED;
2691        }
2692
2693        if (d1 & 0x80000000) {
2694            ALOGW("sub-sidx boxes not supported yet");
2695        }
2696        bool sap = d3 & 0x80000000;
2697        uint32_t saptype = (d3 >> 28) & 7;
2698        if (!sap || (saptype != 1 && saptype != 2)) {
2699            // type 1 and 2 are sync samples
2700            ALOGW("not a stream access point, or unsupported type: %08x", d3);
2701        }
2702        total_duration += d2;
2703        offset += 12;
2704        ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
2705        SidxEntry se;
2706        se.mSize = d1 & 0x7fffffff;
2707        se.mDurationUs = 1000000LL * d2 / timeScale;
2708        mSidxEntries.add(se);
2709    }
2710
2711    uint64_t sidxDuration = total_duration * 1000000 / timeScale;
2712
2713    if (mLastTrack == NULL)
2714        return ERROR_MALFORMED;
2715
2716    int64_t metaDuration;
2717    if (!mLastTrack->meta.findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
2718        mLastTrack->meta.setInt64(kKeyDuration, sidxDuration);
2719    }
2720    return OK;
2721}
2722
2723status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
2724    if (size < 8) {
2725        return ERROR_MALFORMED;
2726    }
2727
2728    uint32_t count;
2729    if (!mDataSource->getUInt32(offset + 4, &count)) {
2730        return ERROR_MALFORMED;
2731    }
2732
2733    if (mMetaKeyMap.size() > 0) {
2734        ALOGW("'keys' atom seen again, discarding existing entries");
2735        mMetaKeyMap.clear();
2736    }
2737
2738    off64_t keyOffset = offset + 8;
2739    off64_t stopOffset = offset + size;
2740    for (size_t i = 1; i <= count; i++) {
2741        if (keyOffset + 8 > stopOffset) {
2742            return ERROR_MALFORMED;
2743        }
2744
2745        uint32_t keySize;
2746        if (!mDataSource->getUInt32(keyOffset, &keySize)
2747                || keySize < 8
2748                || keyOffset + keySize > stopOffset) {
2749            return ERROR_MALFORMED;
2750        }
2751
2752        uint32_t type;
2753        if (!mDataSource->getUInt32(keyOffset + 4, &type)
2754                || type != FOURCC('m', 'd', 't', 'a')) {
2755            return ERROR_MALFORMED;
2756        }
2757
2758        keySize -= 8;
2759        keyOffset += 8;
2760
2761        auto keyData = heapbuffer<uint8_t>(keySize);
2762        if (keyData.get() == NULL) {
2763            return ERROR_MALFORMED;
2764        }
2765        if (mDataSource->readAt(
2766                keyOffset, keyData.get(), keySize) < (ssize_t) keySize) {
2767            return ERROR_MALFORMED;
2768        }
2769
2770        AString key((const char *)keyData.get(), keySize);
2771        mMetaKeyMap.add(i, key);
2772
2773        keyOffset += keySize;
2774    }
2775    return OK;
2776}
2777
2778status_t MPEG4Extractor::parseQTMetaVal(
2779        int32_t keyId, off64_t offset, size_t size) {
2780    ssize_t index = mMetaKeyMap.indexOfKey(keyId);
2781    if (index < 0) {
2782        // corresponding key is not present, ignore
2783        return ERROR_MALFORMED;
2784    }
2785
2786    if (size <= 16) {
2787        return ERROR_MALFORMED;
2788    }
2789    uint32_t dataSize;
2790    if (!mDataSource->getUInt32(offset, &dataSize)
2791            || dataSize > size || dataSize <= 16) {
2792        return ERROR_MALFORMED;
2793    }
2794    uint32_t atomFourCC;
2795    if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
2796            || atomFourCC != FOURCC('d', 'a', 't', 'a')) {
2797        return ERROR_MALFORMED;
2798    }
2799    uint32_t dataType;
2800    if (!mDataSource->getUInt32(offset + 8, &dataType)
2801            || ((dataType & 0xff000000) != 0)) {
2802        // not well-known type
2803        return ERROR_MALFORMED;
2804    }
2805
2806    dataSize -= 16;
2807    offset += 16;
2808
2809    if (dataType == 23 && dataSize >= 4) {
2810        // BE Float32
2811        uint32_t val;
2812        if (!mDataSource->getUInt32(offset, &val)) {
2813            return ERROR_MALFORMED;
2814        }
2815        if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
2816            mFileMetaData.setFloat(kKeyCaptureFramerate, *(float *)&val);
2817        }
2818    } else if (dataType == 67 && dataSize >= 4) {
2819        // BE signed int32
2820        uint32_t val;
2821        if (!mDataSource->getUInt32(offset, &val)) {
2822            return ERROR_MALFORMED;
2823        }
2824        if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
2825            mFileMetaData.setInt32(kKeyTemporalLayerCount, val);
2826        }
2827    } else {
2828        // add more keys if needed
2829        ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
2830    }
2831
2832    return OK;
2833}
2834
2835status_t MPEG4Extractor::parseTrackHeader(
2836        off64_t data_offset, off64_t data_size) {
2837    if (data_size < 4) {
2838        return ERROR_MALFORMED;
2839    }
2840
2841    uint8_t version;
2842    if (mDataSource->readAt(data_offset, &version, 1) < 1) {
2843        return ERROR_IO;
2844    }
2845
2846    size_t dynSize = (version == 1) ? 36 : 24;
2847
2848    uint8_t buffer[36 + 60];
2849
2850    if (data_size != (off64_t)dynSize + 60) {
2851        return ERROR_MALFORMED;
2852    }
2853
2854    if (mDataSource->readAt(
2855                data_offset, buffer, data_size) < (ssize_t)data_size) {
2856        return ERROR_IO;
2857    }
2858
2859    uint64_t ctime __unused, mtime __unused, duration __unused;
2860    int32_t id;
2861
2862    if (version == 1) {
2863        ctime = U64_AT(&buffer[4]);
2864        mtime = U64_AT(&buffer[12]);
2865        id = U32_AT(&buffer[20]);
2866        duration = U64_AT(&buffer[28]);
2867    } else if (version == 0) {
2868        ctime = U32_AT(&buffer[4]);
2869        mtime = U32_AT(&buffer[8]);
2870        id = U32_AT(&buffer[12]);
2871        duration = U32_AT(&buffer[20]);
2872    } else {
2873        return ERROR_UNSUPPORTED;
2874    }
2875
2876    if (mLastTrack == NULL)
2877        return ERROR_MALFORMED;
2878
2879    mLastTrack->meta.setInt32(kKeyTrackID, id);
2880
2881    size_t matrixOffset = dynSize + 16;
2882    int32_t a00 = U32_AT(&buffer[matrixOffset]);
2883    int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
2884    int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
2885    int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
2886
2887#if 0
2888    int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
2889    int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
2890
2891    ALOGI("x' = %.2f * x + %.2f * y + %.2f",
2892         a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
2893    ALOGI("y' = %.2f * x + %.2f * y + %.2f",
2894         a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
2895#endif
2896
2897    uint32_t rotationDegrees;
2898
2899    static const int32_t kFixedOne = 0x10000;
2900    if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
2901        // Identity, no rotation
2902        rotationDegrees = 0;
2903    } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2904        rotationDegrees = 90;
2905    } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2906        rotationDegrees = 270;
2907    } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2908        rotationDegrees = 180;
2909    } else {
2910        ALOGW("We only support 0,90,180,270 degree rotation matrices");
2911        rotationDegrees = 0;
2912    }
2913
2914    if (rotationDegrees != 0) {
2915        mLastTrack->meta.setInt32(kKeyRotation, rotationDegrees);
2916    }
2917
2918    // Handle presentation display size, which could be different
2919    // from the image size indicated by kKeyWidth and kKeyHeight.
2920    uint32_t width = U32_AT(&buffer[dynSize + 52]);
2921    uint32_t height = U32_AT(&buffer[dynSize + 56]);
2922    mLastTrack->meta.setInt32(kKeyDisplayWidth, width >> 16);
2923    mLastTrack->meta.setInt32(kKeyDisplayHeight, height >> 16);
2924
2925    return OK;
2926}
2927
2928status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
2929    if (size == 0) {
2930        return OK;
2931    }
2932
2933    if (size < 4 || size == SIZE_MAX) {
2934        return ERROR_MALFORMED;
2935    }
2936
2937    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2938    if (buffer == NULL) {
2939        return ERROR_MALFORMED;
2940    }
2941    if (mDataSource->readAt(
2942                offset, buffer, size) != (ssize_t)size) {
2943        delete[] buffer;
2944        buffer = NULL;
2945
2946        return ERROR_IO;
2947    }
2948
2949    uint32_t flags = U32_AT(buffer);
2950
2951    uint32_t metadataKey = 0;
2952    char chunk[5];
2953    MakeFourCCString(mPath[4], chunk);
2954    ALOGV("meta: %s @ %lld", chunk, (long long)offset);
2955    switch ((int32_t)mPath[4]) {
2956        case FOURCC(0xa9, 'a', 'l', 'b'):
2957        {
2958            metadataKey = kKeyAlbum;
2959            break;
2960        }
2961        case FOURCC(0xa9, 'A', 'R', 'T'):
2962        {
2963            metadataKey = kKeyArtist;
2964            break;
2965        }
2966        case FOURCC('a', 'A', 'R', 'T'):
2967        {
2968            metadataKey = kKeyAlbumArtist;
2969            break;
2970        }
2971        case FOURCC(0xa9, 'd', 'a', 'y'):
2972        {
2973            metadataKey = kKeyYear;
2974            break;
2975        }
2976        case FOURCC(0xa9, 'n', 'a', 'm'):
2977        {
2978            metadataKey = kKeyTitle;
2979            break;
2980        }
2981        case FOURCC(0xa9, 'w', 'r', 't'):
2982        {
2983            metadataKey = kKeyWriter;
2984            break;
2985        }
2986        case FOURCC('c', 'o', 'v', 'r'):
2987        {
2988            metadataKey = kKeyAlbumArt;
2989            break;
2990        }
2991        case FOURCC('g', 'n', 'r', 'e'):
2992        {
2993            metadataKey = kKeyGenre;
2994            break;
2995        }
2996        case FOURCC(0xa9, 'g', 'e', 'n'):
2997        {
2998            metadataKey = kKeyGenre;
2999            break;
3000        }
3001        case FOURCC('c', 'p', 'i', 'l'):
3002        {
3003            if (size == 9 && flags == 21) {
3004                char tmp[16];
3005                sprintf(tmp, "%d",
3006                        (int)buffer[size - 1]);
3007
3008                mFileMetaData.setCString(kKeyCompilation, tmp);
3009            }
3010            break;
3011        }
3012        case FOURCC('t', 'r', 'k', 'n'):
3013        {
3014            if (size == 16 && flags == 0) {
3015                char tmp[16];
3016                uint16_t* pTrack = (uint16_t*)&buffer[10];
3017                uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
3018                sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
3019
3020                mFileMetaData.setCString(kKeyCDTrackNumber, tmp);
3021            }
3022            break;
3023        }
3024        case FOURCC('d', 'i', 's', 'k'):
3025        {
3026            if ((size == 14 || size == 16) && flags == 0) {
3027                char tmp[16];
3028                uint16_t* pDisc = (uint16_t*)&buffer[10];
3029                uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
3030                sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
3031
3032                mFileMetaData.setCString(kKeyDiscNumber, tmp);
3033            }
3034            break;
3035        }
3036        case FOURCC('-', '-', '-', '-'):
3037        {
3038            buffer[size] = '\0';
3039            switch (mPath[5]) {
3040                case FOURCC('m', 'e', 'a', 'n'):
3041                    mLastCommentMean.setTo((const char *)buffer + 4);
3042                    break;
3043                case FOURCC('n', 'a', 'm', 'e'):
3044                    mLastCommentName.setTo((const char *)buffer + 4);
3045                    break;
3046                case FOURCC('d', 'a', 't', 'a'):
3047                    if (size < 8) {
3048                        delete[] buffer;
3049                        buffer = NULL;
3050                        ALOGE("b/24346430");
3051                        return ERROR_MALFORMED;
3052                    }
3053                    mLastCommentData.setTo((const char *)buffer + 8);
3054                    break;
3055            }
3056
3057            // Once we have a set of mean/name/data info, go ahead and process
3058            // it to see if its something we are interested in.  Whether or not
3059            // were are interested in the specific tag, make sure to clear out
3060            // the set so we can be ready to process another tuple should one
3061            // show up later in the file.
3062            if ((mLastCommentMean.length() != 0) &&
3063                (mLastCommentName.length() != 0) &&
3064                (mLastCommentData.length() != 0)) {
3065
3066                if (mLastCommentMean == "com.apple.iTunes"
3067                        && mLastCommentName == "iTunSMPB") {
3068                    int32_t delay, padding;
3069                    if (sscanf(mLastCommentData,
3070                               " %*x %x %x %*x", &delay, &padding) == 2) {
3071                        if (mLastTrack == NULL) {
3072                            delete[] buffer;
3073                            return ERROR_MALFORMED;
3074                        }
3075
3076                        mLastTrack->meta.setInt32(kKeyEncoderDelay, delay);
3077                        mLastTrack->meta.setInt32(kKeyEncoderPadding, padding);
3078                    }
3079                }
3080
3081                mLastCommentMean.clear();
3082                mLastCommentName.clear();
3083                mLastCommentData.clear();
3084            }
3085            break;
3086        }
3087
3088        default:
3089            break;
3090    }
3091
3092    if (size >= 8 && metadataKey && !mFileMetaData.hasData(metadataKey)) {
3093        if (metadataKey == kKeyAlbumArt) {
3094            mFileMetaData.setData(
3095                    kKeyAlbumArt, MetaData::TYPE_NONE,
3096                    buffer + 8, size - 8);
3097        } else if (metadataKey == kKeyGenre) {
3098            if (flags == 0) {
3099                // uint8_t genre code, iTunes genre codes are
3100                // the standard id3 codes, except they start
3101                // at 1 instead of 0 (e.g. Pop is 14, not 13)
3102                // We use standard id3 numbering, so subtract 1.
3103                int genrecode = (int)buffer[size - 1];
3104                genrecode--;
3105                if (genrecode < 0) {
3106                    genrecode = 255; // reserved for 'unknown genre'
3107                }
3108                char genre[10];
3109                sprintf(genre, "%d", genrecode);
3110
3111                mFileMetaData.setCString(metadataKey, genre);
3112            } else if (flags == 1) {
3113                // custom genre string
3114                buffer[size] = '\0';
3115
3116                mFileMetaData.setCString(
3117                        metadataKey, (const char *)buffer + 8);
3118            }
3119        } else {
3120            buffer[size] = '\0';
3121
3122            mFileMetaData.setCString(
3123                    metadataKey, (const char *)buffer + 8);
3124        }
3125    }
3126
3127    delete[] buffer;
3128    buffer = NULL;
3129
3130    return OK;
3131}
3132
3133status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
3134    if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
3135        return ERROR_MALFORMED;
3136    }
3137
3138    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3139    if (buffer == NULL) {
3140        return ERROR_MALFORMED;
3141    }
3142    if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
3143        delete[] buffer;
3144        buffer = NULL;
3145
3146        return ERROR_IO;
3147    }
3148
3149    int32_t type = U32_AT(&buffer[0]);
3150    if ((type == FOURCC('n', 'c', 'l', 'x') && size >= 11)
3151            || (type == FOURCC('n', 'c', 'l', 'c') && size >= 10)) {
3152        int32_t primaries = U16_AT(&buffer[4]);
3153        int32_t transfer = U16_AT(&buffer[6]);
3154        int32_t coeffs = U16_AT(&buffer[8]);
3155        bool fullRange = (type == FOURCC('n', 'c', 'l', 'x')) && (buffer[10] & 128);
3156
3157        ColorAspects aspects;
3158        ColorUtils::convertIsoColorAspectsToCodecAspects(
3159                primaries, transfer, coeffs, fullRange, aspects);
3160
3161        // only store the first color specification
3162        if (!mLastTrack->meta.hasData(kKeyColorPrimaries)) {
3163            mLastTrack->meta.setInt32(kKeyColorPrimaries, aspects.mPrimaries);
3164            mLastTrack->meta.setInt32(kKeyTransferFunction, aspects.mTransfer);
3165            mLastTrack->meta.setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs);
3166            mLastTrack->meta.setInt32(kKeyColorRange, aspects.mRange);
3167        }
3168    }
3169
3170    delete[] buffer;
3171    buffer = NULL;
3172
3173    return OK;
3174}
3175
3176status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
3177    if (size < 4 || size == SIZE_MAX) {
3178        return ERROR_MALFORMED;
3179    }
3180
3181    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3182    if (buffer == NULL) {
3183        return ERROR_MALFORMED;
3184    }
3185    if (mDataSource->readAt(
3186                offset, buffer, size) != (ssize_t)size) {
3187        delete[] buffer;
3188        buffer = NULL;
3189
3190        return ERROR_IO;
3191    }
3192
3193    uint32_t metadataKey = 0;
3194    switch (mPath[depth]) {
3195        case FOURCC('t', 'i', 't', 'l'):
3196        {
3197            metadataKey = kKeyTitle;
3198            break;
3199        }
3200        case FOURCC('p', 'e', 'r', 'f'):
3201        {
3202            metadataKey = kKeyArtist;
3203            break;
3204        }
3205        case FOURCC('a', 'u', 't', 'h'):
3206        {
3207            metadataKey = kKeyWriter;
3208            break;
3209        }
3210        case FOURCC('g', 'n', 'r', 'e'):
3211        {
3212            metadataKey = kKeyGenre;
3213            break;
3214        }
3215        case FOURCC('a', 'l', 'b', 'm'):
3216        {
3217            if (buffer[size - 1] != '\0') {
3218              char tmp[4];
3219              sprintf(tmp, "%u", buffer[size - 1]);
3220
3221              mFileMetaData.setCString(kKeyCDTrackNumber, tmp);
3222            }
3223
3224            metadataKey = kKeyAlbum;
3225            break;
3226        }
3227        case FOURCC('y', 'r', 'r', 'c'):
3228        {
3229            if (size < 6) {
3230                delete[] buffer;
3231                buffer = NULL;
3232                ALOGE("b/62133227");
3233                android_errorWriteLog(0x534e4554, "62133227");
3234                return ERROR_MALFORMED;
3235            }
3236            char tmp[5];
3237            uint16_t year = U16_AT(&buffer[4]);
3238
3239            if (year < 10000) {
3240                sprintf(tmp, "%u", year);
3241
3242                mFileMetaData.setCString(kKeyYear, tmp);
3243            }
3244            break;
3245        }
3246
3247        default:
3248            break;
3249    }
3250
3251    if (metadataKey > 0) {
3252        bool isUTF8 = true; // Common case
3253        char16_t *framedata = NULL;
3254        int len16 = 0; // Number of UTF-16 characters
3255
3256        // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
3257        if (size < 6) {
3258            delete[] buffer;
3259            buffer = NULL;
3260            return ERROR_MALFORMED;
3261        }
3262
3263        if (size - 6 >= 4) {
3264            len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
3265            framedata = (char16_t *)(buffer + 6);
3266            if (0xfffe == *framedata) {
3267                // endianness marker (BOM) doesn't match host endianness
3268                for (int i = 0; i < len16; i++) {
3269                    framedata[i] = bswap_16(framedata[i]);
3270                }
3271                // BOM is now swapped to 0xfeff, we will execute next block too
3272            }
3273
3274            if (0xfeff == *framedata) {
3275                // Remove the BOM
3276                framedata++;
3277                len16--;
3278                isUTF8 = false;
3279            }
3280            // else normal non-zero-length UTF-8 string
3281            // we can't handle UTF-16 without BOM as there is no other
3282            // indication of encoding.
3283        }
3284
3285        if (isUTF8) {
3286            buffer[size] = 0;
3287            mFileMetaData.setCString(metadataKey, (const char *)buffer + 6);
3288        } else {
3289            // Convert from UTF-16 string to UTF-8 string.
3290            String8 tmpUTF8str(framedata, len16);
3291            mFileMetaData.setCString(metadataKey, tmpUTF8str.string());
3292        }
3293    }
3294
3295    delete[] buffer;
3296    buffer = NULL;
3297
3298    return OK;
3299}
3300
3301void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
3302    ID3 id3(mDataSource, true /* ignorev1 */, offset);
3303
3304    if (id3.isValid()) {
3305        struct Map {
3306            int key;
3307            const char *tag1;
3308            const char *tag2;
3309        };
3310        static const Map kMap[] = {
3311            { kKeyAlbum, "TALB", "TAL" },
3312            { kKeyArtist, "TPE1", "TP1" },
3313            { kKeyAlbumArtist, "TPE2", "TP2" },
3314            { kKeyComposer, "TCOM", "TCM" },
3315            { kKeyGenre, "TCON", "TCO" },
3316            { kKeyTitle, "TIT2", "TT2" },
3317            { kKeyYear, "TYE", "TYER" },
3318            { kKeyAuthor, "TXT", "TEXT" },
3319            { kKeyCDTrackNumber, "TRK", "TRCK" },
3320            { kKeyDiscNumber, "TPA", "TPOS" },
3321            { kKeyCompilation, "TCP", "TCMP" },
3322        };
3323        static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
3324
3325        for (size_t i = 0; i < kNumMapEntries; ++i) {
3326            if (!mFileMetaData.hasData(kMap[i].key)) {
3327                ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
3328                if (it->done()) {
3329                    delete it;
3330                    it = new ID3::Iterator(id3, kMap[i].tag2);
3331                }
3332
3333                if (it->done()) {
3334                    delete it;
3335                    continue;
3336                }
3337
3338                String8 s;
3339                it->getString(&s);
3340                delete it;
3341
3342                mFileMetaData.setCString(kMap[i].key, s);
3343            }
3344        }
3345
3346        size_t dataSize;
3347        String8 mime;
3348        const void *data = id3.getAlbumArt(&dataSize, &mime);
3349
3350        if (data) {
3351            mFileMetaData.setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
3352            mFileMetaData.setCString(kKeyAlbumArtMIME, mime.string());
3353        }
3354    }
3355}
3356
3357MediaTrack *MPEG4Extractor::getTrack(size_t index) {
3358    status_t err;
3359    if ((err = readMetaData()) != OK) {
3360        return NULL;
3361    }
3362
3363    Track *track = mFirstTrack;
3364    while (index > 0) {
3365        if (track == NULL) {
3366            return NULL;
3367        }
3368
3369        track = track->next;
3370        --index;
3371    }
3372
3373    if (track == NULL) {
3374        return NULL;
3375    }
3376
3377
3378    Trex *trex = NULL;
3379    int32_t trackId;
3380    if (track->meta.findInt32(kKeyTrackID, &trackId)) {
3381        for (size_t i = 0; i < mTrex.size(); i++) {
3382            Trex *t = &mTrex.editItemAt(i);
3383            if (t->track_ID == (uint32_t) trackId) {
3384                trex = t;
3385                break;
3386            }
3387        }
3388    } else {
3389        ALOGE("b/21657957");
3390        return NULL;
3391    }
3392
3393    ALOGV("getTrack called, pssh: %zu", mPssh.size());
3394
3395    const char *mime;
3396    if (!track->meta.findCString(kKeyMIMEType, &mime)) {
3397        return NULL;
3398    }
3399
3400    sp<ItemTable> itemTable;
3401    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
3402        uint32_t type;
3403        const void *data;
3404        size_t size;
3405        if (!track->meta.findData(kKeyAVCC, &type, &data, &size)) {
3406            return NULL;
3407        }
3408
3409        const uint8_t *ptr = (const uint8_t *)data;
3410
3411        if (size < 7 || ptr[0] != 1) {  // configurationVersion == 1
3412            return NULL;
3413        }
3414    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
3415            || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
3416        uint32_t type;
3417        const void *data;
3418        size_t size;
3419        if (!track->meta.findData(kKeyHVCC, &type, &data, &size)) {
3420            return NULL;
3421        }
3422
3423        const uint8_t *ptr = (const uint8_t *)data;
3424
3425        if (size < 22 || ptr[0] != 1) {  // configurationVersion == 1
3426            return NULL;
3427        }
3428        if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
3429            itemTable = mItemTable;
3430        }
3431    }
3432
3433    MPEG4Source *source =  new MPEG4Source(
3434            track->meta, mDataSource, track->timescale, track->sampleTable,
3435            mSidxEntries, trex, mMoofOffset, itemTable);
3436    if (source->init() != OK) {
3437        delete source;
3438        return NULL;
3439    }
3440    return source;
3441}
3442
3443// static
3444status_t MPEG4Extractor::verifyTrack(Track *track) {
3445    const char *mime;
3446    CHECK(track->meta.findCString(kKeyMIMEType, &mime));
3447
3448    uint32_t type;
3449    const void *data;
3450    size_t size;
3451    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
3452        if (!track->meta.findData(kKeyAVCC, &type, &data, &size)
3453                || type != kTypeAVCC) {
3454            return ERROR_MALFORMED;
3455        }
3456    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
3457        if (!track->meta.findData(kKeyHVCC, &type, &data, &size)
3458                    || type != kTypeHVCC) {
3459            return ERROR_MALFORMED;
3460        }
3461    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
3462            || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
3463            || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
3464        if (!track->meta.findData(kKeyESDS, &type, &data, &size)
3465                || type != kTypeESDS) {
3466            return ERROR_MALFORMED;
3467        }
3468    }
3469
3470    if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
3471        // Make sure we have all the metadata we need.
3472        ALOGE("stbl atom missing/invalid.");
3473        return ERROR_MALFORMED;
3474    }
3475
3476    if (track->timescale == 0) {
3477        ALOGE("timescale invalid.");
3478        return ERROR_MALFORMED;
3479    }
3480
3481    return OK;
3482}
3483
// MPEG-4 audio object type (AOT) codes compared against while parsing the
// codec specific data of an esds box. Only the enumerators below are
// defined; the remaining codes are listed for reference only:
//   -1 NONE, 0 NULL_OBJECT, 1 AAC_MAIN, 3 AAC_SSR, 4 AAC_LTP, 6 AAC_SCAL,
//   7 TWIN_VQ, 8 CELP, 9 HVXC, 10/11 reserved, 12 TTSI, 13 MAIN_SYNTH,
//   14 WAV_TAB_SYNTH, 15 GEN_MIDI, 16 ALG_SYNTH_AUD_FX, 18 reserved,
//   19 ER_AAC_LTP, 21 ER_TWIN_VQ, 24 ER_CELP, 25 ER_HVXC, 26 ER_HILN,
//   27 ER_PARA, 28 reserved (might become SSC), 30 MPEGS,
//   32/33/34 MP3ONMP4 L1/L2/L3, 35 reserved (might become DST),
//   36 reserved (might become ALS), 37 AAC_SLS, 38 SLS, 39 ER_AAC_ELD,
//   42 USAC, 43 SAOC, 44 LD_MPEGS, 50 RSVD50 (interim).
enum AUDIO_OBJECT_TYPE {
    AOT_AAC_LC      = 2,   // Low Complexity object
    AOT_SBR         = 5,   // SBR
    AOT_ER_AAC_LC   = 17,  // Error Resilient (ER) AAC Low Complexity
    AOT_ER_AAC_SCAL = 20,  // ER AAC Scalable object
    AOT_ER_BSAC     = 22,  // ER BSAC object
    AOT_ER_AAC_LD   = 23,  // ER AAC LowDelay object
    AOT_PS          = 29,  // PS, Parametric Stereo (includes SBR)
    AOT_ESCAPE      = 31,  // Signals that the AOT uses more than 5 bits
};
3535
3536status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
3537        const void *esds_data, size_t esds_size) {
3538    ESDS esds(esds_data, esds_size);
3539
3540    uint8_t objectTypeIndication;
3541    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
3542        return ERROR_MALFORMED;
3543    }
3544
3545    if (objectTypeIndication == 0xe1) {
3546        // This isn't MPEG4 audio at all, it's QCELP 14k...
3547        if (mLastTrack == NULL)
3548            return ERROR_MALFORMED;
3549
3550        mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
3551        return OK;
3552    }
3553
3554    if (objectTypeIndication  == 0x6b) {
3555        // The media subtype is MP3 audio
3556        // Our software MP3 audio decoder may not be able to handle
3557        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
3558        ALOGE("MP3 track in MP4/3GPP file is not supported");
3559        return ERROR_UNSUPPORTED;
3560    }
3561
3562    if (mLastTrack != NULL) {
3563        uint32_t maxBitrate = 0;
3564        uint32_t avgBitrate = 0;
3565        esds.getBitRate(&maxBitrate, &avgBitrate);
3566        if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
3567            mLastTrack->meta.setInt32(kKeyMaxBitRate, (int32_t)maxBitrate);
3568        }
3569        if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
3570            mLastTrack->meta.setInt32(kKeyBitRate, (int32_t)avgBitrate);
3571        }
3572    }
3573
3574    const uint8_t *csd;
3575    size_t csd_size;
3576    if (esds.getCodecSpecificInfo(
3577                (const void **)&csd, &csd_size) != OK) {
3578        return ERROR_MALFORMED;
3579    }
3580
3581    if (kUseHexDump) {
3582        printf("ESD of size %zu\n", csd_size);
3583        hexdump(csd, csd_size);
3584    }
3585
3586    if (csd_size == 0) {
3587        // There's no further information, i.e. no codec specific data
3588        // Let's assume that the information provided in the mpeg4 headers
3589        // is accurate and hope for the best.
3590
3591        return OK;
3592    }
3593
3594    if (csd_size < 2) {
3595        return ERROR_MALFORMED;
3596    }
3597
3598    static uint32_t kSamplingRate[] = {
3599        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
3600        16000, 12000, 11025, 8000, 7350
3601    };
3602
3603    ABitReader br(csd, csd_size);
3604    uint32_t objectType = br.getBits(5);
3605
3606    if (objectType == 31) {  // AAC-ELD => additional 6 bits
3607        objectType = 32 + br.getBits(6);
3608    }
3609
3610    if (mLastTrack == NULL)
3611        return ERROR_MALFORMED;
3612
3613    //keep AOT type
3614    mLastTrack->meta.setInt32(kKeyAACAOT, objectType);
3615
3616    uint32_t freqIndex = br.getBits(4);
3617
3618    int32_t sampleRate = 0;
3619    int32_t numChannels = 0;
3620    if (freqIndex == 15) {
3621        if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
3622        sampleRate = br.getBits(24);
3623        numChannels = br.getBits(4);
3624    } else {
3625        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3626        numChannels = br.getBits(4);
3627
3628        if (freqIndex == 13 || freqIndex == 14) {
3629            return ERROR_MALFORMED;
3630        }
3631
3632        sampleRate = kSamplingRate[freqIndex];
3633    }
3634
3635    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
3636        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3637        uint32_t extFreqIndex = br.getBits(4);
3638        int32_t extSampleRate __unused;
3639        if (extFreqIndex == 15) {
3640            if (csd_size < 8) {
3641                return ERROR_MALFORMED;
3642            }
3643            if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
3644            extSampleRate = br.getBits(24);
3645        } else {
3646            if (extFreqIndex == 13 || extFreqIndex == 14) {
3647                return ERROR_MALFORMED;
3648            }
3649            extSampleRate = kSamplingRate[extFreqIndex];
3650        }
3651        //TODO: save the extension sampling rate value in meta data =>
3652        //      mLastTrack->meta.setInt32(kKeyExtSampleRate, extSampleRate);
3653    }
3654
3655    switch (numChannels) {
3656        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
3657        case 0:
3658        case 1:// FC
3659        case 2:// FL FR
3660        case 3:// FC, FL FR
3661        case 4:// FC, FL FR, RC
3662        case 5:// FC, FL FR, SL SR
3663        case 6:// FC, FL FR, SL SR, LFE
3664            //numChannels already contains the right value
3665            break;
3666        case 11:// FC, FL FR, SL SR, RC, LFE
3667            numChannels = 7;
3668            break;
3669        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
3670        case 12:// FC, FL  FR,  SL SR, RL RR, LFE
3671        case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
3672            numChannels = 8;
3673            break;
3674        default:
3675            return ERROR_UNSUPPORTED;
3676    }
3677
3678    {
3679        if (objectType == AOT_SBR || objectType == AOT_PS) {
3680            if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3681            objectType = br.getBits(5);
3682
3683            if (objectType == AOT_ESCAPE) {
3684                if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
3685                objectType = 32 + br.getBits(6);
3686            }
3687        }
3688        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
3689                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
3690                objectType == AOT_ER_BSAC) {
3691            if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
3692            const int32_t frameLengthFlag __unused = br.getBits(1);
3693
3694            const int32_t dependsOnCoreCoder = br.getBits(1);
3695
3696            if (dependsOnCoreCoder ) {
3697                if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
3698                const int32_t coreCoderDelay __unused = br.getBits(14);
3699            }
3700
3701            int32_t extensionFlag = -1;
3702            if (br.numBitsLeft() > 0) {
3703                extensionFlag = br.getBits(1);
3704            } else {
3705                switch (objectType) {
3706                // 14496-3 4.5.1.1 extensionFlag
3707                case AOT_AAC_LC:
3708                    extensionFlag = 0;
3709                    break;
3710                case AOT_ER_AAC_LC:
3711                case AOT_ER_AAC_SCAL:
3712                case AOT_ER_BSAC:
3713                case AOT_ER_AAC_LD:
3714                    extensionFlag = 1;
3715                    break;
3716                default:
3717                    return ERROR_MALFORMED;
3718                    break;
3719                }
3720                ALOGW("csd missing extension flag; assuming %d for object type %u.",
3721                        extensionFlag, objectType);
3722            }
3723
3724            if (numChannels == 0) {
3725                int32_t channelsEffectiveNum = 0;
3726                int32_t channelsNum = 0;
3727                if (br.numBitsLeft() < 32) {
3728                    return ERROR_MALFORMED;
3729                }
3730                const int32_t ElementInstanceTag __unused = br.getBits(4);
3731                const int32_t Profile __unused = br.getBits(2);
3732                const int32_t SamplingFrequencyIndex __unused = br.getBits(4);
3733                const int32_t NumFrontChannelElements = br.getBits(4);
3734                const int32_t NumSideChannelElements = br.getBits(4);
3735                const int32_t NumBackChannelElements = br.getBits(4);
3736                const int32_t NumLfeChannelElements = br.getBits(2);
3737                const int32_t NumAssocDataElements __unused = br.getBits(3);
3738                const int32_t NumValidCcElements __unused = br.getBits(4);
3739
3740                const int32_t MonoMixdownPresent = br.getBits(1);
3741
3742                if (MonoMixdownPresent != 0) {
3743                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3744                    const int32_t MonoMixdownElementNumber __unused = br.getBits(4);
3745                }
3746
3747                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
3748                const int32_t StereoMixdownPresent = br.getBits(1);
3749                if (StereoMixdownPresent != 0) {
3750                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3751                    const int32_t StereoMixdownElementNumber __unused = br.getBits(4);
3752                }
3753
3754                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
3755                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
3756                if (MatrixMixdownIndexPresent != 0) {
3757                    if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
3758                    const int32_t MatrixMixdownIndex __unused = br.getBits(2);
3759                    const int32_t PseudoSurroundEnable __unused = br.getBits(1);
3760                }
3761
3762                int i;
3763                for (i=0; i < NumFrontChannelElements; i++) {
3764                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3765                    const int32_t FrontElementIsCpe = br.getBits(1);
3766                    const int32_t FrontElementTagSelect __unused = br.getBits(4);
3767                    channelsNum += FrontElementIsCpe ? 2 : 1;
3768                }
3769
3770                for (i=0; i < NumSideChannelElements; i++) {
3771                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3772                    const int32_t SideElementIsCpe = br.getBits(1);
3773                    const int32_t SideElementTagSelect __unused = br.getBits(4);
3774                    channelsNum += SideElementIsCpe ? 2 : 1;
3775                }
3776
3777                for (i=0; i < NumBackChannelElements; i++) {
3778                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3779                    const int32_t BackElementIsCpe = br.getBits(1);
3780                    const int32_t BackElementTagSelect __unused = br.getBits(4);
3781                    channelsNum += BackElementIsCpe ? 2 : 1;
3782                }
3783                channelsEffectiveNum = channelsNum;
3784
3785                for (i=0; i < NumLfeChannelElements; i++) {
3786                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3787                    const int32_t LfeElementTagSelect __unused = br.getBits(4);
3788                    channelsNum += 1;
3789                }
3790                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
3791                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
3792                numChannels = channelsNum;
3793            }
3794        }
3795    }
3796
3797    if (numChannels == 0) {
3798        return ERROR_UNSUPPORTED;
3799    }
3800
3801    if (mLastTrack == NULL)
3802        return ERROR_MALFORMED;
3803
3804    int32_t prevSampleRate;
3805    CHECK(mLastTrack->meta.findInt32(kKeySampleRate, &prevSampleRate));
3806
3807    if (prevSampleRate != sampleRate) {
3808        ALOGV("mpeg4 audio sample rate different from previous setting. "
3809             "was: %d, now: %d", prevSampleRate, sampleRate);
3810    }
3811
3812    mLastTrack->meta.setInt32(kKeySampleRate, sampleRate);
3813
3814    int32_t prevChannelCount;
3815    CHECK(mLastTrack->meta.findInt32(kKeyChannelCount, &prevChannelCount));
3816
3817    if (prevChannelCount != numChannels) {
3818        ALOGV("mpeg4 audio channel count different from previous setting. "
3819             "was: %d, now: %d", prevChannelCount, numChannels);
3820    }
3821
3822    mLastTrack->meta.setInt32(kKeyChannelCount, numChannels);
3823
3824    return OK;
3825}
3826
3827////////////////////////////////////////////////////////////////////////////////
3828
3829MPEG4Source::MPEG4Source(
3830        MetaDataBase &format,
3831        DataSourceBase *dataSource,
3832        int32_t timeScale,
3833        const sp<SampleTable> &sampleTable,
3834        Vector<SidxEntry> &sidx,
3835        const Trex *trex,
3836        off64_t firstMoofOffset,
3837        const sp<ItemTable> &itemTable)
3838    : mFormat(format),
3839      mDataSource(dataSource),
3840      mTimescale(timeScale),
3841      mSampleTable(sampleTable),
3842      mCurrentSampleIndex(0),
3843      mCurrentFragmentIndex(0),
3844      mSegments(sidx),
3845      mTrex(trex),
3846      mFirstMoofOffset(firstMoofOffset),
3847      mCurrentMoofOffset(firstMoofOffset),
3848      mNextMoofOffset(-1),
3849      mCurrentTime(0),
3850      mDefaultEncryptedByteBlock(0),
3851      mDefaultSkipByteBlock(0),
3852      mCurrentSampleInfoAllocSize(0),
3853      mCurrentSampleInfoSizes(NULL),
3854      mCurrentSampleInfoOffsetsAllocSize(0),
3855      mCurrentSampleInfoOffsets(NULL),
3856      mIsAVC(false),
3857      mIsHEVC(false),
3858      mNALLengthSize(0),
3859      mStarted(false),
3860      mGroup(NULL),
3861      mBuffer(NULL),
3862      mWantsNALFragments(false),
3863      mSrcBuffer(NULL),
3864      mIsHeif(itemTable != NULL),
3865      mItemTable(itemTable) {
3866
3867    memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
3868
3869    mFormat.findInt32(kKeyCryptoMode, &mCryptoMode);
3870    mDefaultIVSize = 0;
3871    mFormat.findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
3872    uint32_t keytype;
3873    const void *key;
3874    size_t keysize;
3875    if (mFormat.findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
3876        CHECK(keysize <= 16);
3877        memset(mCryptoKey, 0, 16);
3878        memcpy(mCryptoKey, key, keysize);
3879    }
3880
3881    mFormat.findInt32(kKeyEncryptedByteBlock, &mDefaultEncryptedByteBlock);
3882    mFormat.findInt32(kKeySkipByteBlock, &mDefaultSkipByteBlock);
3883
3884    const char *mime;
3885    bool success = mFormat.findCString(kKeyMIMEType, &mime);
3886    CHECK(success);
3887
3888    mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
3889    mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
3890              !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
3891
3892    if (mIsAVC) {
3893        uint32_t type;
3894        const void *data;
3895        size_t size;
3896        CHECK(format.findData(kKeyAVCC, &type, &data, &size));
3897
3898        const uint8_t *ptr = (const uint8_t *)data;
3899
3900        CHECK(size >= 7);
3901        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
3902
3903        // The number of bytes used to encode the length of a NAL unit.
3904        mNALLengthSize = 1 + (ptr[4] & 3);
3905    } else if (mIsHEVC) {
3906        uint32_t type;
3907        const void *data;
3908        size_t size;
3909        CHECK(format.findData(kKeyHVCC, &type, &data, &size));
3910
3911        const uint8_t *ptr = (const uint8_t *)data;
3912
3913        CHECK(size >= 22);
3914        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
3915
3916        mNALLengthSize = 1 + (ptr[14 + 7] & 3);
3917    }
3918
3919    CHECK(format.findInt32(kKeyTrackID, &mTrackId));
3920
3921}
3922
3923status_t MPEG4Source::init() {
3924    if (mFirstMoofOffset != 0) {
3925        off64_t offset = mFirstMoofOffset;
3926        return parseChunk(&offset);
3927    }
3928    return OK;
3929}
3930
3931MPEG4Source::~MPEG4Source() {
3932    if (mStarted) {
3933        stop();
3934    }
3935    free(mCurrentSampleInfoSizes);
3936    free(mCurrentSampleInfoOffsets);
3937}
3938
3939status_t MPEG4Source::start(MetaDataBase *params) {
3940    Mutex::Autolock autoLock(mLock);
3941
3942    CHECK(!mStarted);
3943
3944    int32_t val;
3945    if (params && params->findInt32(kKeyWantsNALFragments, &val)
3946        && val != 0) {
3947        mWantsNALFragments = true;
3948    } else {
3949        mWantsNALFragments = false;
3950    }
3951
3952    int32_t tmp;
3953    CHECK(mFormat.findInt32(kKeyMaxInputSize, &tmp));
3954    size_t max_size = tmp;
3955
3956    // A somewhat arbitrary limit that should be sufficient for 8k video frames
3957    // If you see the message below for a valid input stream: increase the limit
3958    const size_t kMaxBufferSize = 64 * 1024 * 1024;
3959    if (max_size > kMaxBufferSize) {
3960        ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize);
3961        return ERROR_MALFORMED;
3962    }
3963    if (max_size == 0) {
3964        ALOGE("zero max input size");
3965        return ERROR_MALFORMED;
3966    }
3967
3968    // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize.
3969    const size_t kInitialBuffers = 2;
3970    const size_t kMaxBuffers = 8;
3971    const size_t realMaxBuffers = min(kMaxBufferSize / max_size, kMaxBuffers);
3972    mGroup = new MediaBufferGroup(kInitialBuffers, max_size, realMaxBuffers);
3973    mSrcBuffer = new (std::nothrow) uint8_t[max_size];
3974    if (mSrcBuffer == NULL) {
3975        // file probably specified a bad max size
3976        delete mGroup;
3977        mGroup = NULL;
3978        return ERROR_MALFORMED;
3979    }
3980
3981    mStarted = true;
3982
3983    return OK;
3984}
3985
3986status_t MPEG4Source::stop() {
3987    Mutex::Autolock autoLock(mLock);
3988
3989    CHECK(mStarted);
3990
3991    if (mBuffer != NULL) {
3992        mBuffer->release();
3993        mBuffer = NULL;
3994    }
3995
3996    delete[] mSrcBuffer;
3997    mSrcBuffer = NULL;
3998
3999    delete mGroup;
4000    mGroup = NULL;
4001
4002    mStarted = false;
4003    mCurrentSampleIndex = 0;
4004
4005    return OK;
4006}
4007
4008status_t MPEG4Source::parseChunk(off64_t *offset) {
4009    uint32_t hdr[2];
4010    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
4011        return ERROR_IO;
4012    }
4013    uint64_t chunk_size = ntohl(hdr[0]);
4014    uint32_t chunk_type = ntohl(hdr[1]);
4015    off64_t data_offset = *offset + 8;
4016
4017    if (chunk_size == 1) {
4018        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
4019            return ERROR_IO;
4020        }
4021        chunk_size = ntoh64(chunk_size);
4022        data_offset += 8;
4023
4024        if (chunk_size < 16) {
4025            // The smallest valid chunk is 16 bytes long in this case.
4026            return ERROR_MALFORMED;
4027        }
4028    } else if (chunk_size < 8) {
4029        // The smallest valid chunk is 8 bytes long.
4030        return ERROR_MALFORMED;
4031    }
4032
4033    char chunk[5];
4034    MakeFourCCString(chunk_type, chunk);
4035    ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
4036
4037    off64_t chunk_data_size = *offset + chunk_size - data_offset;
4038
4039    switch(chunk_type) {
4040
4041        case FOURCC('t', 'r', 'a', 'f'):
4042        case FOURCC('m', 'o', 'o', 'f'): {
4043            off64_t stop_offset = *offset + chunk_size;
4044            *offset = data_offset;
4045            while (*offset < stop_offset) {
4046                status_t err = parseChunk(offset);
4047                if (err != OK) {
4048                    return err;
4049                }
4050            }
4051            if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
4052                // *offset points to the box following this moof. Find the next moof from there.
4053
4054                while (true) {
4055                    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
4056                        // no more box to the end of file.
4057                        break;
4058                    }
4059                    chunk_size = ntohl(hdr[0]);
4060                    chunk_type = ntohl(hdr[1]);
4061                    if (chunk_size == 1) {
4062                        // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box
4063                        // which is defined in 4.2 Object Structure.
4064                        // When chunk_size==1, 8 bytes follows as "largesize".
4065                        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
4066                            return ERROR_IO;
4067                        }
4068                        chunk_size = ntoh64(chunk_size);
4069                        if (chunk_size < 16) {
4070                            // The smallest valid chunk is 16 bytes long in this case.
4071                            return ERROR_MALFORMED;
4072                        }
4073                    } else if (chunk_size == 0) {
4074                        // next box extends to end of file.
4075                    } else if (chunk_size < 8) {
4076                        // The smallest valid chunk is 8 bytes long in this case.
4077                        return ERROR_MALFORMED;
4078                    }
4079
4080                    if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
4081                        mNextMoofOffset = *offset;
4082                        break;
4083                    } else if (chunk_size == 0) {
4084                        break;
4085                    }
4086                    *offset += chunk_size;
4087                }
4088            }
4089            break;
4090        }
4091
4092        case FOURCC('t', 'f', 'h', 'd'): {
4093                status_t err;
4094                if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
4095                    return err;
4096                }
4097                *offset += chunk_size;
4098                break;
4099        }
4100
4101        case FOURCC('t', 'r', 'u', 'n'): {
4102                status_t err;
4103                if (mLastParsedTrackId == mTrackId) {
4104                    if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
4105                        return err;
4106                    }
4107                }
4108
4109                *offset += chunk_size;
4110                break;
4111        }
4112
4113        case FOURCC('s', 'a', 'i', 'z'): {
4114            status_t err;
4115            if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
4116                return err;
4117            }
4118            *offset += chunk_size;
4119            break;
4120        }
4121        case FOURCC('s', 'a', 'i', 'o'): {
4122            status_t err;
4123            if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
4124                return err;
4125            }
4126            *offset += chunk_size;
4127            break;
4128        }
4129
4130        case FOURCC('s', 'e', 'n', 'c'): {
4131            status_t err;
4132            if ((err = parseSampleEncryption(data_offset)) != OK) {
4133                return err;
4134            }
4135            *offset += chunk_size;
4136            break;
4137        }
4138
4139        case FOURCC('m', 'd', 'a', 't'): {
4140            // parse DRM info if present
4141            ALOGV("MPEG4Source::parseChunk mdat");
4142            // if saiz/saoi was previously observed, do something with the sampleinfos
4143            *offset += chunk_size;
4144            break;
4145        }
4146
4147        default: {
4148            *offset += chunk_size;
4149            break;
4150        }
4151    }
4152    return OK;
4153}
4154
4155status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
4156        off64_t offset, off64_t /* size */) {
4157    ALOGV("parseSampleAuxiliaryInformationSizes");
4158    // 14496-12 8.7.12
4159    uint8_t version;
4160    if (mDataSource->readAt(
4161            offset, &version, sizeof(version))
4162            < (ssize_t)sizeof(version)) {
4163        return ERROR_IO;
4164    }
4165
4166    if (version != 0) {
4167        return ERROR_UNSUPPORTED;
4168    }
4169    offset++;
4170
4171    uint32_t flags;
4172    if (!mDataSource->getUInt24(offset, &flags)) {
4173        return ERROR_IO;
4174    }
4175    offset += 3;
4176
4177    if (flags & 1) {
4178        uint32_t tmp;
4179        if (!mDataSource->getUInt32(offset, &tmp)) {
4180            return ERROR_MALFORMED;
4181        }
4182        mCurrentAuxInfoType = tmp;
4183        offset += 4;
4184        if (!mDataSource->getUInt32(offset, &tmp)) {
4185            return ERROR_MALFORMED;
4186        }
4187        mCurrentAuxInfoTypeParameter = tmp;
4188        offset += 4;
4189    }
4190
4191    uint8_t defsize;
4192    if (mDataSource->readAt(offset, &defsize, 1) != 1) {
4193        return ERROR_MALFORMED;
4194    }
4195    mCurrentDefaultSampleInfoSize = defsize;
4196    offset++;
4197
4198    uint32_t smplcnt;
4199    if (!mDataSource->getUInt32(offset, &smplcnt)) {
4200        return ERROR_MALFORMED;
4201    }
4202    mCurrentSampleInfoCount = smplcnt;
4203    offset += 4;
4204
4205    if (mCurrentDefaultSampleInfoSize != 0) {
4206        ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
4207        return OK;
4208    }
4209    if (smplcnt > mCurrentSampleInfoAllocSize) {
4210        uint8_t * newPtr =  (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
4211        if (newPtr == NULL) {
4212            ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
4213            return NO_MEMORY;
4214        }
4215        mCurrentSampleInfoSizes = newPtr;
4216        mCurrentSampleInfoAllocSize = smplcnt;
4217    }
4218
4219    mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
4220    return OK;
4221}
4222
4223status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
4224        off64_t offset, off64_t /* size */) {
4225    ALOGV("parseSampleAuxiliaryInformationOffsets");
4226    // 14496-12 8.7.13
4227    uint8_t version;
4228    if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
4229        return ERROR_IO;
4230    }
4231    offset++;
4232
4233    uint32_t flags;
4234    if (!mDataSource->getUInt24(offset, &flags)) {
4235        return ERROR_IO;
4236    }
4237    offset += 3;
4238
4239    uint32_t entrycount;
4240    if (!mDataSource->getUInt32(offset, &entrycount)) {
4241        return ERROR_IO;
4242    }
4243    offset += 4;
4244    if (entrycount == 0) {
4245        return OK;
4246    }
4247    if (entrycount > UINT32_MAX / 8) {
4248        return ERROR_MALFORMED;
4249    }
4250
4251    if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
4252        uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
4253        if (newPtr == NULL) {
4254            ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoOffsetsAllocSize, entrycount * 8);
4255            return NO_MEMORY;
4256        }
4257        mCurrentSampleInfoOffsets = newPtr;
4258        mCurrentSampleInfoOffsetsAllocSize = entrycount;
4259    }
4260    mCurrentSampleInfoOffsetCount = entrycount;
4261
4262    if (mCurrentSampleInfoOffsets == NULL) {
4263        return OK;
4264    }
4265
4266    for (size_t i = 0; i < entrycount; i++) {
4267        if (version == 0) {
4268            uint32_t tmp;
4269            if (!mDataSource->getUInt32(offset, &tmp)) {
4270                return ERROR_IO;
4271            }
4272            mCurrentSampleInfoOffsets[i] = tmp;
4273            offset += 4;
4274        } else {
4275            uint64_t tmp;
4276            if (!mDataSource->getUInt64(offset, &tmp)) {
4277                return ERROR_IO;
4278            }
4279            mCurrentSampleInfoOffsets[i] = tmp;
4280            offset += 8;
4281        }
4282    }
4283
4284    // parse clear/encrypted data
4285
4286    off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
4287
4288    drmoffset += mCurrentMoofOffset;
4289
4290    return parseClearEncryptedSizes(drmoffset, false, 0);
4291}
4292
4293status_t MPEG4Source::parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags) {
4294
4295    int ivlength;
4296    CHECK(mFormat.findInt32(kKeyCryptoDefaultIVSize, &ivlength));
4297
4298    // only 0, 8 and 16 byte initialization vectors are supported
4299    if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
4300        ALOGW("unsupported IV length: %d", ivlength);
4301        return ERROR_MALFORMED;
4302    }
4303
4304    uint32_t sampleCount = mCurrentSampleInfoCount;
4305    if (isSubsampleEncryption) {
4306        if (!mDataSource->getUInt32(offset, &sampleCount)) {
4307            return ERROR_IO;
4308        }
4309        offset += 4;
4310    }
4311
4312    // read CencSampleAuxiliaryDataFormats
4313    for (size_t i = 0; i < sampleCount; i++) {
4314        if (i >= mCurrentSamples.size()) {
4315            ALOGW("too few samples");
4316            break;
4317        }
4318        Sample *smpl = &mCurrentSamples.editItemAt(i);
4319        if (!smpl->clearsizes.isEmpty()) {
4320            continue;
4321        }
4322
4323        memset(smpl->iv, 0, 16);
4324        if (mDataSource->readAt(offset, smpl->iv, ivlength) != ivlength) {
4325            return ERROR_IO;
4326        }
4327
4328        offset += ivlength;
4329
4330        bool readSubsamples;
4331        if (isSubsampleEncryption) {
4332            readSubsamples = flags & 2;
4333        } else {
4334            int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
4335            if (smplinfosize == 0) {
4336                smplinfosize = mCurrentSampleInfoSizes[i];
4337            }
4338            readSubsamples = smplinfosize > ivlength;
4339        }
4340
4341        if (readSubsamples) {
4342            uint16_t numsubsamples;
4343            if (!mDataSource->getUInt16(offset, &numsubsamples)) {
4344                return ERROR_IO;
4345            }
4346            offset += 2;
4347            for (size_t j = 0; j < numsubsamples; j++) {
4348                uint16_t numclear;
4349                uint32_t numencrypted;
4350                if (!mDataSource->getUInt16(offset, &numclear)) {
4351                    return ERROR_IO;
4352                }
4353                offset += 2;
4354                if (!mDataSource->getUInt32(offset, &numencrypted)) {
4355                    return ERROR_IO;
4356                }
4357                offset += 4;
4358                smpl->clearsizes.add(numclear);
4359                smpl->encryptedsizes.add(numencrypted);
4360            }
4361        } else {
4362            smpl->clearsizes.add(0);
4363            smpl->encryptedsizes.add(smpl->size);
4364        }
4365    }
4366
4367    return OK;
4368}
4369
4370status_t MPEG4Source::parseSampleEncryption(off64_t offset) {
4371    uint32_t flags;
4372    if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
4373        return ERROR_MALFORMED;
4374    }
4375    return parseClearEncryptedSizes(offset + 4, true, flags);
4376}
4377
4378status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
4379
4380    if (size < 8) {
4381        return -EINVAL;
4382    }
4383
4384    uint32_t flags;
4385    if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
4386        return ERROR_MALFORMED;
4387    }
4388
4389    if (flags & 0xff000000) {
4390        return -EINVAL;
4391    }
4392
4393    if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
4394        return ERROR_MALFORMED;
4395    }
4396
4397    if (mLastParsedTrackId != mTrackId) {
4398        // this is not the right track, skip it
4399        return OK;
4400    }
4401
4402    mTrackFragmentHeaderInfo.mFlags = flags;
4403    mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
4404    offset += 8;
4405    size -= 8;
4406
4407    ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
4408
4409    if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
4410        if (size < 8) {
4411            return -EINVAL;
4412        }
4413
4414        if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
4415            return ERROR_MALFORMED;
4416        }
4417        offset += 8;
4418        size -= 8;
4419    }
4420
4421    if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
4422        if (size < 4) {
4423            return -EINVAL;
4424        }
4425
4426        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
4427            return ERROR_MALFORMED;
4428        }
4429        offset += 4;
4430        size -= 4;
4431    }
4432
4433    if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
4434        if (size < 4) {
4435            return -EINVAL;
4436        }
4437
4438        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
4439            return ERROR_MALFORMED;
4440        }
4441        offset += 4;
4442        size -= 4;
4443    }
4444
4445    if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
4446        if (size < 4) {
4447            return -EINVAL;
4448        }
4449
4450        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
4451            return ERROR_MALFORMED;
4452        }
4453        offset += 4;
4454        size -= 4;
4455    }
4456
4457    if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
4458        if (size < 4) {
4459            return -EINVAL;
4460        }
4461
4462        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
4463            return ERROR_MALFORMED;
4464        }
4465        offset += 4;
4466        size -= 4;
4467    }
4468
4469    if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
4470        mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
4471    }
4472
4473    mTrackFragmentHeaderInfo.mDataOffset = 0;
4474    return OK;
4475}
4476
4477status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
4478
4479    ALOGV("MPEG4Extractor::parseTrackFragmentRun");
4480    if (size < 8) {
4481        return -EINVAL;
4482    }
4483
4484    enum {
4485        kDataOffsetPresent                  = 0x01,
4486        kFirstSampleFlagsPresent            = 0x04,
4487        kSampleDurationPresent              = 0x100,
4488        kSampleSizePresent                  = 0x200,
4489        kSampleFlagsPresent                 = 0x400,
4490        kSampleCompositionTimeOffsetPresent = 0x800,
4491    };
4492
4493    uint32_t flags;
4494    if (!mDataSource->getUInt32(offset, &flags)) {
4495        return ERROR_MALFORMED;
4496    }
4497    // |version| only affects SampleCompositionTimeOffset field.
4498    // If version == 0, SampleCompositionTimeOffset is uint32_t;
4499    // Otherwise, SampleCompositionTimeOffset is int32_t.
4500    // Sample.compositionOffset is defined as int32_t.
4501    uint8_t version = flags >> 24;
4502    flags &= 0xffffff;
4503    ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags);
4504
4505    if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
4506        // These two shall not be used together.
4507        return -EINVAL;
4508    }
4509
4510    uint32_t sampleCount;
4511    if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
4512        return ERROR_MALFORMED;
4513    }
4514    offset += 8;
4515    size -= 8;
4516
4517    uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
4518
4519    uint32_t firstSampleFlags = 0;
4520
4521    if (flags & kDataOffsetPresent) {
4522        if (size < 4) {
4523            return -EINVAL;
4524        }
4525
4526        int32_t dataOffsetDelta;
4527        if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
4528            return ERROR_MALFORMED;
4529        }
4530
4531        dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
4532
4533        offset += 4;
4534        size -= 4;
4535    }
4536
4537    if (flags & kFirstSampleFlagsPresent) {
4538        if (size < 4) {
4539            return -EINVAL;
4540        }
4541
4542        if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
4543            return ERROR_MALFORMED;
4544        }
4545        offset += 4;
4546        size -= 4;
4547    }
4548
4549    uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
4550             sampleCtsOffset = 0;
4551
4552    size_t bytesPerSample = 0;
4553    if (flags & kSampleDurationPresent) {
4554        bytesPerSample += 4;
4555    } else if (mTrackFragmentHeaderInfo.mFlags
4556            & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
4557        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
4558    } else if (mTrex) {
4559        sampleDuration = mTrex->default_sample_duration;
4560    }
4561
4562    if (flags & kSampleSizePresent) {
4563        bytesPerSample += 4;
4564    } else if (mTrackFragmentHeaderInfo.mFlags
4565            & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
4566        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
4567    } else {
4568        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
4569    }
4570
4571    if (flags & kSampleFlagsPresent) {
4572        bytesPerSample += 4;
4573    } else if (mTrackFragmentHeaderInfo.mFlags
4574            & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
4575        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
4576    } else {
4577        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
4578    }
4579
4580    if (flags & kSampleCompositionTimeOffsetPresent) {
4581        bytesPerSample += 4;
4582    } else {
4583        sampleCtsOffset = 0;
4584    }
4585
4586    if (size < (off64_t)(sampleCount * bytesPerSample)) {
4587        return -EINVAL;
4588    }
4589
4590    Sample tmp;
4591    for (uint32_t i = 0; i < sampleCount; ++i) {
4592        if (flags & kSampleDurationPresent) {
4593            if (!mDataSource->getUInt32(offset, &sampleDuration)) {
4594                return ERROR_MALFORMED;
4595            }
4596            offset += 4;
4597        }
4598
4599        if (flags & kSampleSizePresent) {
4600            if (!mDataSource->getUInt32(offset, &sampleSize)) {
4601                return ERROR_MALFORMED;
4602            }
4603            offset += 4;
4604        }
4605
4606        if (flags & kSampleFlagsPresent) {
4607            if (!mDataSource->getUInt32(offset, &sampleFlags)) {
4608                return ERROR_MALFORMED;
4609            }
4610            offset += 4;
4611        }
4612
4613        if (flags & kSampleCompositionTimeOffsetPresent) {
4614            if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
4615                return ERROR_MALFORMED;
4616            }
4617            offset += 4;
4618        }
4619
4620        ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
4621              " flags 0x%08x", i + 1,
4622                dataOffset, sampleSize, sampleDuration,
4623                (flags & kFirstSampleFlagsPresent) && i == 0
4624                    ? firstSampleFlags : sampleFlags);
4625        tmp.offset = dataOffset;
4626        tmp.size = sampleSize;
4627        tmp.duration = sampleDuration;
4628        tmp.compositionOffset = sampleCtsOffset;
4629        memset(tmp.iv, 0, sizeof(tmp.iv));
4630        mCurrentSamples.add(tmp);
4631
4632        dataOffset += sampleSize;
4633    }
4634
4635    mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
4636
4637    return OK;
4638}
4639
4640status_t MPEG4Source::getFormat(MetaDataBase &meta) {
4641    Mutex::Autolock autoLock(mLock);
4642    meta = mFormat;
4643    return OK;
4644}
4645
4646size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
4647    switch (mNALLengthSize) {
4648        case 1:
4649            return *data;
4650        case 2:
4651            return U16_AT(data);
4652        case 3:
4653            return ((size_t)data[0] << 16) | U16_AT(&data[1]);
4654        case 4:
4655            return U32_AT(data);
4656    }
4657
4658    // This cannot happen, mNALLengthSize springs to life by adding 1 to
4659    // a 2-bit integer.
4660    CHECK(!"Should not be here.");
4661
4662    return 0;
4663}
4664
// Reads the next unit of data from this track and returns it through |out|.
//
// Overview of what the code below does:
//  - Takes mLock and requires that the source has been started (mStarted).
//  - If |options| requests non-blocking operation and mGroup currently has no
//    buffers, sets |*out| to nullptr and returns WOULD_BLOCK.
//  - If mFirstMoofOffset > 0 the call is forwarded to fragmentedRead().
//  - If |options| carries a seek request, mCurrentSampleIndex is repositioned
//    (via mItemTable when mIsHeif is set, via mSampleTable otherwise) and any
//    partially consumed mBuffer is released.
//  - When the track is neither AVC nor HEVC, the whole sample is returned.
//  - When it is AVC/HEVC and mWantsNALFragments is set, each call returns a
//    single NAL unit as a clone of mBuffer with its range narrowed to that
//    unit; mBuffer is kept across calls until fully consumed.
//  - Otherwise (AVC/HEVC, whole samples) every length prefix in the sample is
//    replaced by a 0x00000001 start code, unless the format is marked with
//    kKeyIsDRM, in which case the sample bytes are returned unmodified.
//
// Returned buffers carry kKeyTime and kKeyDuration (scaled from mTimescale
// to microseconds), kKeyTargetTime after a SEEK_CLOSEST / SEEK_FRAME_INDEX
// seek, and kKeyIsSyncFrame for sync samples.
//
// Returns OK on success; otherwise an error such as WOULD_BLOCK,
// ERROR_END_OF_STREAM, ERROR_MALFORMED, ERROR_IO, ERROR_BUFFER_TOO_SMALL, or
// whatever the sample table / item table / buffer group reported.
status_t MPEG4Source::read(
        MediaBufferBase **out, const ReadOptions *options) {
    Mutex::Autolock autoLock(mLock);

    CHECK(mStarted);

    if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) {
        *out = nullptr;
        return WOULD_BLOCK;
    }

    // A positive first-moof offset routes the call to the fragmented reader.
    if (mFirstMoofOffset > 0) {
        return fragmentedRead(out, options);
    }

    *out = NULL;

    // Stays negative unless a SEEK_CLOSEST / SEEK_FRAME_INDEX seek sets it.
    int64_t targetSampleTimeUs = -1;

    int64_t seekTimeUs;
    ReadOptions::SeekMode mode;
    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
        if (mIsHeif) {
            CHECK(mSampleTable == NULL);
            CHECK(mItemTable != NULL);
            int32_t imageIndex;
            if (!mFormat.findInt32(kKeyTrackID, &imageIndex)) {
                return ERROR_MALFORMED;
            }

            // A non-negative seek time selects the image item itself, a
            // negative one selects its thumbnail item.
            status_t err;
            if (seekTimeUs >= 0) {
                err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
            } else {
                err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
            }
            if (err != OK) {
                return err;
            }
        } else {
            // Translate the seek mode into the sample table's search flags.
            uint32_t findFlags = 0;
            switch (mode) {
                case ReadOptions::SEEK_PREVIOUS_SYNC:
                    findFlags = SampleTable::kFlagBefore;
                    break;
                case ReadOptions::SEEK_NEXT_SYNC:
                    findFlags = SampleTable::kFlagAfter;
                    break;
                case ReadOptions::SEEK_CLOSEST_SYNC:
                case ReadOptions::SEEK_CLOSEST:
                    findFlags = SampleTable::kFlagClosest;
                    break;
                case ReadOptions::SEEK_FRAME_INDEX:
                    findFlags = SampleTable::kFlagFrameIndex;
                    break;
                default:
                    CHECK(!"Should not be here.");
                    break;
            }

            uint32_t sampleIndex;
            status_t err = mSampleTable->findSampleAtTime(
                    seekTimeUs, 1000000, mTimescale,
                    &sampleIndex, findFlags);

            if (mode == ReadOptions::SEEK_CLOSEST
                    || mode == ReadOptions::SEEK_FRAME_INDEX) {
                // We found the closest sample already, now we want the sync
                // sample preceding it (or the sample itself of course), even
                // if the subsequent sync sample is closer.
                findFlags = SampleTable::kFlagBefore;
            }

            uint32_t syncSampleIndex;
            if (err == OK) {
                err = mSampleTable->findSyncSampleNear(
                        sampleIndex, &syncSampleIndex, findFlags);
            }

            // Timestamp of the sample that was actually asked for (not of the
            // sync sample); used below as the target time.
            uint32_t sampleTime;
            if (err == OK) {
                err = mSampleTable->getMetaDataForSample(
                        sampleIndex, NULL, NULL, &sampleTime);
            }

            if (err != OK) {
                if (err == ERROR_OUT_OF_RANGE) {
                    // An attempt to seek past the end of the stream would
                    // normally cause this ERROR_OUT_OF_RANGE error. Propagating
                    // this all the way to the MediaPlayer would cause abnormal
                    // termination. Legacy behaviour appears to be to behave as if
                    // we had seeked to the end of stream, ending normally.
                    err = ERROR_END_OF_STREAM;
                }
                ALOGV("end of stream");
                return err;
            }

            if (mode == ReadOptions::SEEK_CLOSEST
                || mode == ReadOptions::SEEK_FRAME_INDEX) {
                targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
            }

#if 0
            uint32_t syncSampleTime;
            CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
                        syncSampleIndex, NULL, NULL, &syncSampleTime));

            ALOGI("seek to time %lld us => sample at time %lld us, "
                 "sync sample at time %lld us",
                 seekTimeUs,
                 sampleTime * 1000000ll / mTimescale,
                 syncSampleTime * 1000000ll / mTimescale);
#endif

            // Reading resumes at the sync sample.
            mCurrentSampleIndex = syncSampleIndex;
        }

        // Drop any partially consumed buffer left over from before the seek.
        if (mBuffer != NULL) {
            mBuffer->release();
            mBuffer = NULL;
        }

        // fall through
    }

    off64_t offset = 0;
    size_t size = 0;
    // cts, stts and isSyncSample are only assigned in the mBuffer == NULL
    // block below.
    uint32_t cts, stts;
    bool isSyncSample;
    bool newBuffer = false;
    if (mBuffer == NULL) {
        newBuffer = true;

        // Look up where the current sample (or image item) lives in the file.
        status_t err;
        if (!mIsHeif) {
            err = mSampleTable->getMetaDataForSample(
                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
        } else {
            err = mItemTable->getImageOffsetAndSize(
                    options && options->getSeekTo(&seekTimeUs, &mode) ?
                            &mCurrentSampleIndex : NULL, &offset, &size);

            cts = stts = 0;
            isSyncSample = 0;
            ALOGV("image offset %lld, size %zu", (long long)offset, size);
        }

        if (err != OK) {
            return err;
        }

        err = mGroup->acquire_buffer(&mBuffer);

        if (err != OK) {
            CHECK(mBuffer == NULL);
            return err;
        }
        // The sample must fit into the acquired buffer.
        if (size > mBuffer->size()) {
            ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
            mBuffer->release();
            mBuffer = NULL;
            return ERROR_BUFFER_TOO_SMALL;
        }
    }

    if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) {
        // Only a freshly acquired buffer needs to be filled and tagged; an
        // existing mBuffer still holds unconsumed NAL units from an earlier
        // call.
        if (newBuffer) {
            ssize_t num_bytes_read =
                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);

            if (num_bytes_read < (ssize_t)size) {
                mBuffer->release();
                mBuffer = NULL;

                return ERROR_IO;
            }

            CHECK(mBuffer != NULL);
            mBuffer->set_range(0, size);
            mBuffer->meta_data().clear();
            mBuffer->meta_data().setInt64(
                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
            mBuffer->meta_data().setInt64(
                    kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);

            if (targetSampleTimeUs >= 0) {
                mBuffer->meta_data().setInt64(
                        kKeyTargetTime, targetSampleTimeUs);
            }

            if (isSyncSample) {
                mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
            }

            ++mCurrentSampleIndex;
        }

        // Non-AVC/HEVC tracks: hand out the whole sample as is.
        if (!mIsAVC && !mIsHEVC) {
            *out = mBuffer;
            mBuffer = NULL;

            return OK;
        }

        // Each NAL unit is split up into its constituent fragments and
        // each one of them returned in its own buffer.

        CHECK(mBuffer->range_length() >= mNALLengthSize);

        const uint8_t *src =
            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();

        size_t nal_size = parseNALSize(src);
        // This overflow condition is only logged here; there is no early
        // return on it.
        if (mNALLengthSize > SIZE_MAX - nal_size) {
            ALOGE("b/24441553, b/24445122");
        }
        // Reject a NAL unit that claims more bytes than remain in the buffer.
        if (mBuffer->range_length() - mNALLengthSize < nal_size) {
            ALOGE("incomplete NAL unit.");

            mBuffer->release();
            mBuffer = NULL;

            return ERROR_MALFORMED;
        }

        // The clone's range covers just this NAL unit's payload (length
        // prefix skipped).
        MediaBufferBase *clone = mBuffer->clone();
        CHECK(clone != NULL);
        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);

        // Advance mBuffer past the NAL unit that was just handed out.
        CHECK(mBuffer != NULL);
        mBuffer->set_range(
                mBuffer->range_offset() + mNALLengthSize + nal_size,
                mBuffer->range_length() - mNALLengthSize - nal_size);

        if (mBuffer->range_length() == 0) {
            mBuffer->release();
            mBuffer = NULL;
        }

        *out = clone;

        return OK;
    } else {
        // Whole NAL units are returned but each fragment is prefixed by
        // the start code (0x00 00 00 01).
        // NOTE(review): this branch uses offset/size/cts/stts/isSyncSample
        // unconditionally, so it appears to rely on mBuffer having been NULL
        // on entry (i.e. the lookup block above having run) — confirm.
        ssize_t num_bytes_read = 0;
        int32_t drm = 0;
        bool usesDRM = (mFormat.findInt32(kKeyIsDRM, &drm) && drm != 0);
        // DRM content is read straight into the output buffer; everything
        // else goes through mSrcBuffer so it can be rewritten below.
        if (usesDRM) {
            num_bytes_read =
                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
        } else {
            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
        }

        if (num_bytes_read < (ssize_t)size) {
            mBuffer->release();
            mBuffer = NULL;

            return ERROR_IO;
        }

        if (usesDRM) {
            CHECK(mBuffer != NULL);
            mBuffer->set_range(0, size);

        } else {
            uint8_t *dstData = (uint8_t *)mBuffer->data();
            size_t srcOffset = 0;
            size_t dstOffset = 0;

            // Walk the length-prefixed NAL units in mSrcBuffer and copy each
            // one to the output behind a 4-byte start code.
            while (srcOffset < size) {
                // Both the length prefix and the payload it announces must
                // lie within the bytes that were read.
                bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
                size_t nalLength = 0;
                if (!isMalFormed) {
                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
                    srcOffset += mNALLengthSize;
                    isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
                }

                if (isMalFormed) {
                    ALOGE("Video is malformed");
                    mBuffer->release();
                    mBuffer = NULL;
                    return ERROR_MALFORMED;
                }

                // Empty NAL units produce no output (srcOffset has already
                // moved past the length prefix).
                if (nalLength == 0) {
                    continue;
                }

                // Make sure start code + payload neither overflow size_t nor
                // exceed the output buffer.
                if (dstOffset > SIZE_MAX - 4 ||
                        dstOffset + 4 > SIZE_MAX - nalLength ||
                        dstOffset + 4 + nalLength > mBuffer->size()) {
                    ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
                    android_errorWriteLog(0x534e4554, "27208621");
                    mBuffer->release();
                    mBuffer = NULL;
                    return ERROR_MALFORMED;
                }

                dstData[dstOffset++] = 0;
                dstData[dstOffset++] = 0;
                dstData[dstOffset++] = 0;
                dstData[dstOffset++] = 1;
                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
                srcOffset += nalLength;
                dstOffset += nalLength;
            }
            CHECK_EQ(srcOffset, size);
            CHECK(mBuffer != NULL);
            mBuffer->set_range(0, dstOffset);
        }

        mBuffer->meta_data().clear();
        mBuffer->meta_data().setInt64(
                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
        mBuffer->meta_data().setInt64(
                kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);

        if (targetSampleTimeUs >= 0) {
            mBuffer->meta_data().setInt64(
                    kKeyTargetTime, targetSampleTimeUs);
        }

        if (mIsAVC) {
            uint32_t layerId = FindAVCLayerId(
                    (const uint8_t *)mBuffer->data(), mBuffer->range_length());
            mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId);
        }

        if (isSyncSample) {
            mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
        }

        ++mCurrentSampleIndex;

        *out = mBuffer;
        mBuffer = NULL;

        return OK;
    }
}
5009
5010status_t MPEG4Source::fragmentedRead(
5011        MediaBufferBase **out, const ReadOptions *options) {
5012
5013    ALOGV("MPEG4Source::fragmentedRead");
5014
5015    CHECK(mStarted);
5016
5017    *out = NULL;
5018
5019    int64_t targetSampleTimeUs = -1;
5020
5021    int64_t seekTimeUs;
5022    ReadOptions::SeekMode mode;
5023    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
5024
5025        int numSidxEntries = mSegments.size();
5026        if (numSidxEntries != 0) {
5027            int64_t totalTime = 0;
5028            off64_t totalOffset = mFirstMoofOffset;
5029            for (int i = 0; i < numSidxEntries; i++) {
5030                const SidxEntry *se = &mSegments[i];
5031                if (totalTime + se->mDurationUs > seekTimeUs) {
5032                    // The requested time is somewhere in this segment
5033                    if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
5034                        (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
5035                        (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
5036                        // requested next sync, or closest sync and it was closer to the end of
5037                        // this segment
5038                        totalTime += se->mDurationUs;
5039                        totalOffset += se->mSize;
5040                    }
5041                    break;
5042                }
5043                totalTime += se->mDurationUs;
5044                totalOffset += se->mSize;
5045            }
5046            mCurrentMoofOffset = totalOffset;
5047            mNextMoofOffset = -1;
5048            mCurrentSamples.clear();
5049            mCurrentSampleIndex = 0;
5050            status_t err = parseChunk(&totalOffset);
5051            if (err != OK) {
5052                return err;
5053            }
5054            mCurrentTime = totalTime * mTimescale / 1000000ll;
5055        } else {
5056            // without sidx boxes, we can only seek to 0
5057            mCurrentMoofOffset = mFirstMoofOffset;
5058            mNextMoofOffset = -1;
5059            mCurrentSamples.clear();
5060            mCurrentSampleIndex = 0;
5061            off64_t tmp = mCurrentMoofOffset;
5062            status_t err = parseChunk(&tmp);
5063            if (err != OK) {
5064                return err;
5065            }
5066            mCurrentTime = 0;
5067        }
5068
5069        if (mBuffer != NULL) {
5070            mBuffer->release();
5071            mBuffer = NULL;
5072        }
5073
5074        // fall through
5075    }
5076
5077    off64_t offset = 0;
5078    size_t size = 0;
5079    uint32_t cts = 0;
5080    bool isSyncSample = false;
5081    bool newBuffer = false;
5082    if (mBuffer == NULL) {
5083        newBuffer = true;
5084
5085        if (mCurrentSampleIndex >= mCurrentSamples.size()) {
5086            // move to next fragment if there is one
5087            if (mNextMoofOffset <= mCurrentMoofOffset) {
5088                return ERROR_END_OF_STREAM;
5089            }
5090            off64_t nextMoof = mNextMoofOffset;
5091            mCurrentMoofOffset = nextMoof;
5092            mCurrentSamples.clear();
5093            mCurrentSampleIndex = 0;
5094            status_t err = parseChunk(&nextMoof);
5095            if (err != OK) {
5096                return err;
5097            }
5098            if (mCurrentSampleIndex >= mCurrentSamples.size()) {
5099                return ERROR_END_OF_STREAM;
5100            }
5101        }
5102
5103        const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
5104        offset = smpl->offset;
5105        size = smpl->size;
5106        cts = mCurrentTime + smpl->compositionOffset;
5107        mCurrentTime += smpl->duration;
5108        isSyncSample = (mCurrentSampleIndex == 0); // XXX
5109
5110        status_t err = mGroup->acquire_buffer(&mBuffer);
5111
5112        if (err != OK) {
5113            CHECK(mBuffer == NULL);
5114            ALOGV("acquire_buffer returned %d", err);
5115            return err;
5116        }
5117        if (size > mBuffer->size()) {
5118            ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
5119            mBuffer->release();
5120            mBuffer = NULL;
5121            return ERROR_BUFFER_TOO_SMALL;
5122        }
5123    }
5124
5125    const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
5126    MetaDataBase &bufmeta = mBuffer->meta_data();
5127    bufmeta.clear();
5128    if (smpl->encryptedsizes.size()) {
5129        // store clear/encrypted lengths in metadata
5130        bufmeta.setData(kKeyPlainSizes, 0,
5131                smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
5132        bufmeta.setData(kKeyEncryptedSizes, 0,
5133                smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
5134        bufmeta.setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
5135        bufmeta.setInt32(kKeyCryptoMode, mCryptoMode);
5136        bufmeta.setData(kKeyCryptoKey, 0, mCryptoKey, 16);
5137        bufmeta.setInt32(kKeyEncryptedByteBlock, mDefaultEncryptedByteBlock);
5138        bufmeta.setInt32(kKeySkipByteBlock, mDefaultSkipByteBlock);
5139
5140        uint32_t type = 0;
5141        const void *iv = NULL;
5142        size_t ivlength = 0;
5143        if (!mFormat.findData(
5144                kKeyCryptoIV, &type, &iv, &ivlength)) {
5145            iv = smpl->iv;
5146            ivlength = 16; // use 16 or the actual size?
5147        }
5148        bufmeta.setData(kKeyCryptoIV, 0, iv, ivlength);
5149
5150    }
5151
5152    if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) {
5153        if (newBuffer) {
5154            if (!isInRange((size_t)0u, mBuffer->size(), size)) {
5155                mBuffer->release();
5156                mBuffer = NULL;
5157
5158                ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
5159                return ERROR_MALFORMED;
5160            }
5161
5162            ssize_t num_bytes_read =
5163                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
5164
5165            if (num_bytes_read < (ssize_t)size) {
5166                mBuffer->release();
5167                mBuffer = NULL;
5168
5169                ALOGE("i/o error");
5170                return ERROR_IO;
5171            }
5172
5173            CHECK(mBuffer != NULL);
5174            mBuffer->set_range(0, size);
5175            mBuffer->meta_data().setInt64(
5176                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
5177            mBuffer->meta_data().setInt64(
5178                    kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
5179
5180            if (targetSampleTimeUs >= 0) {
5181                mBuffer->meta_data().setInt64(
5182                        kKeyTargetTime, targetSampleTimeUs);
5183            }
5184
5185            if (mIsAVC) {
5186                uint32_t layerId = FindAVCLayerId(
5187                        (const uint8_t *)mBuffer->data(), mBuffer->range_length());
5188                mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId);
5189            }
5190
5191            if (isSyncSample) {
5192                mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
5193            }
5194
5195            ++mCurrentSampleIndex;
5196        }
5197
5198        if (!mIsAVC && !mIsHEVC) {
5199            *out = mBuffer;
5200            mBuffer = NULL;
5201
5202            return OK;
5203        }
5204
5205        // Each NAL unit is split up into its constituent fragments and
5206        // each one of them returned in its own buffer.
5207
5208        CHECK(mBuffer->range_length() >= mNALLengthSize);
5209
5210        const uint8_t *src =
5211            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
5212
5213        size_t nal_size = parseNALSize(src);
5214        if (mNALLengthSize > SIZE_MAX - nal_size) {
5215            ALOGE("b/24441553, b/24445122");
5216        }
5217
5218        if (mBuffer->range_length() - mNALLengthSize < nal_size) {
5219            ALOGE("incomplete NAL unit.");
5220
5221            mBuffer->release();
5222            mBuffer = NULL;
5223
5224            return ERROR_MALFORMED;
5225        }
5226
5227        MediaBufferBase *clone = mBuffer->clone();
5228        CHECK(clone != NULL);
5229        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
5230
5231        CHECK(mBuffer != NULL);
5232        mBuffer->set_range(
5233                mBuffer->range_offset() + mNALLengthSize + nal_size,
5234                mBuffer->range_length() - mNALLengthSize - nal_size);
5235
5236        if (mBuffer->range_length() == 0) {
5237            mBuffer->release();
5238            mBuffer = NULL;
5239        }
5240
5241        *out = clone;
5242
5243        return OK;
5244    } else {
5245        ALOGV("whole NAL");
5246        // Whole NAL units are returned but each fragment is prefixed by
5247        // the start code (0x00 00 00 01).
5248        ssize_t num_bytes_read = 0;
5249        int32_t drm = 0;
5250        bool usesDRM = (mFormat.findInt32(kKeyIsDRM, &drm) && drm != 0);
5251        void *data = NULL;
5252        bool isMalFormed = false;
5253        if (usesDRM) {
5254            if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) {
5255                isMalFormed = true;
5256            } else {
5257                data = mBuffer->data();
5258            }
5259        } else {
5260            int32_t max_size;
5261            if (!mFormat.findInt32(kKeyMaxInputSize, &max_size)
5262                    || !isInRange((size_t)0u, (size_t)max_size, size)) {
5263                isMalFormed = true;
5264            } else {
5265                data = mSrcBuffer;
5266            }
5267        }
5268
5269        if (isMalFormed || data == NULL) {
5270            ALOGE("isMalFormed size %zu", size);
5271            if (mBuffer != NULL) {
5272                mBuffer->release();
5273                mBuffer = NULL;
5274            }
5275            return ERROR_MALFORMED;
5276        }
5277        num_bytes_read = mDataSource->readAt(offset, data, size);
5278
5279        if (num_bytes_read < (ssize_t)size) {
5280            mBuffer->release();
5281            mBuffer = NULL;
5282
5283            ALOGE("i/o error");
5284            return ERROR_IO;
5285        }
5286
5287        if (usesDRM) {
5288            CHECK(mBuffer != NULL);
5289            mBuffer->set_range(0, size);
5290
5291        } else {
5292            uint8_t *dstData = (uint8_t *)mBuffer->data();
5293            size_t srcOffset = 0;
5294            size_t dstOffset = 0;
5295
5296            while (srcOffset < size) {
5297                isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
5298                size_t nalLength = 0;
5299                if (!isMalFormed) {
5300                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
5301                    srcOffset += mNALLengthSize;
5302                    isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
5303                            || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
5304                            || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
5305                }
5306
5307                if (isMalFormed) {
5308                    ALOGE("Video is malformed; nalLength %zu", nalLength);
5309                    mBuffer->release();
5310                    mBuffer = NULL;
5311                    return ERROR_MALFORMED;
5312                }
5313
5314                if (nalLength == 0) {
5315                    continue;
5316                }
5317
5318                if (dstOffset > SIZE_MAX - 4 ||
5319                        dstOffset + 4 > SIZE_MAX - nalLength ||
5320                        dstOffset + 4 + nalLength > mBuffer->size()) {
5321                    ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
5322                    android_errorWriteLog(0x534e4554, "26365349");
5323                    mBuffer->release();
5324                    mBuffer = NULL;
5325                    return ERROR_MALFORMED;
5326                }
5327
5328                dstData[dstOffset++] = 0;
5329                dstData[dstOffset++] = 0;
5330                dstData[dstOffset++] = 0;
5331                dstData[dstOffset++] = 1;
5332                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
5333                srcOffset += nalLength;
5334                dstOffset += nalLength;
5335            }
5336            CHECK_EQ(srcOffset, size);
5337            CHECK(mBuffer != NULL);
5338            mBuffer->set_range(0, dstOffset);
5339        }
5340
5341        mBuffer->meta_data().setInt64(
5342                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
5343        mBuffer->meta_data().setInt64(
5344                kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
5345
5346        if (targetSampleTimeUs >= 0) {
5347            mBuffer->meta_data().setInt64(
5348                    kKeyTargetTime, targetSampleTimeUs);
5349        }
5350
5351        if (isSyncSample) {
5352            mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
5353        }
5354
5355        ++mCurrentSampleIndex;
5356
5357        *out = mBuffer;
5358        mBuffer = NULL;
5359
5360        return OK;
5361    }
5362}
5363
5364MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
5365        const char *mimePrefix) {
5366    for (Track *track = mFirstTrack; track != NULL; track = track->next) {
5367        const char *mime;
5368        if (track->meta.findCString(kKeyMIMEType, &mime)
5369                && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
5370            return track;
5371        }
5372    }
5373
5374    return NULL;
5375}
5376
5377static bool LegacySniffMPEG4(DataSourceBase *source, float *confidence) {
5378    uint8_t header[8];
5379
5380    ssize_t n = source->readAt(4, header, sizeof(header));
5381    if (n < (ssize_t)sizeof(header)) {
5382        return false;
5383    }
5384
5385    if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
5386        || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
5387        || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
5388        || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
5389        || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
5390        || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
5391        || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
5392        || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)) {
5393        *confidence = 0.4;
5394
5395        return true;
5396    }
5397
5398    return false;
5399}
5400
5401static bool isCompatibleBrand(uint32_t fourcc) {
5402    static const uint32_t kCompatibleBrands[] = {
5403        FOURCC('i', 's', 'o', 'm'),
5404        FOURCC('i', 's', 'o', '2'),
5405        FOURCC('a', 'v', 'c', '1'),
5406        FOURCC('h', 'v', 'c', '1'),
5407        FOURCC('h', 'e', 'v', '1'),
5408        FOURCC('3', 'g', 'p', '4'),
5409        FOURCC('m', 'p', '4', '1'),
5410        FOURCC('m', 'p', '4', '2'),
5411        FOURCC('d', 'a', 's', 'h'),
5412
5413        // Won't promise that the following file types can be played.
5414        // Just give these file types a chance.
5415        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
5416        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
5417
5418        FOURCC('3', 'g', '2', 'a'),  // 3GPP2
5419        FOURCC('3', 'g', '2', 'b'),
5420        FOURCC('m', 'i', 'f', '1'),  // HEIF image
5421        FOURCC('h', 'e', 'i', 'c'),  // HEIF image
5422        FOURCC('m', 's', 'f', '1'),  // HEIF image sequence
5423        FOURCC('h', 'e', 'v', 'c'),  // HEIF image sequence
5424    };
5425
5426    for (size_t i = 0;
5427         i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
5428         ++i) {
5429        if (kCompatibleBrands[i] == fourcc) {
5430            return true;
5431        }
5432    }
5433
5434    return false;
5435}
5436
5437// Attempt to actually parse the 'ftyp' atom and determine if a suitable
5438// compatible brand is present.
5439// Also try to identify where this file's metadata ends
5440// (end of the 'moov' atom) and report it to the caller as part of
5441// the metadata.
5442static bool BetterSniffMPEG4(DataSourceBase *source, float *confidence) {
5443    // We scan up to 128 bytes to identify this file as an MP4.
5444    static const off64_t kMaxScanOffset = 128ll;
5445
5446    off64_t offset = 0ll;
5447    bool foundGoodFileType = false;
5448    off64_t moovAtomEndOffset = -1ll;
5449    bool done = false;
5450
5451    while (!done && offset < kMaxScanOffset) {
5452        uint32_t hdr[2];
5453        if (source->readAt(offset, hdr, 8) < 8) {
5454            return false;
5455        }
5456
5457        uint64_t chunkSize = ntohl(hdr[0]);
5458        uint32_t chunkType = ntohl(hdr[1]);
5459        off64_t chunkDataOffset = offset + 8;
5460
5461        if (chunkSize == 1) {
5462            if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
5463                return false;
5464            }
5465
5466            chunkSize = ntoh64(chunkSize);
5467            chunkDataOffset += 8;
5468
5469            if (chunkSize < 16) {
5470                // The smallest valid chunk is 16 bytes long in this case.
5471                return false;
5472            }
5473
5474        } else if (chunkSize < 8) {
5475            // The smallest valid chunk is 8 bytes long.
5476            return false;
5477        }
5478
5479        // (data_offset - offset) is either 8 or 16
5480        off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
5481        if (chunkDataSize < 0) {
5482            ALOGE("b/23540914");
5483            return false;
5484        }
5485
5486        char chunkstring[5];
5487        MakeFourCCString(chunkType, chunkstring);
5488        ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset);
5489        switch (chunkType) {
5490            case FOURCC('f', 't', 'y', 'p'):
5491            {
5492                if (chunkDataSize < 8) {
5493                    return false;
5494                }
5495
5496                uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
5497                for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
5498                    if (i == 1) {
5499                        // Skip this index, it refers to the minorVersion,
5500                        // not a brand.
5501                        continue;
5502                    }
5503
5504                    uint32_t brand;
5505                    if (source->readAt(
5506                                chunkDataOffset + 4 * i, &brand, 4) < 4) {
5507                        return false;
5508                    }
5509
5510                    brand = ntohl(brand);
5511
5512                    if (isCompatibleBrand(brand)) {
5513                        foundGoodFileType = true;
5514                        break;
5515                    }
5516                }
5517
5518                if (!foundGoodFileType) {
5519                    return false;
5520                }
5521
5522                break;
5523            }
5524
5525            case FOURCC('m', 'o', 'o', 'v'):
5526            {
5527                moovAtomEndOffset = offset + chunkSize;
5528
5529                done = true;
5530                break;
5531            }
5532
5533            default:
5534                break;
5535        }
5536
5537        offset += chunkSize;
5538    }
5539
5540    if (!foundGoodFileType) {
5541        return false;
5542    }
5543
5544    *confidence = 0.4f;
5545
5546    return true;
5547}
5548
5549static MediaExtractor* CreateExtractor(DataSourceBase *source, void *) {
5550    return new MPEG4Extractor(source);
5551}
5552
5553static MediaExtractor::CreatorFunc Sniff(
5554        DataSourceBase *source, float *confidence, void **,
5555        MediaExtractor::FreeMetaFunc *) {
5556    if (BetterSniffMPEG4(source, confidence)) {
5557        return CreateExtractor;
5558    }
5559
5560    if (LegacySniffMPEG4(source, confidence)) {
5561        ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
5562        return CreateExtractor;
5563    }
5564
5565    return NULL;
5566}
5567
5568extern "C" {
5569// This is the only symbol that needs to be exported
5570__attribute__ ((visibility ("default")))
5571MediaExtractor::ExtractorDef GETEXTRACTORDEF() {
5572    return {
5573        MediaExtractor::EXTRACTORDEF_VERSION,
5574        UUID("27575c67-4417-4c54-8d3d-8e626985a164"),
5575        1, // version
5576        "MP4 Extractor",
5577        Sniff
5578    };
5579}
5580
5581} // extern "C"
5582
5583}  // namespace android
5584