1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MPEG4Extractor"
19
20#include <ctype.h>
21#include <inttypes.h>
22#include <stdint.h>
23#include <stdlib.h>
24#include <string.h>
25
26#include <utils/Log.h>
27
28#include "include/MPEG4Extractor.h"
29#include "include/SampleTable.h"
30#include "include/ESDS.h"
31
32#include <media/stagefright/foundation/ABitReader.h>
33#include <media/stagefright/foundation/ABuffer.h>
34#include <media/stagefright/foundation/ADebug.h>
35#include <media/stagefright/foundation/AMessage.h>
36#include <media/stagefright/foundation/AUtils.h>
37#include <media/stagefright/foundation/ColorUtils.h>
38#include <media/stagefright/MediaBuffer.h>
39#include <media/stagefright/MediaBufferGroup.h>
40#include <media/stagefright/MediaDefs.h>
41#include <media/stagefright/MediaSource.h>
42#include <media/stagefright/MetaData.h>
43#include <utils/String8.h>
44
45#include <byteswap.h>
46#include "include/ID3.h"
47#include "include/avc_utils.h"
48
49#ifndef UINT32_MAX
50#define UINT32_MAX       (4294967295U)
51#endif
52
53namespace android {
54
enum {
    // Upper bound on the number of leading sample bytes copied out as a
    // track's stream header.
    kMaxTrackHeaderSize = 32,

    // Largest atom payload this parser accepts (64 MiB). The spec allows some
    // atoms to be bigger, but anything above this limit is not supported here.
    kMaxAtomSize = 64 << 20,
};
63
// MediaSource subclass that is constructed from an MPEG4Extractor, a track
// format, a data source and a sample table. Only the declaration appears
// here; the member functions are defined out of line.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    // "sidx" is stored by reference (see mSegments), so the vector passed in
    // must outlive this object.
    MPEG4Source(const sp<MPEG4Extractor> &owner,
                const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset);
    // Separate initialization step that reports a status code.
    virtual status_t init();

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    // Always reports true.
    virtual bool supportNonblockingRead() { return true; }
    // NOTE(review): presumably the read path used for fragmented (moof-based)
    // files -- confirm against the definition.
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    // keep the MPEG4Extractor around, since we're referencing its data
    sp<MPEG4Extractor> mOwner;
    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Reference member: refers to the vector handed to the constructor.
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    // State for the sample auxiliary information currently being tracked
    // (type, sizes and offsets); filled in by the parseSampleAuxiliary*()
    // helpers declared below -- TODO confirm against their definitions.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    // Private parsing helpers; all are defined out of line.
    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // Values recorded from a track fragment header.
    struct TrackFragmentHeaderInfo {
        // Bit masks; presumably tested against mFlags to tell which of the
        // optional fields below are present -- confirm against the parser.
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record: location, size, timing, plus a 16-byte IV and the
    // clear/encrypted size lists.
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Copy constructor and copy assignment are declared private so that
    // outside code cannot copy an MPEG4Source.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
179
180// This custom data source wraps an existing one and satisfies requests
181// falling entirely within a cached range from the cache while forwarding
182// all remaining requests to the wrapped datasource.
183// This is used to cache the full sampletable metadata for a single track,
184// possibly wrapping multiple times to cover all tracks, i.e.
185// Each MPEG4DataSource caches the sampletable metadata for a single track.
186
// DataSource wrapper holding one malloc'ed cache buffer covering a single
// byte range of the wrapped source.
struct MPEG4DataSource : public DataSource {
    explicit MPEG4DataSource(const sp<DataSource> &source);

    // initCheck(), getSize() and flags() forward to the wrapped source.
    virtual status_t initCheck() const;
    // Copies from the cache when the requested range lies inside the cached
    // range; otherwise forwards the read to the wrapped source.
    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
    virtual status_t getSize(off64_t *size);
    virtual uint32_t flags();

    // Drops any existing cache, then reads "size" bytes starting at "offset"
    // from the wrapped source into a freshly allocated buffer.
    status_t setCachedRange(off64_t offset, size_t size);

protected:
    virtual ~MPEG4DataSource();

private:
    // Held by readAt() and setCachedRange().
    Mutex mLock;

    sp<DataSource> mSource;   // the wrapped data source
    off64_t mCachedOffset;    // source offset of the first cached byte
    size_t mCachedSize;       // number of cached bytes (0 when no cache)
    uint8_t *mCache;          // malloc'ed cache buffer, or NULL

    // Frees the buffer and resets the cached range to empty.
    void clearCache();

    // Copy constructor and copy assignment are declared private so that
    // outside code cannot copy an MPEG4DataSource.
    MPEG4DataSource(const MPEG4DataSource &);
    MPEG4DataSource &operator=(const MPEG4DataSource &);
};
213
// Wraps "source" and starts out with an empty cache (no buffer, zero-length
// cached range).
MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
    : mSource(source),
      mCachedOffset(0),
      mCachedSize(0),
      mCache(NULL) {
}
220
// Releases the cache buffer, if one was allocated.
MPEG4DataSource::~MPEG4DataSource() {
    clearCache();
}
224
225void MPEG4DataSource::clearCache() {
226    if (mCache) {
227        free(mCache);
228        mCache = NULL;
229    }
230
231    mCachedOffset = 0;
232    mCachedSize = 0;
233}
234
// Reports the wrapped source's initCheck() result unchanged.
status_t MPEG4DataSource::initCheck() const {
    return mSource->initCheck();
}
238
239ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
240    Mutex::Autolock autoLock(mLock);
241
242    if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
243        memcpy(data, &mCache[offset - mCachedOffset], size);
244        return size;
245    }
246
247    return mSource->readAt(offset, data, size);
248}
249
// Forwards to the wrapped source; the cache does not affect the size.
status_t MPEG4DataSource::getSize(off64_t *size) {
    return mSource->getSize(size);
}
253
// Reports the wrapped source's flags unchanged.
uint32_t MPEG4DataSource::flags() {
    return mSource->flags();
}
257
258status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
259    Mutex::Autolock autoLock(mLock);
260
261    clearCache();
262
263    mCache = (uint8_t *)malloc(size);
264
265    if (mCache == NULL) {
266        return -ENOMEM;
267    }
268
269    mCachedOffset = offset;
270    mCachedSize = size;
271
272    ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
273
274    if (err < (ssize_t)size) {
275        clearCache();
276
277        return ERROR_IO;
278    }
279
280    return OK;
281}
282
283////////////////////////////////////////////////////////////////////////////////
284
285static const bool kUseHexDump = false;
286
// Prints "size" bytes starting at "_data" to stdout, 16 bytes per row. Each
// row shows the row offset in hex, the bytes in hex (with an extra space
// after the eighth byte and blank padding for a short final row) and then the
// same bytes as characters, with '.' standing in for non-printable ones.
static void hexdump(const void *_data, size_t size) {
    static const size_t kBytesPerRow = 16;
    const uint8_t *bytes = (const uint8_t *)_data;

    for (size_t rowStart = 0; rowStart < size; rowStart += kBytesPerRow) {
        const size_t remaining = size - rowStart;
        const size_t rowLen = remaining < kBytesPerRow ? remaining : kBytesPerRow;
        const uint8_t *row = bytes + rowStart;

        printf("0x%04zx  ", rowStart);

        // Hex column: always kBytesPerRow cells wide so rows line up.
        for (size_t col = 0; col < kBytesPerRow; ++col) {
            if (col == kBytesPerRow / 2) {
                printf(" ");
            }

            if (col < rowLen) {
                printf("%02x ", row[col]);
            } else {
                printf("   ");
            }
        }

        printf(" ");

        // Character column: only the bytes that actually exist.
        for (size_t col = 0; col < rowLen; ++col) {
            if (isprint(row[col])) {
                printf("%c", row[col]);
            } else {
                printf(".");
            }
        }

        printf("\n");
    }
}
325
326static const char *FourCC2MIME(uint32_t fourcc) {
327    switch (fourcc) {
328        case FOURCC('m', 'p', '4', 'a'):
329            return MEDIA_MIMETYPE_AUDIO_AAC;
330
331        case FOURCC('s', 'a', 'm', 'r'):
332            return MEDIA_MIMETYPE_AUDIO_AMR_NB;
333
334        case FOURCC('s', 'a', 'w', 'b'):
335            return MEDIA_MIMETYPE_AUDIO_AMR_WB;
336
337        case FOURCC('m', 'p', '4', 'v'):
338            return MEDIA_MIMETYPE_VIDEO_MPEG4;
339
340        case FOURCC('s', '2', '6', '3'):
341        case FOURCC('h', '2', '6', '3'):
342        case FOURCC('H', '2', '6', '3'):
343            return MEDIA_MIMETYPE_VIDEO_H263;
344
345        case FOURCC('a', 'v', 'c', '1'):
346            return MEDIA_MIMETYPE_VIDEO_AVC;
347
348        case FOURCC('h', 'v', 'c', '1'):
349        case FOURCC('h', 'e', 'v', '1'):
350            return MEDIA_MIMETYPE_VIDEO_HEVC;
351        default:
352            CHECK(!"should not be here.");
353            return NULL;
354    }
355}
356
357static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
358    if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
359        // AMR NB audio is always mono, 8kHz
360        *channels = 1;
361        *rate = 8000;
362        return true;
363    } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
364        // AMR WB audio is always mono, 16kHz
365        *channels = 1;
366        *rate = 16000;
367        return true;
368    }
369    return false;
370}
371
// Stores "source" and resets all parsing state. No parsing happens here:
// mInitCheck starts as NO_INIT, which readMetaData() treats as "not parsed
// yet".
MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
    : mMoofOffset(0),
      mMoofFound(false),
      mMdatFound(false),
      mDataSource(source),
      mInitCheck(NO_INIT),
      mHeaderTimescale(0),
      mIsQT(false),
      mFirstTrack(NULL),
      mLastTrack(NULL),
      mFileMetaData(new MetaData),
      mFirstSINF(NULL),
      mIsDrm(false) {
}
386
387MPEG4Extractor::~MPEG4Extractor() {
388    Track *track = mFirstTrack;
389    while (track) {
390        Track *next = track->next;
391
392        delete track;
393        track = next;
394    }
395    mFirstTrack = mLastTrack = NULL;
396
397    SINF *sinf = mFirstSINF;
398    while (sinf) {
399        SINF *next = sinf->next;
400        delete[] sinf->IPMPData;
401        delete sinf;
402        sinf = next;
403    }
404    mFirstSINF = NULL;
405
406    for (size_t i = 0; i < mPssh.size(); i++) {
407        delete [] mPssh[i].data;
408    }
409}
410
411uint32_t MPEG4Extractor::flags() const {
412    return CAN_PAUSE |
413            ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
414                    (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
415}
416
417sp<MetaData> MPEG4Extractor::getMetaData() {
418    status_t err;
419    if ((err = readMetaData()) != OK) {
420        return new MetaData;
421    }
422
423    return mFileMetaData;
424}
425
426size_t MPEG4Extractor::countTracks() {
427    status_t err;
428    if ((err = readMetaData()) != OK) {
429        ALOGV("MPEG4Extractor::countTracks: no tracks");
430        return 0;
431    }
432
433    size_t n = 0;
434    Track *track = mFirstTrack;
435    while (track) {
436        ++n;
437        track = track->next;
438    }
439
440    ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
441    return n;
442}
443
444sp<MetaData> MPEG4Extractor::getTrackMetaData(
445        size_t index, uint32_t flags) {
446    status_t err;
447    if ((err = readMetaData()) != OK) {
448        return NULL;
449    }
450
451    Track *track = mFirstTrack;
452    while (index > 0) {
453        if (track == NULL) {
454            return NULL;
455        }
456
457        track = track->next;
458        --index;
459    }
460
461    if (track == NULL) {
462        return NULL;
463    }
464
465    if ((flags & kIncludeExtensiveMetaData)
466            && !track->includes_expensive_metadata) {
467        track->includes_expensive_metadata = true;
468
469        const char *mime;
470        CHECK(track->meta->findCString(kKeyMIMEType, &mime));
471        if (!strncasecmp("video/", mime, 6)) {
472            // MPEG2 tracks do not provide CSD, so read the stream header
473            if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
474                off64_t offset;
475                size_t size;
476                if (track->sampleTable->getMetaDataForSample(
477                            0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
478                    if (size > kMaxTrackHeaderSize) {
479                        size = kMaxTrackHeaderSize;
480                    }
481                    uint8_t header[kMaxTrackHeaderSize];
482                    if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
483                        track->meta->setData(kKeyStreamHeader, 'mdat', header, size);
484                    }
485                }
486            }
487
488            if (mMoofOffset > 0) {
489                int64_t duration;
490                if (track->meta->findInt64(kKeyDuration, &duration)) {
491                    // nothing fancy, just pick a frame near 1/4th of the duration
492                    track->meta->setInt64(
493                            kKeyThumbnailTime, duration / 4);
494                }
495            } else {
496                uint32_t sampleIndex;
497                uint32_t sampleTime;
498                if (track->timescale != 0 &&
499                        track->sampleTable->findThumbnailSample(&sampleIndex) == OK
500                        && track->sampleTable->getMetaDataForSample(
501                            sampleIndex, NULL /* offset */, NULL /* size */,
502                            &sampleTime) == OK) {
503                    track->meta->setInt64(
504                            kKeyThumbnailTime,
505                            ((int64_t)sampleTime * 1000000) / track->timescale);
506                }
507            }
508        }
509    }
510
511    return track->meta;
512}
513
// Writes the four bytes of "x", most significant first, into s[0..3] and
// NUL-terminates the result. "s" must have room for 5 chars.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int i = 0; i < 4; ++i) {
        const int shift = 8 * (3 - i);
        s[i] = (x >> shift) & 0xff;
    }
    s[4] = '\0';
}
521
522status_t MPEG4Extractor::readMetaData() {
523    if (mInitCheck != NO_INIT) {
524        return mInitCheck;
525    }
526
527    off64_t offset = 0;
528    status_t err;
529    bool sawMoovOrSidx = false;
530
531    while (!(sawMoovOrSidx && (mMdatFound || mMoofFound))) {
532        off64_t orig_offset = offset;
533        err = parseChunk(&offset, 0);
534
535        if (err != OK && err != UNKNOWN_ERROR) {
536            break;
537        } else if (offset <= orig_offset) {
538            // only continue parsing if the offset was advanced,
539            // otherwise we might end up in an infinite loop
540            ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
541            err = ERROR_MALFORMED;
542            break;
543        } else if (err == UNKNOWN_ERROR) {
544            sawMoovOrSidx = true;
545        }
546    }
547
548    if (mInitCheck == OK) {
549        if (findTrackByMimePrefix("video/") != NULL) {
550            mFileMetaData->setCString(
551                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
552        } else if (findTrackByMimePrefix("audio/") != NULL) {
553            mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
554        } else {
555            mFileMetaData->setCString(kKeyMIMEType, "application/octet-stream");
556        }
557    } else {
558        mInitCheck = err;
559    }
560
561    CHECK_NE(err, (status_t)NO_INIT);
562
563    // copy pssh data into file metadata
564    uint64_t psshsize = 0;
565    for (size_t i = 0; i < mPssh.size(); i++) {
566        psshsize += 20 + mPssh[i].datalen;
567    }
568    if (psshsize > 0 && psshsize <= UINT32_MAX) {
569        char *buf = (char*)malloc(psshsize);
570        if (!buf) {
571            ALOGE("b/28471206");
572            return NO_MEMORY;
573        }
574        char *ptr = buf;
575        for (size_t i = 0; i < mPssh.size(); i++) {
576            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
577            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
578            ptr += (20 + mPssh[i].datalen);
579        }
580        mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize);
581        free(buf);
582    }
583    return mInitCheck;
584}
585
586char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
587    if (mFirstSINF == NULL) {
588        return NULL;
589    }
590
591    SINF *sinf = mFirstSINF;
592    while (sinf && (trackID != sinf->trackID)) {
593        sinf = sinf->next;
594    }
595
596    if (sinf == NULL) {
597        return NULL;
598    }
599
600    *len = sinf->len;
601    return sinf->IPMPData;
602}
603
604// Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
605static int32_t readSize(off64_t offset,
606        const sp<DataSource> &DataSource, uint8_t *numOfBytes) {
607    uint32_t size = 0;
608    uint8_t data;
609    bool moreData = true;
610    *numOfBytes = 0;
611
612    while (moreData) {
613        if (DataSource->readAt(offset, &data, 1) < 1) {
614            return -1;
615        }
616        offset ++;
617        moreData = (data >= 128) ? true : false;
618        size = (size << 7) | (data & 0x7f); // Take last 7 bits
619        (*numOfBytes) ++;
620    }
621
622    return size;
623}
624
625status_t MPEG4Extractor::parseDrmSINF(
626        off64_t * /* offset */, off64_t data_offset) {
627    uint8_t updateIdTag;
628    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
629        return ERROR_IO;
630    }
631    data_offset ++;
632
633    if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
634        return ERROR_MALFORMED;
635    }
636
637    uint8_t numOfBytes;
638    int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
639    if (size < 0) {
640        return ERROR_IO;
641    }
642    data_offset += numOfBytes;
643
644    while(size >= 11 ) {
645        uint8_t descriptorTag;
646        if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
647            return ERROR_IO;
648        }
649        data_offset ++;
650
651        if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
652            return ERROR_MALFORMED;
653        }
654
655        uint8_t buffer[8];
656        //ObjectDescriptorID and ObjectDescriptor url flag
657        if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
658            return ERROR_IO;
659        }
660        data_offset += 2;
661
662        if ((buffer[1] >> 5) & 0x0001) { //url flag is set
663            return ERROR_MALFORMED;
664        }
665
666        if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
667            return ERROR_IO;
668        }
669        data_offset += 8;
670
671        if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
672                || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
673            return ERROR_MALFORMED;
674        }
675
676        SINF *sinf = new SINF;
677        sinf->trackID = U16_AT(&buffer[3]);
678        sinf->IPMPDescriptorID = buffer[7];
679        sinf->next = mFirstSINF;
680        mFirstSINF = sinf;
681
682        size -= (8 + 2 + 1);
683    }
684
685    if (size != 0) {
686        return ERROR_MALFORMED;
687    }
688
689    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
690        return ERROR_IO;
691    }
692    data_offset ++;
693
694    if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
695        return ERROR_MALFORMED;
696    }
697
698    size = readSize(data_offset, mDataSource, &numOfBytes);
699    if (size < 0) {
700        return ERROR_IO;
701    }
702    data_offset += numOfBytes;
703
704    while (size > 0) {
705        uint8_t tag;
706        int32_t dataLen;
707        if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
708            return ERROR_IO;
709        }
710        data_offset ++;
711
712        if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
713            uint8_t id;
714            dataLen = readSize(data_offset, mDataSource, &numOfBytes);
715            if (dataLen < 0) {
716                return ERROR_IO;
717            } else if (dataLen < 4) {
718                return ERROR_MALFORMED;
719            }
720            data_offset += numOfBytes;
721
722            if (mDataSource->readAt(data_offset, &id, 1) < 1) {
723                return ERROR_IO;
724            }
725            data_offset ++;
726
727            SINF *sinf = mFirstSINF;
728            while (sinf && (sinf->IPMPDescriptorID != id)) {
729                sinf = sinf->next;
730            }
731            if (sinf == NULL) {
732                return ERROR_MALFORMED;
733            }
734            sinf->len = dataLen - 3;
735            sinf->IPMPData = new (std::nothrow) char[sinf->len];
736            if (sinf->IPMPData == NULL) {
737                return ERROR_MALFORMED;
738            }
739            data_offset += 2;
740
741            if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) {
742                return ERROR_IO;
743            }
744            data_offset += sinf->len;
745
746            size -= (dataLen + numOfBytes + 1);
747        }
748    }
749
750    if (size != 0) {
751        return ERROR_MALFORMED;
752    }
753
754    return UNKNOWN_ERROR;  // Return a dummy error.
755}
756
757struct PathAdder {
758    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
759        : mPath(path) {
760        mPath->push(chunkType);
761    }
762
763    ~PathAdder() {
764        mPath->pop();
765    }
766
767private:
768    Vector<uint32_t> *mPath;
769
770    PathAdder(const PathAdder &);
771    PathAdder &operator=(const PathAdder &);
772};
773
774static bool underMetaDataPath(const Vector<uint32_t> &path) {
775    return path.size() >= 5
776        && path[0] == FOURCC('m', 'o', 'o', 'v')
777        && path[1] == FOURCC('u', 'd', 't', 'a')
778        && path[2] == FOURCC('m', 'e', 't', 'a')
779        && path[3] == FOURCC('i', 'l', 's', 't');
780}
781
782static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
783    return path.size() >= 2
784            && path[0] == FOURCC('m', 'o', 'o', 'v')
785            && path[1] == FOURCC('m', 'e', 't', 'a')
786            && (depth == 2
787            || (depth == 3
788                    && (path[2] == FOURCC('h', 'd', 'l', 'r')
789                    ||  path[2] == FOURCC('i', 'l', 's', 't')
790                    ||  path[2] == FOURCC('k', 'e', 'y', 's'))));
791}
792
793// Given a time in seconds since Jan 1 1904, produce a human-readable string.
794static bool convertTimeToDate(int64_t time_1904, String8 *s) {
795    // delta between mpeg4 time and unix epoch time
796    static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
797    if (time_1904 < INT64_MIN + delta) {
798        return false;
799    }
800    time_t time_1970 = time_1904 - delta;
801
802    char tmp[32];
803    struct tm* tm = gmtime(&time_1970);
804    if (tm != NULL &&
805            strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
806        s->setTo(tmp);
807        return true;
808    }
809    return false;
810}
811
812status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
813    ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
814
815    if (*offset < 0) {
816        ALOGE("b/23540914");
817        return ERROR_MALFORMED;
818    }
819    if (depth > 100) {
820        ALOGE("b/27456299");
821        return ERROR_MALFORMED;
822    }
823    uint32_t hdr[2];
824    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
825        return ERROR_IO;
826    }
827    uint64_t chunk_size = ntohl(hdr[0]);
828    int32_t chunk_type = ntohl(hdr[1]);
829    off64_t data_offset = *offset + 8;
830
831    if (chunk_size == 1) {
832        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
833            return ERROR_IO;
834        }
835        chunk_size = ntoh64(chunk_size);
836        data_offset += 8;
837
838        if (chunk_size < 16) {
839            // The smallest valid chunk is 16 bytes long in this case.
840            return ERROR_MALFORMED;
841        }
842    } else if (chunk_size == 0) {
843        if (depth == 0) {
844            // atom extends to end of file
845            off64_t sourceSize;
846            if (mDataSource->getSize(&sourceSize) == OK) {
847                chunk_size = (sourceSize - *offset);
848            } else {
849                // XXX could we just pick a "sufficiently large" value here?
850                ALOGE("atom size is 0, and data source has no size");
851                return ERROR_MALFORMED;
852            }
853        } else {
854            // not allowed for non-toplevel atoms, skip it
855            *offset += 4;
856            return OK;
857        }
858    } else if (chunk_size < 8) {
859        // The smallest valid chunk is 8 bytes long.
860        ALOGE("invalid chunk size: %" PRIu64, chunk_size);
861        return ERROR_MALFORMED;
862    }
863
864    char chunk[5];
865    MakeFourCCString(chunk_type, chunk);
866    ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
867
868    if (kUseHexDump) {
869        static const char kWhitespace[] = "                                        ";
870        const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
871        printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
872
873        char buffer[256];
874        size_t n = chunk_size;
875        if (n > sizeof(buffer)) {
876            n = sizeof(buffer);
877        }
878        if (mDataSource->readAt(*offset, buffer, n)
879                < (ssize_t)n) {
880            return ERROR_IO;
881        }
882
883        hexdump(buffer, n);
884    }
885
886    PathAdder autoAdder(&mPath, chunk_type);
887
888    // (data_offset - *offset) is either 8 or 16
889    off64_t chunk_data_size = chunk_size - (data_offset - *offset);
890    if (chunk_data_size < 0) {
891        ALOGE("b/23540914");
892        return ERROR_MALFORMED;
893    }
894    if (chunk_type != FOURCC('m', 'd', 'a', 't') && chunk_data_size > kMaxAtomSize) {
895        char errMsg[100];
896        sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
897        ALOGE("%s (b/28615448)", errMsg);
898        android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
899        return ERROR_MALFORMED;
900    }
901
902    if (chunk_type != FOURCC('c', 'p', 'r', 't')
903            && chunk_type != FOURCC('c', 'o', 'v', 'r')
904            && mPath.size() == 5 && underMetaDataPath(mPath)) {
905        off64_t stop_offset = *offset + chunk_size;
906        *offset = data_offset;
907        while (*offset < stop_offset) {
908            status_t err = parseChunk(offset, depth + 1);
909            if (err != OK) {
910                return err;
911            }
912        }
913
914        if (*offset != stop_offset) {
915            return ERROR_MALFORMED;
916        }
917
918        return OK;
919    }
920
921    switch(chunk_type) {
922        case FOURCC('m', 'o', 'o', 'v'):
923        case FOURCC('t', 'r', 'a', 'k'):
924        case FOURCC('m', 'd', 'i', 'a'):
925        case FOURCC('m', 'i', 'n', 'f'):
926        case FOURCC('d', 'i', 'n', 'f'):
927        case FOURCC('s', 't', 'b', 'l'):
928        case FOURCC('m', 'v', 'e', 'x'):
929        case FOURCC('m', 'o', 'o', 'f'):
930        case FOURCC('t', 'r', 'a', 'f'):
931        case FOURCC('m', 'f', 'r', 'a'):
932        case FOURCC('u', 'd', 't', 'a'):
933        case FOURCC('i', 'l', 's', 't'):
934        case FOURCC('s', 'i', 'n', 'f'):
935        case FOURCC('s', 'c', 'h', 'i'):
936        case FOURCC('e', 'd', 't', 's'):
937        case FOURCC('w', 'a', 'v', 'e'):
938        {
939            if (chunk_type == FOURCC('m', 'o', 'o', 'v') && depth != 0) {
940                ALOGE("moov: depth %d", depth);
941                return ERROR_MALFORMED;
942            }
943            if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) {
944                // store the offset of the first segment
945                mMoofFound = true;
946                mMoofOffset = *offset;
947            }
948
949            if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
950                ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
951
952                if (mDataSource->flags()
953                        & (DataSource::kWantsPrefetching
954                            | DataSource::kIsCachingDataSource)) {
955                    sp<MPEG4DataSource> cachedSource =
956                        new MPEG4DataSource(mDataSource);
957
958                    if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
959                        mDataSource = cachedSource;
960                    }
961                }
962
963                if (mLastTrack == NULL)
964                    return ERROR_MALFORMED;
965
966                mLastTrack->sampleTable = new SampleTable(mDataSource);
967            }
968
969            bool isTrack = false;
970            if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
971                if (depth != 1) {
972                    ALOGE("trak: depth %d", depth);
973                    return ERROR_MALFORMED;
974                }
975                isTrack = true;
976
977                Track *track = new Track;
978                track->next = NULL;
979                if (mLastTrack) {
980                    mLastTrack->next = track;
981                } else {
982                    mFirstTrack = track;
983                }
984                mLastTrack = track;
985
986                track->meta = new MetaData;
987                track->includes_expensive_metadata = false;
988                track->skipTrack = false;
989                track->timescale = 0;
990                track->meta->setCString(kKeyMIMEType, "application/octet-stream");
991            }
992
993            off64_t stop_offset = *offset + chunk_size;
994            *offset = data_offset;
995            while (*offset < stop_offset) {
996                status_t err = parseChunk(offset, depth + 1);
997                if (err != OK) {
998                    if (isTrack) {
999                        mLastTrack->skipTrack = true;
1000                        break;
1001                    }
1002                    return err;
1003                }
1004            }
1005
1006            if (*offset != stop_offset) {
1007                return ERROR_MALFORMED;
1008            }
1009
1010            if (isTrack) {
1011                int32_t trackId;
1012                // There must be exact one track header per track.
1013                if (!mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) {
1014                    mLastTrack->skipTrack = true;
1015                }
1016                if (mLastTrack->skipTrack) {
1017                    Track *cur = mFirstTrack;
1018
1019                    if (cur == mLastTrack) {
1020                        delete cur;
1021                        mFirstTrack = mLastTrack = NULL;
1022                    } else {
1023                        while (cur && cur->next != mLastTrack) {
1024                            cur = cur->next;
1025                        }
1026                        if (cur) {
1027                            cur->next = NULL;
1028                        }
1029                        delete mLastTrack;
1030                        mLastTrack = cur;
1031                    }
1032
1033                    return OK;
1034                }
1035
1036                status_t err = verifyTrack(mLastTrack);
1037
1038                if (err != OK) {
1039                    return err;
1040                }
1041            } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
1042                mInitCheck = OK;
1043
1044                if (!mIsDrm) {
1045                    return UNKNOWN_ERROR;  // Return a dummy error.
1046                } else {
1047                    return OK;
1048                }
1049            }
1050            break;
1051        }
1052
1053        case FOURCC('e', 'l', 's', 't'):
1054        {
1055            *offset += chunk_size;
1056
1057            // See 14496-12 8.6.6
1058            uint8_t version;
1059            if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1060                return ERROR_IO;
1061            }
1062
1063            uint32_t entry_count;
1064            if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1065                return ERROR_IO;
1066            }
1067
1068            if (entry_count != 1) {
1069                // we only support a single entry at the moment, for gapless playback
1070                ALOGW("ignoring edit list with %d entries", entry_count);
1071            } else if (mHeaderTimescale == 0) {
1072                ALOGW("ignoring edit list because timescale is 0");
1073            } else {
1074                off64_t entriesoffset = data_offset + 8;
1075                uint64_t segment_duration;
1076                int64_t media_time;
1077
1078                if (version == 1) {
1079                    if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1080                            !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1081                        return ERROR_IO;
1082                    }
1083                } else if (version == 0) {
1084                    uint32_t sd;
1085                    int32_t mt;
1086                    if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1087                            !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1088                        return ERROR_IO;
1089                    }
1090                    segment_duration = sd;
1091                    media_time = mt;
1092                } else {
1093                    return ERROR_IO;
1094                }
1095
1096                uint64_t halfscale = mHeaderTimescale / 2;
1097                segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale;
1098                media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale;
1099
1100                int64_t duration;
1101                int32_t samplerate;
1102                if (!mLastTrack) {
1103                    return ERROR_MALFORMED;
1104                }
1105                if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
1106                        mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {
1107
1108                    int64_t delay = (media_time  * samplerate + 500000) / 1000000;
1109                    mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
1110
1111                    int64_t paddingus = duration - (int64_t)(segment_duration + media_time);
1112                    if (paddingus < 0) {
1113                        // track duration from media header (which is what kKeyDuration is) might
1114                        // be slightly shorter than the segment duration, which would make the
1115                        // padding negative. Clamp to zero.
1116                        paddingus = 0;
1117                    }
1118                    int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
1119                    mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
1120                }
1121            }
1122            break;
1123        }
1124
1125        case FOURCC('f', 'r', 'm', 'a'):
1126        {
1127            *offset += chunk_size;
1128
1129            uint32_t original_fourcc;
1130            if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1131                return ERROR_IO;
1132            }
1133            original_fourcc = ntohl(original_fourcc);
1134            ALOGV("read original format: %d", original_fourcc);
1135
1136            if (mLastTrack == NULL)
1137                return ERROR_MALFORMED;
1138
1139            mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
1140            uint32_t num_channels = 0;
1141            uint32_t sample_rate = 0;
1142            if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1143                mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1144                mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1145            }
1146            break;
1147        }
1148
1149        case FOURCC('t', 'e', 'n', 'c'):
1150        {
1151            *offset += chunk_size;
1152
1153            if (chunk_size < 32) {
1154                return ERROR_MALFORMED;
1155            }
1156
1157            // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1158            // default IV size, 16 bytes default KeyID
1159            // (ISO 23001-7)
1160            char buf[4];
1161            memset(buf, 0, 4);
1162            if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1163                return ERROR_IO;
1164            }
1165            uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1166            if (defaultAlgorithmId > 1) {
1167                // only 0 (clear) and 1 (AES-128) are valid
1168                return ERROR_MALFORMED;
1169            }
1170
1171            memset(buf, 0, 4);
1172            if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1173                return ERROR_IO;
1174            }
1175            uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1176
1177            if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
1178                    (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
1179                // only unencrypted data must have 0 IV size
1180                return ERROR_MALFORMED;
1181            } else if (defaultIVSize != 0 &&
1182                    defaultIVSize != 8 &&
1183                    defaultIVSize != 16) {
1184                // only supported sizes are 0, 8 and 16
1185                return ERROR_MALFORMED;
1186            }
1187
1188            uint8_t defaultKeyId[16];
1189
1190            if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1191                return ERROR_IO;
1192            }
1193
1194            if (mLastTrack == NULL)
1195                return ERROR_MALFORMED;
1196
1197            mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
1198            mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1199            mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1200            break;
1201        }
1202
1203        case FOURCC('t', 'k', 'h', 'd'):
1204        {
1205            *offset += chunk_size;
1206
1207            status_t err;
1208            if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1209                return err;
1210            }
1211
1212            break;
1213        }
1214
1215        case FOURCC('p', 's', 's', 'h'):
1216        {
1217            *offset += chunk_size;
1218
1219            PsshInfo pssh;
1220
1221            if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1222                return ERROR_IO;
1223            }
1224
1225            uint32_t psshdatalen = 0;
1226            if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1227                return ERROR_IO;
1228            }
1229            pssh.datalen = ntohl(psshdatalen);
1230            ALOGV("pssh data size: %d", pssh.datalen);
1231            if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1232                // pssh data length exceeds size of containing box
1233                return ERROR_MALFORMED;
1234            }
1235
1236            pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1237            if (pssh.data == NULL) {
1238                return ERROR_MALFORMED;
1239            }
1240            ALOGV("allocated pssh @ %p", pssh.data);
1241            ssize_t requested = (ssize_t) pssh.datalen;
1242            if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1243                delete[] pssh.data;
1244                return ERROR_IO;
1245            }
1246            mPssh.push_back(pssh);
1247
1248            break;
1249        }
1250
1251        case FOURCC('m', 'd', 'h', 'd'):
1252        {
1253            *offset += chunk_size;
1254
1255            if (chunk_data_size < 4 || mLastTrack == NULL) {
1256                return ERROR_MALFORMED;
1257            }
1258
1259            uint8_t version;
1260            if (mDataSource->readAt(
1261                        data_offset, &version, sizeof(version))
1262                    < (ssize_t)sizeof(version)) {
1263                return ERROR_IO;
1264            }
1265
1266            off64_t timescale_offset;
1267
1268            if (version == 1) {
1269                timescale_offset = data_offset + 4 + 16;
1270            } else if (version == 0) {
1271                timescale_offset = data_offset + 4 + 8;
1272            } else {
1273                return ERROR_IO;
1274            }
1275
1276            uint32_t timescale;
1277            if (mDataSource->readAt(
1278                        timescale_offset, &timescale, sizeof(timescale))
1279                    < (ssize_t)sizeof(timescale)) {
1280                return ERROR_IO;
1281            }
1282
1283            if (!timescale) {
1284                ALOGE("timescale should not be ZERO.");
1285                return ERROR_MALFORMED;
1286            }
1287
1288            mLastTrack->timescale = ntohl(timescale);
1289
1290            // 14496-12 says all ones means indeterminate, but some files seem to use
1291            // 0 instead. We treat both the same.
1292            int64_t duration = 0;
1293            if (version == 1) {
1294                if (mDataSource->readAt(
1295                            timescale_offset + 4, &duration, sizeof(duration))
1296                        < (ssize_t)sizeof(duration)) {
1297                    return ERROR_IO;
1298                }
1299                if (duration != -1) {
1300                    duration = ntoh64(duration);
1301                }
1302            } else {
1303                uint32_t duration32;
1304                if (mDataSource->readAt(
1305                            timescale_offset + 4, &duration32, sizeof(duration32))
1306                        < (ssize_t)sizeof(duration32)) {
1307                    return ERROR_IO;
1308                }
1309                if (duration32 != 0xffffffff) {
1310                    duration = ntohl(duration32);
1311                }
1312            }
1313            if (duration != 0 && mLastTrack->timescale != 0) {
1314                mLastTrack->meta->setInt64(
1315                        kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1316            }
1317
1318            uint8_t lang[2];
1319            off64_t lang_offset;
1320            if (version == 1) {
1321                lang_offset = timescale_offset + 4 + 8;
1322            } else if (version == 0) {
1323                lang_offset = timescale_offset + 4 + 4;
1324            } else {
1325                return ERROR_IO;
1326            }
1327
1328            if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1329                    < (ssize_t)sizeof(lang)) {
1330                return ERROR_IO;
1331            }
1332
1333            // To get the ISO-639-2/T three character language code
1334            // 1 bit pad followed by 3 5-bits characters. Each character
1335            // is packed as the difference between its ASCII value and 0x60.
1336            char lang_code[4];
1337            lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1338            lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1339            lang_code[2] = (lang[1] & 0x1f) + 0x60;
1340            lang_code[3] = '\0';
1341
1342            mLastTrack->meta->setCString(
1343                    kKeyMediaLanguage, lang_code);
1344
1345            break;
1346        }
1347
1348        case FOURCC('s', 't', 's', 'd'):
1349        {
1350            uint8_t buffer[8];
1351            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1352                return ERROR_MALFORMED;
1353            }
1354
1355            if (mDataSource->readAt(
1356                        data_offset, buffer, 8) < 8) {
1357                return ERROR_IO;
1358            }
1359
1360            if (U32_AT(buffer) != 0) {
1361                // Should be version 0, flags 0.
1362                return ERROR_MALFORMED;
1363            }
1364
1365            uint32_t entry_count = U32_AT(&buffer[4]);
1366
1367            if (entry_count > 1) {
1368                // For 3GPP timed text, there could be multiple tx3g boxes contain
1369                // multiple text display formats. These formats will be used to
1370                // display the timed text.
1371                // For encrypted files, there may also be more than one entry.
1372                const char *mime;
1373
1374                if (mLastTrack == NULL)
1375                    return ERROR_MALFORMED;
1376
1377                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1378                if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1379                        strcasecmp(mime, "application/octet-stream")) {
1380                    // For now we only support a single type of media per track.
1381                    mLastTrack->skipTrack = true;
1382                    *offset += chunk_size;
1383                    break;
1384                }
1385            }
1386            off64_t stop_offset = *offset + chunk_size;
1387            *offset = data_offset + 8;
1388            for (uint32_t i = 0; i < entry_count; ++i) {
1389                status_t err = parseChunk(offset, depth + 1);
1390                if (err != OK) {
1391                    return err;
1392                }
1393            }
1394
1395            if (*offset != stop_offset) {
1396                return ERROR_MALFORMED;
1397            }
1398            break;
1399        }
1400        case FOURCC('m', 'e', 't', 't'):
1401        {
1402            *offset += chunk_size;
1403
1404            if (mLastTrack == NULL)
1405                return ERROR_MALFORMED;
1406
1407            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1408            if (buffer->data() == NULL) {
1409                return NO_MEMORY;
1410            }
1411
1412            if (mDataSource->readAt(
1413                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1414                return ERROR_IO;
1415            }
1416
1417            String8 mimeFormat((const char *)(buffer->data()), chunk_data_size);
1418            mLastTrack->meta->setCString(kKeyMIMEType, mimeFormat.string());
1419
1420            break;
1421        }
1422
1423        case FOURCC('m', 'p', '4', 'a'):
1424        case FOURCC('e', 'n', 'c', 'a'):
1425        case FOURCC('s', 'a', 'm', 'r'):
1426        case FOURCC('s', 'a', 'w', 'b'):
1427        {
1428            if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')
1429                    && depth >= 1 && mPath[depth - 1] == FOURCC('w', 'a', 'v', 'e')) {
1430                // Ignore mp4a embedded in QT wave atom
1431                *offset += chunk_size;
1432                break;
1433            }
1434
1435            uint8_t buffer[8 + 20];
1436            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1437                // Basic AudioSampleEntry size.
1438                return ERROR_MALFORMED;
1439            }
1440
1441            if (mDataSource->readAt(
1442                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1443                return ERROR_IO;
1444            }
1445
1446            uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1447            uint16_t version = U16_AT(&buffer[8]);
1448            uint32_t num_channels = U16_AT(&buffer[16]);
1449
1450            uint16_t sample_size = U16_AT(&buffer[18]);
1451            uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1452
1453            if (mLastTrack == NULL)
1454                return ERROR_MALFORMED;
1455
1456            off64_t stop_offset = *offset + chunk_size;
1457            *offset = data_offset + sizeof(buffer);
1458
1459            if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')) {
1460                if (version == 1) {
1461                    if (mDataSource->readAt(*offset, buffer, 16) < 16) {
1462                        return ERROR_IO;
1463                    }
1464
1465#if 0
1466                    U32_AT(buffer);  // samples per packet
1467                    U32_AT(&buffer[4]);  // bytes per packet
1468                    U32_AT(&buffer[8]);  // bytes per frame
1469                    U32_AT(&buffer[12]);  // bytes per sample
1470#endif
1471                    *offset += 16;
1472                } else if (version == 2) {
1473                    uint8_t v2buffer[36];
1474                    if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
1475                        return ERROR_IO;
1476                    }
1477
1478#if 0
1479                    U32_AT(v2buffer);  // size of struct only
1480                    sample_rate = (uint32_t)U64_AT(&v2buffer[4]);  // audio sample rate
1481                    num_channels = U32_AT(&v2buffer[12]);  // num audio channels
1482                    U32_AT(&v2buffer[16]);  // always 0x7f000000
1483                    sample_size = (uint16_t)U32_AT(&v2buffer[20]);  // const bits per channel
1484                    U32_AT(&v2buffer[24]);  // format specifc flags
1485                    U32_AT(&v2buffer[28]);  // const bytes per audio packet
1486                    U32_AT(&v2buffer[32]);  // const LPCM frames per audio packet
1487#endif
1488                    *offset += 36;
1489                }
1490            }
1491
1492            if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1493                // if the chunk type is enca, we'll get the type from the sinf/frma box later
1494                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1495                AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1496            }
1497            ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1498                   chunk, num_channels, sample_size, sample_rate);
1499            mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1500            mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1501
1502            while (*offset < stop_offset) {
1503                status_t err = parseChunk(offset, depth + 1);
1504                if (err != OK) {
1505                    return err;
1506                }
1507            }
1508
1509            if (*offset != stop_offset) {
1510                return ERROR_MALFORMED;
1511            }
1512            break;
1513        }
1514
1515        case FOURCC('m', 'p', '4', 'v'):
1516        case FOURCC('e', 'n', 'c', 'v'):
1517        case FOURCC('s', '2', '6', '3'):
1518        case FOURCC('H', '2', '6', '3'):
1519        case FOURCC('h', '2', '6', '3'):
1520        case FOURCC('a', 'v', 'c', '1'):
1521        case FOURCC('h', 'v', 'c', '1'):
1522        case FOURCC('h', 'e', 'v', '1'):
1523        {
1524            uint8_t buffer[78];
1525            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1526                // Basic VideoSampleEntry size.
1527                return ERROR_MALFORMED;
1528            }
1529
1530            if (mDataSource->readAt(
1531                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1532                return ERROR_IO;
1533            }
1534
1535            uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1536            uint16_t width = U16_AT(&buffer[6 + 18]);
1537            uint16_t height = U16_AT(&buffer[6 + 20]);
1538
1539            // The video sample is not standard-compliant if it has invalid dimension.
1540            // Use some default width and height value, and
1541            // let the decoder figure out the actual width and height (and thus
1542            // be prepared for INFO_FOMRAT_CHANGED event).
1543            if (width == 0)  width  = 352;
1544            if (height == 0) height = 288;
1545
1546            // printf("*** coding='%s' width=%d height=%d\n",
1547            //        chunk, width, height);
1548
1549            if (mLastTrack == NULL)
1550                return ERROR_MALFORMED;
1551
1552            if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1553                // if the chunk type is encv, we'll get the type from the sinf/frma box later
1554                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1555            }
1556            mLastTrack->meta->setInt32(kKeyWidth, width);
1557            mLastTrack->meta->setInt32(kKeyHeight, height);
1558
1559            off64_t stop_offset = *offset + chunk_size;
1560            *offset = data_offset + sizeof(buffer);
1561            while (*offset < stop_offset) {
1562                status_t err = parseChunk(offset, depth + 1);
1563                if (err != OK) {
1564                    return err;
1565                }
1566            }
1567
1568            if (*offset != stop_offset) {
1569                return ERROR_MALFORMED;
1570            }
1571            break;
1572        }
1573
1574        case FOURCC('s', 't', 'c', 'o'):
1575        case FOURCC('c', 'o', '6', '4'):
1576        {
1577            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1578                return ERROR_MALFORMED;
1579
1580            status_t err =
1581                mLastTrack->sampleTable->setChunkOffsetParams(
1582                        chunk_type, data_offset, chunk_data_size);
1583
1584            *offset += chunk_size;
1585
1586            if (err != OK) {
1587                return err;
1588            }
1589
1590            break;
1591        }
1592
1593        case FOURCC('s', 't', 's', 'c'):
1594        {
1595            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1596                return ERROR_MALFORMED;
1597
1598            status_t err =
1599                mLastTrack->sampleTable->setSampleToChunkParams(
1600                        data_offset, chunk_data_size);
1601
1602            *offset += chunk_size;
1603
1604            if (err != OK) {
1605                return err;
1606            }
1607
1608            break;
1609        }
1610
1611        case FOURCC('s', 't', 's', 'z'):
1612        case FOURCC('s', 't', 'z', '2'):
1613        {
1614            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1615                return ERROR_MALFORMED;
1616
1617            status_t err =
1618                mLastTrack->sampleTable->setSampleSizeParams(
1619                        chunk_type, data_offset, chunk_data_size);
1620
1621            *offset += chunk_size;
1622
1623            if (err != OK) {
1624                return err;
1625            }
1626
1627            size_t max_size;
1628            err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1629
1630            if (err != OK) {
1631                return err;
1632            }
1633
1634            if (max_size != 0) {
1635                // Assume that a given buffer only contains at most 10 chunks,
1636                // each chunk originally prefixed with a 2 byte length will
1637                // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1638                // and thus will grow by 2 bytes per chunk.
1639                if (max_size > SIZE_MAX - 10 * 2) {
1640                    ALOGE("max sample size too big: %zu", max_size);
1641                    return ERROR_MALFORMED;
1642                }
1643                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1644            } else {
1645                // No size was specified. Pick a conservatively large size.
1646                uint32_t width, height;
1647                if (!mLastTrack->meta->findInt32(kKeyWidth, (int32_t*)&width) ||
1648                    !mLastTrack->meta->findInt32(kKeyHeight,(int32_t*) &height)) {
1649                    ALOGE("No width or height, assuming worst case 1080p");
1650                    width = 1920;
1651                    height = 1080;
1652                } else {
1653                    // A resolution was specified, check that it's not too big. The values below
1654                    // were chosen so that the calculations below don't cause overflows, they're
1655                    // not indicating that resolutions up to 32kx32k are actually supported.
1656                    if (width > 32768 || height > 32768) {
1657                        ALOGE("can't support %u x %u video", width, height);
1658                        return ERROR_MALFORMED;
1659                    }
1660                }
1661
1662                const char *mime;
1663                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1664                if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
1665                        || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
1666                    // AVC & HEVC requires compression ratio of at least 2, and uses
1667                    // macroblocks
1668                    max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1669                } else {
1670                    // For all other formats there is no minimum compression
1671                    // ratio. Use compression ratio of 1.
1672                    max_size = width * height * 3 / 2;
1673                }
1674                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size);
1675            }
1676
1677            // NOTE: setting another piece of metadata invalidates any pointers (such as the
1678            // mimetype) previously obtained, so don't cache them.
1679            const char *mime;
1680            CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1681            // Calculate average frame rate.
1682            if (!strncasecmp("video/", mime, 6)) {
1683                size_t nSamples = mLastTrack->sampleTable->countSamples();
1684                if (nSamples == 0) {
1685                    int32_t trackId;
1686                    if (mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) {
1687                        for (size_t i = 0; i < mTrex.size(); i++) {
1688                            Trex *t = &mTrex.editItemAt(i);
1689                            if (t->track_ID == (uint32_t) trackId) {
1690                                if (t->default_sample_duration > 0) {
1691                                    int32_t frameRate =
1692                                            mLastTrack->timescale / t->default_sample_duration;
1693                                    mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1694                                }
1695                                break;
1696                            }
1697                        }
1698                    }
1699                } else {
1700                    int64_t durationUs;
1701                    if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1702                        if (durationUs > 0) {
1703                            int32_t frameRate = (nSamples * 1000000LL +
1704                                        (durationUs >> 1)) / durationUs;
1705                            mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1706                        }
1707                    }
1708                }
1709            }
1710
1711            break;
1712        }
1713
1714        case FOURCC('s', 't', 't', 's'):
1715        {
1716            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1717                return ERROR_MALFORMED;
1718
1719            *offset += chunk_size;
1720
1721            status_t err =
1722                mLastTrack->sampleTable->setTimeToSampleParams(
1723                        data_offset, chunk_data_size);
1724
1725            if (err != OK) {
1726                return err;
1727            }
1728
1729            break;
1730        }
1731
1732        case FOURCC('c', 't', 't', 's'):
1733        {
1734            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1735                return ERROR_MALFORMED;
1736
1737            *offset += chunk_size;
1738
1739            status_t err =
1740                mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1741                        data_offset, chunk_data_size);
1742
1743            if (err != OK) {
1744                return err;
1745            }
1746
1747            break;
1748        }
1749
1750        case FOURCC('s', 't', 's', 's'):
1751        {
1752            if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1753                return ERROR_MALFORMED;
1754
1755            *offset += chunk_size;
1756
1757            status_t err =
1758                mLastTrack->sampleTable->setSyncSampleParams(
1759                        data_offset, chunk_data_size);
1760
1761            if (err != OK) {
1762                return err;
1763            }
1764
1765            break;
1766        }
1767
1768        // \xA9xyz
1769        case FOURCC(0xA9, 'x', 'y', 'z'):
1770        {
1771            *offset += chunk_size;
1772
1773            // Best case the total data length inside "\xA9xyz" box
1774            // would be 8, for instance "\xA9xyz" + "\x00\x04\x15\xc7" + "0+0/",
1775            // where "\x00\x04" is the text string length with value = 4,
1776            // "\0x15\xc7" is the language code = en, and "0+0" is a
1777            // location (string) value with longitude = 0 and latitude = 0.
1778            if (chunk_data_size < 8) {
1779                return ERROR_MALFORMED;
1780            }
1781
1782            // Worst case the location string length would be 18,
1783            // for instance +90.0000-180.0000, without the trailing "/" and
1784            // the string length + language code, and some devices include
1785            // an additional 8 bytes of altitude, e.g. +007.186
1786            char buffer[18 + 8];
1787
            // Subtract 5 from the data size because the text string length +
            // language code take 4 bytes, and the trailing slash "/" takes 1 byte.
1790            off64_t location_length = chunk_data_size - 5;
1791            if (location_length >= (off64_t) sizeof(buffer)) {
1792                return ERROR_MALFORMED;
1793            }
1794
1795            if (mDataSource->readAt(
1796                        data_offset + 4, buffer, location_length) < location_length) {
1797                return ERROR_IO;
1798            }
1799
1800            buffer[location_length] = '\0';
1801            mFileMetaData->setCString(kKeyLocation, buffer);
1802            break;
1803        }
1804
1805        case FOURCC('e', 's', 'd', 's'):
1806        {
1807            *offset += chunk_size;
1808
1809            if (chunk_data_size < 4) {
1810                return ERROR_MALFORMED;
1811            }
1812
1813            uint8_t buffer[256];
1814            if (chunk_data_size > (off64_t)sizeof(buffer)) {
1815                return ERROR_BUFFER_TOO_SMALL;
1816            }
1817
1818            if (mDataSource->readAt(
1819                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
1820                return ERROR_IO;
1821            }
1822
1823            if (U32_AT(buffer) != 0) {
1824                // Should be version 0, flags 0.
1825                return ERROR_MALFORMED;
1826            }
1827
1828            if (mLastTrack == NULL)
1829                return ERROR_MALFORMED;
1830
1831            mLastTrack->meta->setData(
1832                    kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1833
1834            if (mPath.size() >= 2
1835                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1836                // Information from the ESDS must be relied on for proper
1837                // setup of sample rate and channel count for MPEG4 Audio.
1838                // The generic header appears to only contain generic
1839                // information...
1840
1841                status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1842                        &buffer[4], chunk_data_size - 4);
1843
1844                if (err != OK) {
1845                    return err;
1846                }
1847            }
1848            if (mPath.size() >= 2
1849                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) {
1850                // Check if the video is MPEG2
1851                ESDS esds(&buffer[4], chunk_data_size - 4);
1852
1853                uint8_t objectTypeIndication;
1854                if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
1855                    if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
1856                        mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
1857                    }
1858                }
1859            }
1860            break;
1861        }
1862
1863        case FOURCC('b', 't', 'r', 't'):
1864        {
1865            *offset += chunk_size;
1866            if (mLastTrack == NULL) {
1867                return ERROR_MALFORMED;
1868            }
1869
1870            uint8_t buffer[12];
1871            if (chunk_data_size != sizeof(buffer)) {
1872                return ERROR_MALFORMED;
1873            }
1874
1875            if (mDataSource->readAt(
1876                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1877                return ERROR_IO;
1878            }
1879
1880            uint32_t maxBitrate = U32_AT(&buffer[4]);
1881            uint32_t avgBitrate = U32_AT(&buffer[8]);
1882            if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
1883                mLastTrack->meta->setInt32(kKeyMaxBitRate, (int32_t)maxBitrate);
1884            }
1885            if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
1886                mLastTrack->meta->setInt32(kKeyBitRate, (int32_t)avgBitrate);
1887            }
1888            break;
1889        }
1890
1891        case FOURCC('a', 'v', 'c', 'C'):
1892        {
1893            *offset += chunk_size;
1894
1895            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1896
1897            if (buffer->data() == NULL) {
1898                ALOGE("b/28471206");
1899                return NO_MEMORY;
1900            }
1901
1902            if (mDataSource->readAt(
1903                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1904                return ERROR_IO;
1905            }
1906
1907            if (mLastTrack == NULL)
1908                return ERROR_MALFORMED;
1909
1910            mLastTrack->meta->setData(
1911                    kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
1912
1913            break;
1914        }
1915        case FOURCC('h', 'v', 'c', 'C'):
1916        {
1917            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1918
1919            if (buffer->data() == NULL) {
1920                ALOGE("b/28471206");
1921                return NO_MEMORY;
1922            }
1923
1924            if (mDataSource->readAt(
1925                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1926                return ERROR_IO;
1927            }
1928
1929            if (mLastTrack == NULL)
1930                return ERROR_MALFORMED;
1931
1932            mLastTrack->meta->setData(
1933                    kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size);
1934
1935            *offset += chunk_size;
1936            break;
1937        }
1938
1939        case FOURCC('d', '2', '6', '3'):
1940        {
1941            *offset += chunk_size;
1942            /*
1943             * d263 contains a fixed 7 bytes part:
1944             *   vendor - 4 bytes
1945             *   version - 1 byte
1946             *   level - 1 byte
1947             *   profile - 1 byte
1948             * optionally, "d263" box itself may contain a 16-byte
1949             * bit rate box (bitr)
1950             *   average bit rate - 4 bytes
1951             *   max bit rate - 4 bytes
1952             */
1953            char buffer[23];
1954            if (chunk_data_size != 7 &&
1955                chunk_data_size != 23) {
1956                ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
1957                return ERROR_MALFORMED;
1958            }
1959
1960            if (mDataSource->readAt(
1961                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1962                return ERROR_IO;
1963            }
1964
1965            if (mLastTrack == NULL)
1966                return ERROR_MALFORMED;
1967
1968            mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1969
1970            break;
1971        }
1972
1973        case FOURCC('m', 'e', 't', 'a'):
1974        {
1975            off64_t stop_offset = *offset + chunk_size;
1976            *offset = data_offset;
1977            bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
1978            if (!isParsingMetaKeys) {
1979                uint8_t buffer[4];
1980                if (chunk_data_size < (off64_t)sizeof(buffer)) {
1981                    *offset = stop_offset;
1982                    return ERROR_MALFORMED;
1983                }
1984
1985                if (mDataSource->readAt(
1986                            data_offset, buffer, 4) < 4) {
1987                    *offset = stop_offset;
1988                    return ERROR_IO;
1989                }
1990
1991                if (U32_AT(buffer) != 0) {
1992                    // Should be version 0, flags 0.
1993
1994                    // If it's not, let's assume this is one of those
1995                    // apparently malformed chunks that don't have flags
1996                    // and completely different semantics than what's
1997                    // in the MPEG4 specs and skip it.
1998                    *offset = stop_offset;
1999                    return OK;
2000                }
2001                *offset +=  sizeof(buffer);
2002            }
2003
2004            while (*offset < stop_offset) {
2005                status_t err = parseChunk(offset, depth + 1);
2006                if (err != OK) {
2007                    return err;
2008                }
2009            }
2010
2011            if (*offset != stop_offset) {
2012                return ERROR_MALFORMED;
2013            }
2014            break;
2015        }
2016
2017        case FOURCC('m', 'e', 'a', 'n'):
2018        case FOURCC('n', 'a', 'm', 'e'):
2019        case FOURCC('d', 'a', 't', 'a'):
2020        {
2021            *offset += chunk_size;
2022
2023            if (mPath.size() == 6 && underMetaDataPath(mPath)) {
2024                status_t err = parseITunesMetaData(data_offset, chunk_data_size);
2025
2026                if (err != OK) {
2027                    return err;
2028                }
2029            }
2030
2031            break;
2032        }
2033
2034        case FOURCC('m', 'v', 'h', 'd'):
2035        {
2036            *offset += chunk_size;
2037
2038            if (depth != 1) {
2039                ALOGE("mvhd: depth %d", depth);
2040                return ERROR_MALFORMED;
2041            }
2042            if (chunk_data_size < 32) {
2043                return ERROR_MALFORMED;
2044            }
2045
2046            uint8_t header[32];
2047            if (mDataSource->readAt(
2048                        data_offset, header, sizeof(header))
2049                    < (ssize_t)sizeof(header)) {
2050                return ERROR_IO;
2051            }
2052
2053            uint64_t creationTime;
2054            uint64_t duration = 0;
2055            if (header[0] == 1) {
2056                creationTime = U64_AT(&header[4]);
2057                mHeaderTimescale = U32_AT(&header[20]);
2058                duration = U64_AT(&header[24]);
2059                if (duration == 0xffffffffffffffff) {
2060                    duration = 0;
2061                }
2062            } else if (header[0] != 0) {
2063                return ERROR_MALFORMED;
2064            } else {
2065                creationTime = U32_AT(&header[4]);
2066                mHeaderTimescale = U32_AT(&header[12]);
2067                uint32_t d32 = U32_AT(&header[16]);
2068                if (d32 == 0xffffffff) {
2069                    d32 = 0;
2070                }
2071                duration = d32;
2072            }
2073            if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
2074                mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
2075            }
2076
2077            String8 s;
2078            if (convertTimeToDate(creationTime, &s)) {
2079                mFileMetaData->setCString(kKeyDate, s.string());
2080            }
2081
2082
2083            break;
2084        }
2085
2086        case FOURCC('m', 'e', 'h', 'd'):
2087        {
2088            *offset += chunk_size;
2089
2090            if (chunk_data_size < 8) {
2091                return ERROR_MALFORMED;
2092            }
2093
2094            uint8_t flags[4];
2095            if (mDataSource->readAt(
2096                        data_offset, flags, sizeof(flags))
2097                    < (ssize_t)sizeof(flags)) {
2098                return ERROR_IO;
2099            }
2100
2101            uint64_t duration = 0;
2102            if (flags[0] == 1) {
2103                // 64 bit
2104                if (chunk_data_size < 12) {
2105                    return ERROR_MALFORMED;
2106                }
2107                mDataSource->getUInt64(data_offset + 4, &duration);
2108                if (duration == 0xffffffffffffffff) {
2109                    duration = 0;
2110                }
2111            } else if (flags[0] == 0) {
2112                // 32 bit
2113                uint32_t d32;
2114                mDataSource->getUInt32(data_offset + 4, &d32);
2115                if (d32 == 0xffffffff) {
2116                    d32 = 0;
2117                }
2118                duration = d32;
2119            } else {
2120                return ERROR_MALFORMED;
2121            }
2122
2123            if (duration != 0 && mHeaderTimescale != 0) {
2124                mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
2125            }
2126
2127            break;
2128        }
2129
2130        case FOURCC('m', 'd', 'a', 't'):
2131        {
2132            ALOGV("mdat chunk, drm: %d", mIsDrm);
2133
2134            mMdatFound = true;
2135
2136            if (!mIsDrm) {
2137                *offset += chunk_size;
2138                break;
2139            }
2140
2141            if (chunk_size < 8) {
2142                return ERROR_MALFORMED;
2143            }
2144
2145            return parseDrmSINF(offset, data_offset);
2146        }
2147
2148        case FOURCC('h', 'd', 'l', 'r'):
2149        {
2150            *offset += chunk_size;
2151
2152            if (underQTMetaPath(mPath, 3)) {
2153                break;
2154            }
2155
2156            uint32_t buffer;
2157            if (mDataSource->readAt(
2158                        data_offset + 8, &buffer, 4) < 4) {
2159                return ERROR_IO;
2160            }
2161
2162            uint32_t type = ntohl(buffer);
2163            // For the 3GPP file format, the handler-type within the 'hdlr' box
2164            // shall be 'text'. We also want to support 'sbtl' handler type
2165            // for a practical reason as various MPEG4 containers use it.
2166            if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
2167                if (mLastTrack != NULL) {
2168                    mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
2169                }
2170            }
2171
2172            break;
2173        }
2174
2175        case FOURCC('k', 'e', 'y', 's'):
2176        {
2177            *offset += chunk_size;
2178
2179            if (underQTMetaPath(mPath, 3)) {
2180                status_t err = parseQTMetaKey(data_offset, chunk_data_size);
2181                if (err != OK) {
2182                    return err;
2183                }
2184            }
2185            break;
2186        }
2187
2188        case FOURCC('t', 'r', 'e', 'x'):
2189        {
2190            *offset += chunk_size;
2191
2192            if (chunk_data_size < 24) {
2193                return ERROR_IO;
2194            }
2195            Trex trex;
2196            if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2197                !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2198                !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2199                !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2200                !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2201                return ERROR_IO;
2202            }
2203            mTrex.add(trex);
2204            break;
2205        }
2206
2207        case FOURCC('t', 'x', '3', 'g'):
2208        {
2209            if (mLastTrack == NULL)
2210                return ERROR_MALFORMED;
2211
2212            uint32_t type;
2213            const void *data;
2214            size_t size = 0;
2215            if (!mLastTrack->meta->findData(
2216                    kKeyTextFormatData, &type, &data, &size)) {
2217                size = 0;
2218            }
2219
2220            if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) {
2221                return ERROR_MALFORMED;
2222            }
2223
2224            uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size];
2225            if (buffer == NULL) {
2226                return ERROR_MALFORMED;
2227            }
2228
2229            if (size > 0) {
2230                memcpy(buffer, data, size);
2231            }
2232
2233            if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
2234                    < chunk_size) {
2235                delete[] buffer;
2236                buffer = NULL;
2237
2238                // advance read pointer so we don't end up reading this again
2239                *offset += chunk_size;
2240                return ERROR_IO;
2241            }
2242
2243            mLastTrack->meta->setData(
2244                    kKeyTextFormatData, 0, buffer, size + chunk_size);
2245
2246            delete[] buffer;
2247
2248            *offset += chunk_size;
2249            break;
2250        }
2251
2252        case FOURCC('c', 'o', 'v', 'r'):
2253        {
2254            *offset += chunk_size;
2255
2256            if (mFileMetaData != NULL) {
2257                ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
2258                      chunk_data_size, data_offset);
2259
2260                if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
2261                    return ERROR_MALFORMED;
2262                }
2263                sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1);
2264                if (buffer->data() == NULL) {
2265                    ALOGE("b/28471206");
2266                    return NO_MEMORY;
2267                }
2268                if (mDataSource->readAt(
2269                    data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) {
2270                    return ERROR_IO;
2271                }
2272                const int kSkipBytesOfDataBox = 16;
2273                if (chunk_data_size <= kSkipBytesOfDataBox) {
2274                    return ERROR_MALFORMED;
2275                }
2276
2277                mFileMetaData->setData(
2278                    kKeyAlbumArt, MetaData::TYPE_NONE,
2279                    buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
2280            }
2281
2282            break;
2283        }
2284
2285        case FOURCC('c', 'o', 'l', 'r'):
2286        {
2287            *offset += chunk_size;
2288            // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
2289            // ignore otherwise
2290            if (depth >= 2 && mPath[depth - 2] == FOURCC('s', 't', 's', 'd')) {
2291                status_t err = parseColorInfo(data_offset, chunk_data_size);
2292                if (err != OK) {
2293                    return err;
2294                }
2295            }
2296
2297            break;
2298        }
2299
2300        case FOURCC('t', 'i', 't', 'l'):
2301        case FOURCC('p', 'e', 'r', 'f'):
2302        case FOURCC('a', 'u', 't', 'h'):
2303        case FOURCC('g', 'n', 'r', 'e'):
2304        case FOURCC('a', 'l', 'b', 'm'):
2305        case FOURCC('y', 'r', 'r', 'c'):
2306        {
2307            *offset += chunk_size;
2308
2309            status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
2310
2311            if (err != OK) {
2312                return err;
2313            }
2314
2315            break;
2316        }
2317
2318        case FOURCC('I', 'D', '3', '2'):
2319        {
2320            *offset += chunk_size;
2321
2322            if (chunk_data_size < 6) {
2323                return ERROR_MALFORMED;
2324            }
2325
2326            parseID3v2MetaData(data_offset + 6);
2327
2328            break;
2329        }
2330
2331        case FOURCC('-', '-', '-', '-'):
2332        {
2333            mLastCommentMean.clear();
2334            mLastCommentName.clear();
2335            mLastCommentData.clear();
2336            *offset += chunk_size;
2337            break;
2338        }
2339
2340        case FOURCC('s', 'i', 'd', 'x'):
2341        {
2342            status_t err = parseSegmentIndex(data_offset, chunk_data_size);
2343            if (err != OK) {
2344                return err;
2345            }
2346            *offset += chunk_size;
2347            return UNKNOWN_ERROR; // stop parsing after sidx
2348        }
2349
2350        case FOURCC('a', 'c', '-', '3'):
2351        {
2352            *offset += chunk_size;
2353            return parseAC3SampleEntry(data_offset);
2354        }
2355
2356        case FOURCC('f', 't', 'y', 'p'):
2357        {
2358            if (chunk_data_size < 8 || depth != 0) {
2359                return ERROR_MALFORMED;
2360            }
2361
2362            off64_t stop_offset = *offset + chunk_size;
2363            uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
2364            for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
2365                if (i == 1) {
2366                    // Skip this index, it refers to the minorVersion,
2367                    // not a brand.
2368                    continue;
2369                }
2370
2371                uint32_t brand;
2372                if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
2373                    return ERROR_MALFORMED;
2374                }
2375
2376                brand = ntohl(brand);
2377                if (brand == FOURCC('q', 't', ' ', ' ')) {
2378                    mIsQT = true;
2379                    break;
2380                }
2381            }
2382
2383            *offset = stop_offset;
2384
2385            break;
2386        }
2387
2388        default:
2389        {
2390            // check if we're parsing 'ilst' for meta keys
2391            // if so, treat type as a number (key-id).
2392            if (underQTMetaPath(mPath, 3)) {
2393                status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
2394                if (err != OK) {
2395                    return err;
2396                }
2397            }
2398
2399            *offset += chunk_size;
2400            break;
2401        }
2402    }
2403
2404    return OK;
2405}
2406
2407status_t MPEG4Extractor::parseAC3SampleEntry(off64_t offset) {
2408    // skip 16 bytes:
2409    //  + 6-byte reserved,
2410    //  + 2-byte data reference index,
2411    //  + 8-byte reserved
2412    offset += 16;
2413    uint16_t channelCount;
2414    if (!mDataSource->getUInt16(offset, &channelCount)) {
2415        return ERROR_MALFORMED;
2416    }
2417    // skip 8 bytes:
2418    //  + 2-byte channelCount,
2419    //  + 2-byte sample size,
2420    //  + 4-byte reserved
2421    offset += 8;
2422    uint16_t sampleRate;
2423    if (!mDataSource->getUInt16(offset, &sampleRate)) {
2424        ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read sample rate");
2425        return ERROR_MALFORMED;
2426    }
2427
2428    // skip 4 bytes:
2429    //  + 2-byte sampleRate,
2430    //  + 2-byte reserved
2431    offset += 4;
2432    return parseAC3SpecificBox(offset, sampleRate);
2433}
2434
2435status_t MPEG4Extractor::parseAC3SpecificBox(
2436        off64_t offset, uint16_t sampleRate) {
2437    uint32_t size;
2438    // + 4-byte size
2439    // + 4-byte type
2440    // + 3-byte payload
2441    const uint32_t kAC3SpecificBoxSize = 11;
2442    if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) {
2443        ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
2444        return ERROR_MALFORMED;
2445    }
2446
2447    offset += 4;
2448    uint32_t type;
2449    if (!mDataSource->getUInt32(offset, &type) || type != FOURCC('d', 'a', 'c', '3')) {
2450        ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
2451        return ERROR_MALFORMED;
2452    }
2453
2454    offset += 4;
2455    const uint32_t kAC3SpecificBoxPayloadSize = 3;
2456    uint8_t chunk[kAC3SpecificBoxPayloadSize];
2457    if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) {
2458        ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
2459        return ERROR_MALFORMED;
2460    }
2461
2462    ABitReader br(chunk, sizeof(chunk));
2463    static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
2464    static const unsigned sampleRateTable[] = {48000, 44100, 32000};
2465
2466    unsigned fscod = br.getBits(2);
2467    if (fscod == 3) {
2468        ALOGE("Incorrect fscod (3) in AC3 header");
2469        return ERROR_MALFORMED;
2470    }
2471    unsigned boxSampleRate = sampleRateTable[fscod];
2472    if (boxSampleRate != sampleRate) {
2473        ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
2474            boxSampleRate, sampleRate);
2475        return ERROR_MALFORMED;
2476    }
2477
2478    unsigned bsid = br.getBits(5);
2479    if (bsid > 8) {
2480        ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
2481        return ERROR_MALFORMED;
2482    }
2483
2484    // skip
2485    unsigned bsmod __unused = br.getBits(3);
2486
2487    unsigned acmod = br.getBits(3);
2488    unsigned lfeon = br.getBits(1);
2489    unsigned channelCount = channelCountTable[acmod] + lfeon;
2490
2491    if (mLastTrack == NULL) {
2492        return ERROR_MALFORMED;
2493    }
2494    mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3);
2495    mLastTrack->meta->setInt32(kKeyChannelCount, channelCount);
2496    mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);
2497    return OK;
2498}
2499
2500status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
2501  ALOGV("MPEG4Extractor::parseSegmentIndex");
2502
2503    if (size < 12) {
2504      return -EINVAL;
2505    }
2506
2507    uint32_t flags;
2508    if (!mDataSource->getUInt32(offset, &flags)) {
2509        return ERROR_MALFORMED;
2510    }
2511
2512    uint32_t version = flags >> 24;
2513    flags &= 0xffffff;
2514
2515    ALOGV("sidx version %d", version);
2516
2517    uint32_t referenceId;
2518    if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
2519        return ERROR_MALFORMED;
2520    }
2521
2522    uint32_t timeScale;
2523    if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
2524        return ERROR_MALFORMED;
2525    }
2526    ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
2527    if (timeScale == 0)
2528        return ERROR_MALFORMED;
2529
2530    uint64_t earliestPresentationTime;
2531    uint64_t firstOffset;
2532
2533    offset += 12;
2534    size -= 12;
2535
2536    if (version == 0) {
2537        if (size < 8) {
2538            return -EINVAL;
2539        }
2540        uint32_t tmp;
2541        if (!mDataSource->getUInt32(offset, &tmp)) {
2542            return ERROR_MALFORMED;
2543        }
2544        earliestPresentationTime = tmp;
2545        if (!mDataSource->getUInt32(offset + 4, &tmp)) {
2546            return ERROR_MALFORMED;
2547        }
2548        firstOffset = tmp;
2549        offset += 8;
2550        size -= 8;
2551    } else {
2552        if (size < 16) {
2553            return -EINVAL;
2554        }
2555        if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
2556            return ERROR_MALFORMED;
2557        }
2558        if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
2559            return ERROR_MALFORMED;
2560        }
2561        offset += 16;
2562        size -= 16;
2563    }
2564    ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
2565
2566    if (size < 4) {
2567        return -EINVAL;
2568    }
2569
2570    uint16_t referenceCount;
2571    if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
2572        return ERROR_MALFORMED;
2573    }
2574    offset += 4;
2575    size -= 4;
2576    ALOGV("refcount: %d", referenceCount);
2577
2578    if (size < referenceCount * 12) {
2579        return -EINVAL;
2580    }
2581
2582    uint64_t total_duration = 0;
2583    for (unsigned int i = 0; i < referenceCount; i++) {
2584        uint32_t d1, d2, d3;
2585
2586        if (!mDataSource->getUInt32(offset, &d1) ||     // size
2587            !mDataSource->getUInt32(offset + 4, &d2) || // duration
2588            !mDataSource->getUInt32(offset + 8, &d3)) { // flags
2589            return ERROR_MALFORMED;
2590        }
2591
2592        if (d1 & 0x80000000) {
2593            ALOGW("sub-sidx boxes not supported yet");
2594        }
2595        bool sap = d3 & 0x80000000;
2596        uint32_t saptype = (d3 >> 28) & 7;
2597        if (!sap || (saptype != 1 && saptype != 2)) {
2598            // type 1 and 2 are sync samples
2599            ALOGW("not a stream access point, or unsupported type: %08x", d3);
2600        }
2601        total_duration += d2;
2602        offset += 12;
2603        ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
2604        SidxEntry se;
2605        se.mSize = d1 & 0x7fffffff;
2606        se.mDurationUs = 1000000LL * d2 / timeScale;
2607        mSidxEntries.add(se);
2608    }
2609
2610    uint64_t sidxDuration = total_duration * 1000000 / timeScale;
2611
2612    if (mLastTrack == NULL)
2613        return ERROR_MALFORMED;
2614
2615    int64_t metaDuration;
2616    if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
2617        mLastTrack->meta->setInt64(kKeyDuration, sidxDuration);
2618    }
2619    return OK;
2620}
2621
2622status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
2623    if (size < 8) {
2624        return ERROR_MALFORMED;
2625    }
2626
2627    uint32_t count;
2628    if (!mDataSource->getUInt32(offset + 4, &count)) {
2629        return ERROR_MALFORMED;
2630    }
2631
2632    if (mMetaKeyMap.size() > 0) {
2633        ALOGW("'keys' atom seen again, discarding existing entries");
2634        mMetaKeyMap.clear();
2635    }
2636
2637    off64_t keyOffset = offset + 8;
2638    off64_t stopOffset = offset + size;
2639    for (size_t i = 1; i <= count; i++) {
2640        if (keyOffset + 8 > stopOffset) {
2641            return ERROR_MALFORMED;
2642        }
2643
2644        uint32_t keySize;
2645        if (!mDataSource->getUInt32(keyOffset, &keySize)
2646                || keySize < 8
2647                || keyOffset + keySize > stopOffset) {
2648            return ERROR_MALFORMED;
2649        }
2650
2651        uint32_t type;
2652        if (!mDataSource->getUInt32(keyOffset + 4, &type)
2653                || type != FOURCC('m', 'd', 't', 'a')) {
2654            return ERROR_MALFORMED;
2655        }
2656
2657        keySize -= 8;
2658        keyOffset += 8;
2659
2660        sp<ABuffer> keyData = new ABuffer(keySize);
2661        if (keyData->data() == NULL) {
2662            return ERROR_MALFORMED;
2663        }
2664        if (mDataSource->readAt(
2665                keyOffset, keyData->data(), keySize) < (ssize_t) keySize) {
2666            return ERROR_MALFORMED;
2667        }
2668
2669        AString key((const char *)keyData->data(), keySize);
2670        mMetaKeyMap.add(i, key);
2671
2672        keyOffset += keySize;
2673    }
2674    return OK;
2675}
2676
2677status_t MPEG4Extractor::parseQTMetaVal(
2678        int32_t keyId, off64_t offset, size_t size) {
2679    ssize_t index = mMetaKeyMap.indexOfKey(keyId);
2680    if (index < 0) {
2681        // corresponding key is not present, ignore
2682        return ERROR_MALFORMED;
2683    }
2684
2685    if (size <= 16) {
2686        return ERROR_MALFORMED;
2687    }
2688    uint32_t dataSize;
2689    if (!mDataSource->getUInt32(offset, &dataSize)
2690            || dataSize > size || dataSize <= 16) {
2691        return ERROR_MALFORMED;
2692    }
2693    uint32_t atomFourCC;
2694    if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
2695            || atomFourCC != FOURCC('d', 'a', 't', 'a')) {
2696        return ERROR_MALFORMED;
2697    }
2698    uint32_t dataType;
2699    if (!mDataSource->getUInt32(offset + 8, &dataType)
2700            || ((dataType & 0xff000000) != 0)) {
2701        // not well-known type
2702        return ERROR_MALFORMED;
2703    }
2704
2705    dataSize -= 16;
2706    offset += 16;
2707
2708    if (dataType == 23 && dataSize >= 4) {
2709        // BE Float32
2710        uint32_t val;
2711        if (!mDataSource->getUInt32(offset, &val)) {
2712            return ERROR_MALFORMED;
2713        }
2714        if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
2715            mFileMetaData->setFloat(kKeyCaptureFramerate, *(float *)&val);
2716        }
2717    } else if (dataType == 67 && dataSize >= 4) {
2718        // BE signed int32
2719        uint32_t val;
2720        if (!mDataSource->getUInt32(offset, &val)) {
2721            return ERROR_MALFORMED;
2722        }
2723        if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
2724            mFileMetaData->setInt32(kKeyTemporalLayerCount, val);
2725        }
2726    } else {
2727        // add more keys if needed
2728        ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
2729    }
2730
2731    return OK;
2732}
2733
2734status_t MPEG4Extractor::parseTrackHeader(
2735        off64_t data_offset, off64_t data_size) {
2736    if (data_size < 4) {
2737        return ERROR_MALFORMED;
2738    }
2739
2740    uint8_t version;
2741    if (mDataSource->readAt(data_offset, &version, 1) < 1) {
2742        return ERROR_IO;
2743    }
2744
2745    size_t dynSize = (version == 1) ? 36 : 24;
2746
2747    uint8_t buffer[36 + 60];
2748
2749    if (data_size != (off64_t)dynSize + 60) {
2750        return ERROR_MALFORMED;
2751    }
2752
2753    if (mDataSource->readAt(
2754                data_offset, buffer, data_size) < (ssize_t)data_size) {
2755        return ERROR_IO;
2756    }
2757
2758    uint64_t ctime __unused, mtime __unused, duration __unused;
2759    int32_t id;
2760
2761    if (version == 1) {
2762        ctime = U64_AT(&buffer[4]);
2763        mtime = U64_AT(&buffer[12]);
2764        id = U32_AT(&buffer[20]);
2765        duration = U64_AT(&buffer[28]);
2766    } else if (version == 0) {
2767        ctime = U32_AT(&buffer[4]);
2768        mtime = U32_AT(&buffer[8]);
2769        id = U32_AT(&buffer[12]);
2770        duration = U32_AT(&buffer[20]);
2771    } else {
2772        return ERROR_UNSUPPORTED;
2773    }
2774
2775    if (mLastTrack == NULL)
2776        return ERROR_MALFORMED;
2777
2778    mLastTrack->meta->setInt32(kKeyTrackID, id);
2779
2780    size_t matrixOffset = dynSize + 16;
2781    int32_t a00 = U32_AT(&buffer[matrixOffset]);
2782    int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
2783    int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
2784    int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
2785
2786#if 0
2787    int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
2788    int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
2789
2790    ALOGI("x' = %.2f * x + %.2f * y + %.2f",
2791         a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
2792    ALOGI("y' = %.2f * x + %.2f * y + %.2f",
2793         a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
2794#endif
2795
2796    uint32_t rotationDegrees;
2797
2798    static const int32_t kFixedOne = 0x10000;
2799    if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
2800        // Identity, no rotation
2801        rotationDegrees = 0;
2802    } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2803        rotationDegrees = 90;
2804    } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2805        rotationDegrees = 270;
2806    } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2807        rotationDegrees = 180;
2808    } else {
2809        ALOGW("We only support 0,90,180,270 degree rotation matrices");
2810        rotationDegrees = 0;
2811    }
2812
2813    if (rotationDegrees != 0) {
2814        mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
2815    }
2816
2817    // Handle presentation display size, which could be different
2818    // from the image size indicated by kKeyWidth and kKeyHeight.
2819    uint32_t width = U32_AT(&buffer[dynSize + 52]);
2820    uint32_t height = U32_AT(&buffer[dynSize + 56]);
2821    mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
2822    mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
2823
2824    return OK;
2825}
2826
2827status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
2828    if (size == 0) {
2829        return OK;
2830    }
2831
2832    if (size < 4 || size == SIZE_MAX) {
2833        return ERROR_MALFORMED;
2834    }
2835
2836    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2837    if (buffer == NULL) {
2838        return ERROR_MALFORMED;
2839    }
2840    if (mDataSource->readAt(
2841                offset, buffer, size) != (ssize_t)size) {
2842        delete[] buffer;
2843        buffer = NULL;
2844
2845        return ERROR_IO;
2846    }
2847
2848    uint32_t flags = U32_AT(buffer);
2849
2850    uint32_t metadataKey = 0;
2851    char chunk[5];
2852    MakeFourCCString(mPath[4], chunk);
2853    ALOGV("meta: %s @ %lld", chunk, (long long)offset);
2854    switch ((int32_t)mPath[4]) {
2855        case FOURCC(0xa9, 'a', 'l', 'b'):
2856        {
2857            metadataKey = kKeyAlbum;
2858            break;
2859        }
2860        case FOURCC(0xa9, 'A', 'R', 'T'):
2861        {
2862            metadataKey = kKeyArtist;
2863            break;
2864        }
2865        case FOURCC('a', 'A', 'R', 'T'):
2866        {
2867            metadataKey = kKeyAlbumArtist;
2868            break;
2869        }
2870        case FOURCC(0xa9, 'd', 'a', 'y'):
2871        {
2872            metadataKey = kKeyYear;
2873            break;
2874        }
2875        case FOURCC(0xa9, 'n', 'a', 'm'):
2876        {
2877            metadataKey = kKeyTitle;
2878            break;
2879        }
2880        case FOURCC(0xa9, 'w', 'r', 't'):
2881        {
2882            metadataKey = kKeyWriter;
2883            break;
2884        }
2885        case FOURCC('c', 'o', 'v', 'r'):
2886        {
2887            metadataKey = kKeyAlbumArt;
2888            break;
2889        }
2890        case FOURCC('g', 'n', 'r', 'e'):
2891        {
2892            metadataKey = kKeyGenre;
2893            break;
2894        }
2895        case FOURCC(0xa9, 'g', 'e', 'n'):
2896        {
2897            metadataKey = kKeyGenre;
2898            break;
2899        }
2900        case FOURCC('c', 'p', 'i', 'l'):
2901        {
2902            if (size == 9 && flags == 21) {
2903                char tmp[16];
2904                sprintf(tmp, "%d",
2905                        (int)buffer[size - 1]);
2906
2907                mFileMetaData->setCString(kKeyCompilation, tmp);
2908            }
2909            break;
2910        }
2911        case FOURCC('t', 'r', 'k', 'n'):
2912        {
2913            if (size == 16 && flags == 0) {
2914                char tmp[16];
2915                uint16_t* pTrack = (uint16_t*)&buffer[10];
2916                uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2917                sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2918
2919                mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2920            }
2921            break;
2922        }
2923        case FOURCC('d', 'i', 's', 'k'):
2924        {
2925            if ((size == 14 || size == 16) && flags == 0) {
2926                char tmp[16];
2927                uint16_t* pDisc = (uint16_t*)&buffer[10];
2928                uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2929                sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2930
2931                mFileMetaData->setCString(kKeyDiscNumber, tmp);
2932            }
2933            break;
2934        }
2935        case FOURCC('-', '-', '-', '-'):
2936        {
2937            buffer[size] = '\0';
2938            switch (mPath[5]) {
2939                case FOURCC('m', 'e', 'a', 'n'):
2940                    mLastCommentMean.setTo((const char *)buffer + 4);
2941                    break;
2942                case FOURCC('n', 'a', 'm', 'e'):
2943                    mLastCommentName.setTo((const char *)buffer + 4);
2944                    break;
2945                case FOURCC('d', 'a', 't', 'a'):
2946                    if (size < 8) {
2947                        delete[] buffer;
2948                        buffer = NULL;
2949                        ALOGE("b/24346430");
2950                        return ERROR_MALFORMED;
2951                    }
2952                    mLastCommentData.setTo((const char *)buffer + 8);
2953                    break;
2954            }
2955
2956            // Once we have a set of mean/name/data info, go ahead and process
2957            // it to see if its something we are interested in.  Whether or not
2958            // were are interested in the specific tag, make sure to clear out
2959            // the set so we can be ready to process another tuple should one
2960            // show up later in the file.
2961            if ((mLastCommentMean.length() != 0) &&
2962                (mLastCommentName.length() != 0) &&
2963                (mLastCommentData.length() != 0)) {
2964
2965                if (mLastCommentMean == "com.apple.iTunes"
2966                        && mLastCommentName == "iTunSMPB") {
2967                    int32_t delay, padding;
2968                    if (sscanf(mLastCommentData,
2969                               " %*x %x %x %*x", &delay, &padding) == 2) {
2970                        if (mLastTrack == NULL) {
2971                            delete[] buffer;
2972                            return ERROR_MALFORMED;
2973                        }
2974
2975                        mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2976                        mLastTrack->meta->setInt32(kKeyEncoderPadding, padding);
2977                    }
2978                }
2979
2980                mLastCommentMean.clear();
2981                mLastCommentName.clear();
2982                mLastCommentData.clear();
2983            }
2984            break;
2985        }
2986
2987        default:
2988            break;
2989    }
2990
2991    if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) {
2992        if (metadataKey == kKeyAlbumArt) {
2993            mFileMetaData->setData(
2994                    kKeyAlbumArt, MetaData::TYPE_NONE,
2995                    buffer + 8, size - 8);
2996        } else if (metadataKey == kKeyGenre) {
2997            if (flags == 0) {
2998                // uint8_t genre code, iTunes genre codes are
2999                // the standard id3 codes, except they start
3000                // at 1 instead of 0 (e.g. Pop is 14, not 13)
3001                // We use standard id3 numbering, so subtract 1.
3002                int genrecode = (int)buffer[size - 1];
3003                genrecode--;
3004                if (genrecode < 0) {
3005                    genrecode = 255; // reserved for 'unknown genre'
3006                }
3007                char genre[10];
3008                sprintf(genre, "%d", genrecode);
3009
3010                mFileMetaData->setCString(metadataKey, genre);
3011            } else if (flags == 1) {
3012                // custom genre string
3013                buffer[size] = '\0';
3014
3015                mFileMetaData->setCString(
3016                        metadataKey, (const char *)buffer + 8);
3017            }
3018        } else {
3019            buffer[size] = '\0';
3020
3021            mFileMetaData->setCString(
3022                    metadataKey, (const char *)buffer + 8);
3023        }
3024    }
3025
3026    delete[] buffer;
3027    buffer = NULL;
3028
3029    return OK;
3030}
3031
3032status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
3033    if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
3034        return ERROR_MALFORMED;
3035    }
3036
3037    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3038    if (buffer == NULL) {
3039        return ERROR_MALFORMED;
3040    }
3041    if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
3042        delete[] buffer;
3043        buffer = NULL;
3044
3045        return ERROR_IO;
3046    }
3047
3048    int32_t type = U32_AT(&buffer[0]);
3049    if ((type == FOURCC('n', 'c', 'l', 'x') && size >= 11)
3050            || (type == FOURCC('n', 'c', 'l', 'c') && size >= 10)) {
3051        int32_t primaries = U16_AT(&buffer[4]);
3052        int32_t transfer = U16_AT(&buffer[6]);
3053        int32_t coeffs = U16_AT(&buffer[8]);
3054        bool fullRange = (type == FOURCC('n', 'c', 'l', 'x')) && (buffer[10] & 128);
3055
3056        ColorAspects aspects;
3057        ColorUtils::convertIsoColorAspectsToCodecAspects(
3058                primaries, transfer, coeffs, fullRange, aspects);
3059
3060        // only store the first color specification
3061        if (!mLastTrack->meta->hasData(kKeyColorPrimaries)) {
3062            mLastTrack->meta->setInt32(kKeyColorPrimaries, aspects.mPrimaries);
3063            mLastTrack->meta->setInt32(kKeyTransferFunction, aspects.mTransfer);
3064            mLastTrack->meta->setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs);
3065            mLastTrack->meta->setInt32(kKeyColorRange, aspects.mRange);
3066        }
3067    }
3068
3069    delete[] buffer;
3070    buffer = NULL;
3071
3072    return OK;
3073}
3074
3075status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
3076    if (size < 4 || size == SIZE_MAX) {
3077        return ERROR_MALFORMED;
3078    }
3079
3080    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3081    if (buffer == NULL) {
3082        return ERROR_MALFORMED;
3083    }
3084    if (mDataSource->readAt(
3085                offset, buffer, size) != (ssize_t)size) {
3086        delete[] buffer;
3087        buffer = NULL;
3088
3089        return ERROR_IO;
3090    }
3091
3092    uint32_t metadataKey = 0;
3093    switch (mPath[depth]) {
3094        case FOURCC('t', 'i', 't', 'l'):
3095        {
3096            metadataKey = kKeyTitle;
3097            break;
3098        }
3099        case FOURCC('p', 'e', 'r', 'f'):
3100        {
3101            metadataKey = kKeyArtist;
3102            break;
3103        }
3104        case FOURCC('a', 'u', 't', 'h'):
3105        {
3106            metadataKey = kKeyWriter;
3107            break;
3108        }
3109        case FOURCC('g', 'n', 'r', 'e'):
3110        {
3111            metadataKey = kKeyGenre;
3112            break;
3113        }
3114        case FOURCC('a', 'l', 'b', 'm'):
3115        {
3116            if (buffer[size - 1] != '\0') {
3117              char tmp[4];
3118              sprintf(tmp, "%u", buffer[size - 1]);
3119
3120              mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
3121            }
3122
3123            metadataKey = kKeyAlbum;
3124            break;
3125        }
3126        case FOURCC('y', 'r', 'r', 'c'):
3127        {
3128            if (size < 6) {
3129                delete[] buffer;
3130                buffer = NULL;
3131                ALOGE("b/62133227");
3132                android_errorWriteLog(0x534e4554, "62133227");
3133                return ERROR_MALFORMED;
3134            }
3135            char tmp[5];
3136            uint16_t year = U16_AT(&buffer[4]);
3137
3138            if (year < 10000) {
3139                sprintf(tmp, "%u", year);
3140
3141                mFileMetaData->setCString(kKeyYear, tmp);
3142            }
3143            break;
3144        }
3145
3146        default:
3147            break;
3148    }
3149
3150    if (metadataKey > 0) {
3151        bool isUTF8 = true; // Common case
3152        char16_t *framedata = NULL;
3153        int len16 = 0; // Number of UTF-16 characters
3154
3155        // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
3156        if (size < 6) {
3157            delete[] buffer;
3158            buffer = NULL;
3159            return ERROR_MALFORMED;
3160        }
3161
3162        if (size - 6 >= 4) {
3163            len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
3164            framedata = (char16_t *)(buffer + 6);
3165            if (0xfffe == *framedata) {
3166                // endianness marker (BOM) doesn't match host endianness
3167                for (int i = 0; i < len16; i++) {
3168                    framedata[i] = bswap_16(framedata[i]);
3169                }
3170                // BOM is now swapped to 0xfeff, we will execute next block too
3171            }
3172
3173            if (0xfeff == *framedata) {
3174                // Remove the BOM
3175                framedata++;
3176                len16--;
3177                isUTF8 = false;
3178            }
3179            // else normal non-zero-length UTF-8 string
3180            // we can't handle UTF-16 without BOM as there is no other
3181            // indication of encoding.
3182        }
3183
3184        if (isUTF8) {
3185            buffer[size] = 0;
3186            mFileMetaData->setCString(metadataKey, (const char *)buffer + 6);
3187        } else {
3188            // Convert from UTF-16 string to UTF-8 string.
3189            String8 tmpUTF8str(framedata, len16);
3190            mFileMetaData->setCString(metadataKey, tmpUTF8str.string());
3191        }
3192    }
3193
3194    delete[] buffer;
3195    buffer = NULL;
3196
3197    return OK;
3198}
3199
3200void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
3201    ID3 id3(mDataSource, true /* ignorev1 */, offset);
3202
3203    if (id3.isValid()) {
3204        struct Map {
3205            int key;
3206            const char *tag1;
3207            const char *tag2;
3208        };
3209        static const Map kMap[] = {
3210            { kKeyAlbum, "TALB", "TAL" },
3211            { kKeyArtist, "TPE1", "TP1" },
3212            { kKeyAlbumArtist, "TPE2", "TP2" },
3213            { kKeyComposer, "TCOM", "TCM" },
3214            { kKeyGenre, "TCON", "TCO" },
3215            { kKeyTitle, "TIT2", "TT2" },
3216            { kKeyYear, "TYE", "TYER" },
3217            { kKeyAuthor, "TXT", "TEXT" },
3218            { kKeyCDTrackNumber, "TRK", "TRCK" },
3219            { kKeyDiscNumber, "TPA", "TPOS" },
3220            { kKeyCompilation, "TCP", "TCMP" },
3221        };
3222        static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
3223
3224        for (size_t i = 0; i < kNumMapEntries; ++i) {
3225            if (!mFileMetaData->hasData(kMap[i].key)) {
3226                ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
3227                if (it->done()) {
3228                    delete it;
3229                    it = new ID3::Iterator(id3, kMap[i].tag2);
3230                }
3231
3232                if (it->done()) {
3233                    delete it;
3234                    continue;
3235                }
3236
3237                String8 s;
3238                it->getString(&s);
3239                delete it;
3240
3241                mFileMetaData->setCString(kMap[i].key, s);
3242            }
3243        }
3244
3245        size_t dataSize;
3246        String8 mime;
3247        const void *data = id3.getAlbumArt(&dataSize, &mime);
3248
3249        if (data) {
3250            mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
3251            mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string());
3252        }
3253    }
3254}
3255
3256sp<IMediaSource> MPEG4Extractor::getTrack(size_t index) {
3257    status_t err;
3258    if ((err = readMetaData()) != OK) {
3259        return NULL;
3260    }
3261
3262    Track *track = mFirstTrack;
3263    while (index > 0) {
3264        if (track == NULL) {
3265            return NULL;
3266        }
3267
3268        track = track->next;
3269        --index;
3270    }
3271
3272    if (track == NULL) {
3273        return NULL;
3274    }
3275
3276
3277    Trex *trex = NULL;
3278    int32_t trackId;
3279    if (track->meta->findInt32(kKeyTrackID, &trackId)) {
3280        for (size_t i = 0; i < mTrex.size(); i++) {
3281            Trex *t = &mTrex.editItemAt(i);
3282            if (t->track_ID == (uint32_t) trackId) {
3283                trex = t;
3284                break;
3285            }
3286        }
3287    } else {
3288        ALOGE("b/21657957");
3289        return NULL;
3290    }
3291
3292    ALOGV("getTrack called, pssh: %zu", mPssh.size());
3293
3294    const char *mime;
3295    if (!track->meta->findCString(kKeyMIMEType, &mime)) {
3296        return NULL;
3297    }
3298
3299    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
3300        uint32_t type;
3301        const void *data;
3302        size_t size;
3303        if (!track->meta->findData(kKeyAVCC, &type, &data, &size)) {
3304            return NULL;
3305        }
3306
3307        const uint8_t *ptr = (const uint8_t *)data;
3308
3309        if (size < 7 || ptr[0] != 1) {  // configurationVersion == 1
3310            return NULL;
3311        }
3312    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
3313        uint32_t type;
3314        const void *data;
3315        size_t size;
3316        if (!track->meta->findData(kKeyHVCC, &type, &data, &size)) {
3317            return NULL;
3318        }
3319
3320        const uint8_t *ptr = (const uint8_t *)data;
3321
3322        if (size < 22 || ptr[0] != 1) {  // configurationVersion == 1
3323            return NULL;
3324        }
3325    }
3326
3327    sp<MPEG4Source> source =  new MPEG4Source(this,
3328            track->meta, mDataSource, track->timescale, track->sampleTable,
3329            mSidxEntries, trex, mMoofOffset);
3330    if (source->init() != OK) {
3331        return NULL;
3332    }
3333    return source;
3334}
3335
3336// static
3337status_t MPEG4Extractor::verifyTrack(Track *track) {
3338    const char *mime;
3339    CHECK(track->meta->findCString(kKeyMIMEType, &mime));
3340
3341    uint32_t type;
3342    const void *data;
3343    size_t size;
3344    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
3345        if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
3346                || type != kTypeAVCC) {
3347            return ERROR_MALFORMED;
3348        }
3349    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
3350        if (!track->meta->findData(kKeyHVCC, &type, &data, &size)
3351                    || type != kTypeHVCC) {
3352            return ERROR_MALFORMED;
3353        }
3354    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
3355            || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
3356            || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
3357        if (!track->meta->findData(kKeyESDS, &type, &data, &size)
3358                || type != kTypeESDS) {
3359            return ERROR_MALFORMED;
3360        }
3361    }
3362
3363    if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
3364        // Make sure we have all the metadata we need.
3365        ALOGE("stbl atom missing/invalid.");
3366        return ERROR_MALFORMED;
3367    }
3368
3369    if (track->timescale == 0) {
3370        ALOGE("timescale invalid.");
3371        return ERROR_MALFORMED;
3372    }
3373
3374    return OK;
3375}
3376
// MPEG-4 audio object types (AOT). Only the values referenced by this file
// are active; the remaining entries are kept as comments for reference.
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1,
    // AOT_NULL_OBJECT      = 0,
    // AOT_AAC_MAIN         = 1,   /**< Main profile */
    AOT_AAC_LC      = 2,           /**< Low Complexity object */
    // AOT_AAC_SSR          = 3,
    // AOT_AAC_LTP          = 4,
    AOT_SBR         = 5,
    // AOT_AAC_SCAL         = 6,
    // AOT_TWIN_VQ          = 7,
    // AOT_CELP             = 8,
    // AOT_HVXC             = 9,
    // AOT_RSVD_10          = 10,  /**< (reserved) */
    // AOT_RSVD_11          = 11,  /**< (reserved) */
    // AOT_TTSI             = 12,  /**< TTSI Object */
    // AOT_MAIN_SYNTH       = 13,  /**< Main Synthetic object */
    // AOT_WAV_TAB_SYNTH    = 14,  /**< Wavetable Synthesis object */
    // AOT_GEN_MIDI         = 15,  /**< General MIDI object */
    // AOT_ALG_SYNTH_AUD_FX = 16,  /**< Algorithmic Synthesis and Audio FX object */
    AOT_ER_AAC_LC   = 17,          /**< Error Resilient(ER) AAC Low Complexity */
    // AOT_RSVD_18          = 18,  /**< (reserved) */
    // AOT_ER_AAC_LTP       = 19,  /**< Error Resilient(ER) AAC LTP object */
    AOT_ER_AAC_SCAL = 20,          /**< Error Resilient(ER) AAC Scalable object */
    // AOT_ER_TWIN_VQ       = 21,  /**< Error Resilient(ER) TwinVQ object */
    AOT_ER_BSAC     = 22,          /**< Error Resilient(ER) BSAC object */
    AOT_ER_AAC_LD   = 23,          /**< Error Resilient(ER) AAC LowDelay object */
    // AOT_ER_CELP          = 24,  /**< Error Resilient(ER) CELP object */
    // AOT_ER_HVXC          = 25,  /**< Error Resilient(ER) HVXC object */
    // AOT_ER_HILN          = 26,  /**< Error Resilient(ER) HILN object */
    // AOT_ER_PARA          = 27,  /**< Error Resilient(ER) Parametric object */
    // AOT_RSVD_28          = 28,  /**< might become SSC */
    AOT_PS          = 29,          /**< PS, Parametric Stereo (includes SBR) */
    // AOT_MPEGS            = 30,  /**< MPEG Surround */

    AOT_ESCAPE      = 31,          /**< Signal AOT uses more than 5 bits */

    // AOT_MP3ONMP4_L1      = 32,  /**< MPEG-Layer1 in mp4 */
    // AOT_MP3ONMP4_L2      = 33,  /**< MPEG-Layer2 in mp4 */
    // AOT_MP3ONMP4_L3      = 34,  /**< MPEG-Layer3 in mp4 */
    // AOT_RSVD_35          = 35,  /**< might become DST */
    // AOT_RSVD_36          = 36,  /**< might become ALS */
    // AOT_AAC_SLS          = 37,  /**< AAC + SLS */
    // AOT_SLS              = 38,  /**< SLS */
    // AOT_ER_AAC_ELD       = 39,  /**< AAC Enhanced Low Delay */

    // AOT_USAC             = 42,  /**< USAC */
    // AOT_SAOC             = 43,  /**< SAOC */
    // AOT_LD_MPEGS         = 44,  /**< Low Delay MPEG Surround */

    // AOT_RSVD50           = 50,  /**< Interim AOT for Rsvd50 */
};
3428
3429status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
3430        const void *esds_data, size_t esds_size) {
3431    ESDS esds(esds_data, esds_size);
3432
3433    uint8_t objectTypeIndication;
3434    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
3435        return ERROR_MALFORMED;
3436    }
3437
3438    if (objectTypeIndication == 0xe1) {
3439        // This isn't MPEG4 audio at all, it's QCELP 14k...
3440        if (mLastTrack == NULL)
3441            return ERROR_MALFORMED;
3442
3443        mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
3444        return OK;
3445    }
3446
3447    if (objectTypeIndication  == 0x6b) {
3448        // The media subtype is MP3 audio
3449        // Our software MP3 audio decoder may not be able to handle
3450        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
3451        ALOGE("MP3 track in MP4/3GPP file is not supported");
3452        return ERROR_UNSUPPORTED;
3453    }
3454
3455    const uint8_t *csd;
3456    size_t csd_size;
3457    if (esds.getCodecSpecificInfo(
3458                (const void **)&csd, &csd_size) != OK) {
3459        return ERROR_MALFORMED;
3460    }
3461
3462    if (kUseHexDump) {
3463        printf("ESD of size %zu\n", csd_size);
3464        hexdump(csd, csd_size);
3465    }
3466
3467    if (csd_size == 0) {
3468        // There's no further information, i.e. no codec specific data
3469        // Let's assume that the information provided in the mpeg4 headers
3470        // is accurate and hope for the best.
3471
3472        return OK;
3473    }
3474
3475    if (csd_size < 2) {
3476        return ERROR_MALFORMED;
3477    }
3478
3479    static uint32_t kSamplingRate[] = {
3480        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
3481        16000, 12000, 11025, 8000, 7350
3482    };
3483
3484    ABitReader br(csd, csd_size);
3485    uint32_t objectType = br.getBits(5);
3486
3487    if (objectType == 31) {  // AAC-ELD => additional 6 bits
3488        objectType = 32 + br.getBits(6);
3489    }
3490
3491    if (mLastTrack == NULL)
3492        return ERROR_MALFORMED;
3493
3494    //keep AOT type
3495    mLastTrack->meta->setInt32(kKeyAACAOT, objectType);
3496
3497    uint32_t freqIndex = br.getBits(4);
3498
3499    int32_t sampleRate = 0;
3500    int32_t numChannels = 0;
3501    if (freqIndex == 15) {
3502        if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
3503        sampleRate = br.getBits(24);
3504        numChannels = br.getBits(4);
3505    } else {
3506        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3507        numChannels = br.getBits(4);
3508
3509        if (freqIndex == 13 || freqIndex == 14) {
3510            return ERROR_MALFORMED;
3511        }
3512
3513        sampleRate = kSamplingRate[freqIndex];
3514    }
3515
3516    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
3517        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3518        uint32_t extFreqIndex = br.getBits(4);
3519        int32_t extSampleRate __unused;
3520        if (extFreqIndex == 15) {
3521            if (csd_size < 8) {
3522                return ERROR_MALFORMED;
3523            }
3524            if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
3525            extSampleRate = br.getBits(24);
3526        } else {
3527            if (extFreqIndex == 13 || extFreqIndex == 14) {
3528                return ERROR_MALFORMED;
3529            }
3530            extSampleRate = kSamplingRate[extFreqIndex];
3531        }
3532        //TODO: save the extension sampling rate value in meta data =>
3533        //      mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
3534    }
3535
3536    switch (numChannels) {
3537        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
3538        case 0:
3539        case 1:// FC
3540        case 2:// FL FR
3541        case 3:// FC, FL FR
3542        case 4:// FC, FL FR, RC
3543        case 5:// FC, FL FR, SL SR
3544        case 6:// FC, FL FR, SL SR, LFE
3545            //numChannels already contains the right value
3546            break;
3547        case 11:// FC, FL FR, SL SR, RC, LFE
3548            numChannels = 7;
3549            break;
3550        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
3551        case 12:// FC, FL  FR,  SL SR, RL RR, LFE
3552        case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
3553            numChannels = 8;
3554            break;
3555        default:
3556            return ERROR_UNSUPPORTED;
3557    }
3558
3559    {
3560        if (objectType == AOT_SBR || objectType == AOT_PS) {
3561            if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3562            objectType = br.getBits(5);
3563
3564            if (objectType == AOT_ESCAPE) {
3565                if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
3566                objectType = 32 + br.getBits(6);
3567            }
3568        }
3569        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
3570                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
3571                objectType == AOT_ER_BSAC) {
3572            if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
3573            const int32_t frameLengthFlag __unused = br.getBits(1);
3574
3575            const int32_t dependsOnCoreCoder = br.getBits(1);
3576
3577            if (dependsOnCoreCoder ) {
3578                if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
3579                const int32_t coreCoderDelay __unused = br.getBits(14);
3580            }
3581
3582            int32_t extensionFlag = -1;
3583            if (br.numBitsLeft() > 0) {
3584                extensionFlag = br.getBits(1);
3585            } else {
3586                switch (objectType) {
3587                // 14496-3 4.5.1.1 extensionFlag
3588                case AOT_AAC_LC:
3589                    extensionFlag = 0;
3590                    break;
3591                case AOT_ER_AAC_LC:
3592                case AOT_ER_AAC_SCAL:
3593                case AOT_ER_BSAC:
3594                case AOT_ER_AAC_LD:
3595                    extensionFlag = 1;
3596                    break;
3597                default:
3598                    return ERROR_MALFORMED;
3599                    break;
3600                }
3601                ALOGW("csd missing extension flag; assuming %d for object type %u.",
3602                        extensionFlag, objectType);
3603            }
3604
3605            if (numChannels == 0) {
3606                int32_t channelsEffectiveNum = 0;
3607                int32_t channelsNum = 0;
3608                if (br.numBitsLeft() < 32) {
3609                    return ERROR_MALFORMED;
3610                }
3611                const int32_t ElementInstanceTag __unused = br.getBits(4);
3612                const int32_t Profile __unused = br.getBits(2);
3613                const int32_t SamplingFrequencyIndex __unused = br.getBits(4);
3614                const int32_t NumFrontChannelElements = br.getBits(4);
3615                const int32_t NumSideChannelElements = br.getBits(4);
3616                const int32_t NumBackChannelElements = br.getBits(4);
3617                const int32_t NumLfeChannelElements = br.getBits(2);
3618                const int32_t NumAssocDataElements __unused = br.getBits(3);
3619                const int32_t NumValidCcElements __unused = br.getBits(4);
3620
3621                const int32_t MonoMixdownPresent = br.getBits(1);
3622
3623                if (MonoMixdownPresent != 0) {
3624                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3625                    const int32_t MonoMixdownElementNumber __unused = br.getBits(4);
3626                }
3627
3628                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
3629                const int32_t StereoMixdownPresent = br.getBits(1);
3630                if (StereoMixdownPresent != 0) {
3631                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3632                    const int32_t StereoMixdownElementNumber __unused = br.getBits(4);
3633                }
3634
3635                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
3636                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
3637                if (MatrixMixdownIndexPresent != 0) {
3638                    if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
3639                    const int32_t MatrixMixdownIndex __unused = br.getBits(2);
3640                    const int32_t PseudoSurroundEnable __unused = br.getBits(1);
3641                }
3642
3643                int i;
3644                for (i=0; i < NumFrontChannelElements; i++) {
3645                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3646                    const int32_t FrontElementIsCpe = br.getBits(1);
3647                    const int32_t FrontElementTagSelect __unused = br.getBits(4);
3648                    channelsNum += FrontElementIsCpe ? 2 : 1;
3649                }
3650
3651                for (i=0; i < NumSideChannelElements; i++) {
3652                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3653                    const int32_t SideElementIsCpe = br.getBits(1);
3654                    const int32_t SideElementTagSelect __unused = br.getBits(4);
3655                    channelsNum += SideElementIsCpe ? 2 : 1;
3656                }
3657
3658                for (i=0; i < NumBackChannelElements; i++) {
3659                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
3660                    const int32_t BackElementIsCpe = br.getBits(1);
3661                    const int32_t BackElementTagSelect __unused = br.getBits(4);
3662                    channelsNum += BackElementIsCpe ? 2 : 1;
3663                }
3664                channelsEffectiveNum = channelsNum;
3665
3666                for (i=0; i < NumLfeChannelElements; i++) {
3667                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
3668                    const int32_t LfeElementTagSelect __unused = br.getBits(4);
3669                    channelsNum += 1;
3670                }
3671                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
3672                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
3673                numChannels = channelsNum;
3674            }
3675        }
3676    }
3677
3678    if (numChannels == 0) {
3679        return ERROR_UNSUPPORTED;
3680    }
3681
3682    if (mLastTrack == NULL)
3683        return ERROR_MALFORMED;
3684
3685    int32_t prevSampleRate;
3686    CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));
3687
3688    if (prevSampleRate != sampleRate) {
3689        ALOGV("mpeg4 audio sample rate different from previous setting. "
3690             "was: %d, now: %d", prevSampleRate, sampleRate);
3691    }
3692
3693    mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);
3694
3695    int32_t prevChannelCount;
3696    CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));
3697
3698    if (prevChannelCount != numChannels) {
3699        ALOGV("mpeg4 audio channel count different from previous setting. "
3700             "was: %d, now: %d", prevChannelCount, numChannels);
3701    }
3702
3703    mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);
3704
3705    return OK;
3706}
3707
3708////////////////////////////////////////////////////////////////////////////////
3709
3710MPEG4Source::MPEG4Source(
3711        const sp<MPEG4Extractor> &owner,
3712        const sp<MetaData> &format,
3713        const sp<DataSource> &dataSource,
3714        int32_t timeScale,
3715        const sp<SampleTable> &sampleTable,
3716        Vector<