MPEG4Extractor.cpp revision f8f0e0b756b0f96eccc94af89a0087c146232b26
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MPEG4Extractor"
19
20#include <ctype.h>
21#include <inttypes.h>
22#include <stdint.h>
23#include <stdlib.h>
24#include <string.h>
25
26#include <utils/Log.h>
27
28#include "include/MPEG4Extractor.h"
29#include "include/SampleTable.h"
30#include "include/ESDS.h"
31
32#include <media/stagefright/foundation/ABitReader.h>
33#include <media/stagefright/foundation/ABuffer.h>
34#include <media/stagefright/foundation/ADebug.h>
35#include <media/stagefright/foundation/AMessage.h>
36#include <media/stagefright/foundation/AUtils.h>
37#include <media/stagefright/MediaBuffer.h>
38#include <media/stagefright/MediaBufferGroup.h>
39#include <media/stagefright/MediaDefs.h>
40#include <media/stagefright/MediaSource.h>
41#include <media/stagefright/MetaData.h>
42#include <utils/String8.h>
43
44#include <byteswap.h>
45#include "include/ID3.h"
46
47#ifndef UINT32_MAX
48#define UINT32_MAX       (4294967295U)
49#endif
50
51namespace android {
52
// MediaSource subclass used by MPEG4Extractor. Only the declaration appears
// here; the member functions are defined elsewhere in this file.
// Presumably one instance serves one track (it carries a single track id,
// format and sample table) -- confirm against the constructor's call site.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    // NOTE: "sidx" is taken by non-const reference and kept as a reference
    // member (mSegments), so the vector must outlive this object.
    MPEG4Source(const sp<MPEG4Extractor> &owner,
                const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset);

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    // Same signature as read(); presumably the variant used for fragmented
    // (moof-based) files -- confirm in the definition.
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    // Non-public destructor: instances are not destroyed directly by callers.
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    // keep the MPEG4Extractor around, since we're referencing its data
    sp<MPEG4Extractor> mOwner;
    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Reference to the Vector<SidxEntry> handed to the constructor; not owned.
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    // Bookkeeping for sample auxiliary information; presumably filled in by
    // parseSampleAuxiliaryInformationSizes()/...Offsets() -- confirm there.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // Values held for the most recently parsed track fragment header.
    struct TrackFragmentHeaderInfo {
        // Bit masks, presumably tested against mFlags to tell which of the
        // optional fields below are present -- confirm in
        // parseTrackFragmentHeader().
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record: location, size and timing, plus an IV and
    // clear/encrypted size lists (presumably for encrypted samples).
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Declared private with no definition in the visible code: instances are
    // not meant to be copied or assigned.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
166
// This custom data source wraps an existing one. Requests that fall entirely
// within the cached range are satisfied from the cache; all remaining
// requests are forwarded to the wrapped data source.
// It is used to cache the full sample table metadata for a single track.
// Data sources may be wrapped several times to cover all tracks, i.e. each
// MPEG4DataSource caches the sample table metadata for exactly one track.
173
// DataSource wrapper holding at most one cached byte range of the wrapped
// source (see setCachedRange()); everything else is delegated to mSource.
struct MPEG4DataSource : public DataSource {
    MPEG4DataSource(const sp<DataSource> &source);

    // The following four are forwarded to the wrapped source, except that
    // readAt() first tries to serve the request from the cached range.
    virtual status_t initCheck() const;
    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
    virtual status_t getSize(off64_t *size);
    virtual uint32_t flags();

    // Replaces any existing cache with "size" bytes read from the wrapped
    // source starting at "offset". Returns OK, -ENOMEM or ERROR_IO.
    status_t setCachedRange(off64_t offset, size_t size);

protected:
    virtual ~MPEG4DataSource();

private:
    // Held by readAt() and setCachedRange() while they touch the cache fields.
    Mutex mLock;

    sp<DataSource> mSource;
    off64_t mCachedOffset;  // source offset of mCache[0]
    size_t mCachedSize;     // number of valid bytes in mCache
    uint8_t *mCache;        // malloc()ed buffer, or NULL when nothing is cached

    // Frees mCache and resets offset/size to 0. Does not take mLock itself.
    void clearCache();

    // Declared private with no definition in the visible code: not copyable.
    MPEG4DataSource(const MPEG4DataSource &);
    MPEG4DataSource &operator=(const MPEG4DataSource &);
};
200
201MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
202    : mSource(source),
203      mCachedOffset(0),
204      mCachedSize(0),
205      mCache(NULL) {
206}
207
208MPEG4DataSource::~MPEG4DataSource() {
209    clearCache();
210}
211
212void MPEG4DataSource::clearCache() {
213    if (mCache) {
214        free(mCache);
215        mCache = NULL;
216    }
217
218    mCachedOffset = 0;
219    mCachedSize = 0;
220}
221
222status_t MPEG4DataSource::initCheck() const {
223    return mSource->initCheck();
224}
225
226ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
227    Mutex::Autolock autoLock(mLock);
228
229    if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
230        memcpy(data, &mCache[offset - mCachedOffset], size);
231        return size;
232    }
233
234    return mSource->readAt(offset, data, size);
235}
236
237status_t MPEG4DataSource::getSize(off64_t *size) {
238    return mSource->getSize(size);
239}
240
241uint32_t MPEG4DataSource::flags() {
242    return mSource->flags();
243}
244
245status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
246    Mutex::Autolock autoLock(mLock);
247
248    clearCache();
249
250    mCache = (uint8_t *)malloc(size);
251
252    if (mCache == NULL) {
253        return -ENOMEM;
254    }
255
256    mCachedOffset = offset;
257    mCachedSize = size;
258
259    ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
260
261    if (err < (ssize_t)size) {
262        clearCache();
263
264        return ERROR_IO;
265    }
266
267    return OK;
268}
269
270////////////////////////////////////////////////////////////////////////////////
271
// Debug switch: when true, parseChunk() prints each chunk's type and size and
// hexdump()s up to the first 256 bytes of the chunk. Disabled by default.
static const bool kUseHexDump = false;
273
// Prints "size" bytes starting at "_data" to stdout, 16 bytes per row.
// Each row consists of the row's starting offset in hex, the bytes in hex
// (with an extra space after the eighth column and blank padding for a short
// final row) and the same bytes as characters, with '.' standing in for
// anything isprint() rejects.
static void hexdump(const void *_data, size_t size) {
    static const size_t kBytesPerRow = 16;
    const uint8_t *bytes = (const uint8_t *)_data;

    for (size_t rowStart = 0; rowStart < size; rowStart += kBytesPerRow) {
        const size_t remaining = size - rowStart;
        const size_t rowLen = remaining < kBytesPerRow ? remaining : kBytesPerRow;

        printf("0x%04zx  ", rowStart);

        // Hex column: always 16 cells wide so the text column lines up.
        for (size_t col = 0; col < kBytesPerRow; ++col) {
            if (col == 8) {
                printf(" ");
            }

            if (col < rowLen) {
                printf("%02x ", bytes[rowStart + col]);
            } else {
                printf("   ");
            }
        }

        printf(" ");

        // Text column: only the bytes that actually exist in this row.
        for (size_t col = 0; col < rowLen; ++col) {
            const uint8_t value = bytes[rowStart + col];
            printf("%c", isprint(value) ? value : '.');
        }

        printf("\n");
    }
}
312
313static const char *FourCC2MIME(uint32_t fourcc) {
314    switch (fourcc) {
315        case FOURCC('m', 'p', '4', 'a'):
316            return MEDIA_MIMETYPE_AUDIO_AAC;
317
318        case FOURCC('s', 'a', 'm', 'r'):
319            return MEDIA_MIMETYPE_AUDIO_AMR_NB;
320
321        case FOURCC('s', 'a', 'w', 'b'):
322            return MEDIA_MIMETYPE_AUDIO_AMR_WB;
323
324        case FOURCC('m', 'p', '4', 'v'):
325            return MEDIA_MIMETYPE_VIDEO_MPEG4;
326
327        case FOURCC('s', '2', '6', '3'):
328        case FOURCC('h', '2', '6', '3'):
329        case FOURCC('H', '2', '6', '3'):
330            return MEDIA_MIMETYPE_VIDEO_H263;
331
332        case FOURCC('a', 'v', 'c', '1'):
333            return MEDIA_MIMETYPE_VIDEO_AVC;
334
335        case FOURCC('h', 'v', 'c', '1'):
336        case FOURCC('h', 'e', 'v', '1'):
337            return MEDIA_MIMETYPE_VIDEO_HEVC;
338        default:
339            CHECK(!"should not be here.");
340            return NULL;
341    }
342}
343
344static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
345    if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
346        // AMR NB audio is always mono, 8kHz
347        *channels = 1;
348        *rate = 8000;
349        return true;
350    } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
351        // AMR WB audio is always mono, 16kHz
352        *channels = 1;
353        *rate = 16000;
354        return true;
355    }
356    return false;
357}
358
359MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
360    : mMoofOffset(0),
361      mDataSource(source),
362      mInitCheck(NO_INIT),
363      mHasVideo(false),
364      mHeaderTimescale(0),
365      mFirstTrack(NULL),
366      mLastTrack(NULL),
367      mFileMetaData(new MetaData),
368      mFirstSINF(NULL),
369      mIsDrm(false) {
370}
371
372MPEG4Extractor::~MPEG4Extractor() {
373    Track *track = mFirstTrack;
374    while (track) {
375        Track *next = track->next;
376
377        delete track;
378        track = next;
379    }
380    mFirstTrack = mLastTrack = NULL;
381
382    SINF *sinf = mFirstSINF;
383    while (sinf) {
384        SINF *next = sinf->next;
385        delete[] sinf->IPMPData;
386        delete sinf;
387        sinf = next;
388    }
389    mFirstSINF = NULL;
390
391    for (size_t i = 0; i < mPssh.size(); i++) {
392        delete [] mPssh[i].data;
393    }
394}
395
396uint32_t MPEG4Extractor::flags() const {
397    return CAN_PAUSE |
398            ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
399                    (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
400}
401
402sp<MetaData> MPEG4Extractor::getMetaData() {
403    status_t err;
404    if ((err = readMetaData()) != OK) {
405        return new MetaData;
406    }
407
408    return mFileMetaData;
409}
410
411size_t MPEG4Extractor::countTracks() {
412    status_t err;
413    if ((err = readMetaData()) != OK) {
414        ALOGV("MPEG4Extractor::countTracks: no tracks");
415        return 0;
416    }
417
418    size_t n = 0;
419    Track *track = mFirstTrack;
420    while (track) {
421        ++n;
422        track = track->next;
423    }
424
425    ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
426    return n;
427}
428
429sp<MetaData> MPEG4Extractor::getTrackMetaData(
430        size_t index, uint32_t flags) {
431    status_t err;
432    if ((err = readMetaData()) != OK) {
433        return NULL;
434    }
435
436    Track *track = mFirstTrack;
437    while (index > 0) {
438        if (track == NULL) {
439            return NULL;
440        }
441
442        track = track->next;
443        --index;
444    }
445
446    if (track == NULL) {
447        return NULL;
448    }
449
450    if ((flags & kIncludeExtensiveMetaData)
451            && !track->includes_expensive_metadata) {
452        track->includes_expensive_metadata = true;
453
454        const char *mime;
455        CHECK(track->meta->findCString(kKeyMIMEType, &mime));
456        if (!strncasecmp("video/", mime, 6)) {
457            if (mMoofOffset > 0) {
458                int64_t duration;
459                if (track->meta->findInt64(kKeyDuration, &duration)) {
460                    // nothing fancy, just pick a frame near 1/4th of the duration
461                    track->meta->setInt64(
462                            kKeyThumbnailTime, duration / 4);
463                }
464            } else {
465                uint32_t sampleIndex;
466                uint32_t sampleTime;
467                if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK
468                        && track->sampleTable->getMetaDataForSample(
469                            sampleIndex, NULL /* offset */, NULL /* size */,
470                            &sampleTime) == OK) {
471                    track->meta->setInt64(
472                            kKeyThumbnailTime,
473                            ((int64_t)sampleTime * 1000000) / track->timescale);
474                }
475            }
476        }
477    }
478
479    return track->meta;
480}
481
// Writes the four bytes of "x", most significant first, into s[0..3] and
// terminates the string at s[4]. "s" must have room for 5 chars.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int pos = 0; pos < 4; ++pos) {
        const int shift = 8 * (3 - pos);
        s[pos] = (x >> shift) & 0xff;
    }
    s[4] = '\0';
}
489
490status_t MPEG4Extractor::readMetaData() {
491    if (mInitCheck != NO_INIT) {
492        return mInitCheck;
493    }
494
495    off64_t offset = 0;
496    status_t err;
497    while (true) {
498        off64_t orig_offset = offset;
499        err = parseChunk(&offset, 0);
500
501        if (err != OK && err != UNKNOWN_ERROR) {
502            break;
503        } else if (offset <= orig_offset) {
504            // only continue parsing if the offset was advanced,
505            // otherwise we might end up in an infinite loop
506            ALOGE("did not advance: 0x%lld->0x%lld", orig_offset, offset);
507            err = ERROR_MALFORMED;
508            break;
509        } else if (err == OK) {
510            continue;
511        }
512
513        uint32_t hdr[2];
514        if (mDataSource->readAt(offset, hdr, 8) < 8) {
515            break;
516        }
517        uint32_t chunk_type = ntohl(hdr[1]);
518        if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
519            // store the offset of the first segment
520            mMoofOffset = offset;
521        } else if (chunk_type != FOURCC('m', 'd', 'a', 't')) {
522            // keep parsing until we get to the data
523            continue;
524        }
525        break;
526    }
527
528    if (mInitCheck == OK) {
529        if (mHasVideo) {
530            mFileMetaData->setCString(
531                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
532        } else {
533            mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
534        }
535    } else {
536        mInitCheck = err;
537    }
538
539    CHECK_NE(err, (status_t)NO_INIT);
540
541    // copy pssh data into file metadata
542    int psshsize = 0;
543    for (size_t i = 0; i < mPssh.size(); i++) {
544        psshsize += 20 + mPssh[i].datalen;
545    }
546    if (psshsize) {
547        char *buf = (char*)malloc(psshsize);
548        char *ptr = buf;
549        for (size_t i = 0; i < mPssh.size(); i++) {
550            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
551            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
552            ptr += (20 + mPssh[i].datalen);
553        }
554        mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize);
555        free(buf);
556    }
557    return mInitCheck;
558}
559
560char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
561    if (mFirstSINF == NULL) {
562        return NULL;
563    }
564
565    SINF *sinf = mFirstSINF;
566    while (sinf && (trackID != sinf->trackID)) {
567        sinf = sinf->next;
568    }
569
570    if (sinf == NULL) {
571        return NULL;
572    }
573
574    *len = sinf->len;
575    return sinf->IPMPData;
576}
577
578// Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
579static int32_t readSize(off64_t offset,
580        const sp<DataSource> DataSource, uint8_t *numOfBytes) {
581    uint32_t size = 0;
582    uint8_t data;
583    bool moreData = true;
584    *numOfBytes = 0;
585
586    while (moreData) {
587        if (DataSource->readAt(offset, &data, 1) < 1) {
588            return -1;
589        }
590        offset ++;
591        moreData = (data >= 128) ? true : false;
592        size = (size << 7) | (data & 0x7f); // Take last 7 bits
593        (*numOfBytes) ++;
594    }
595
596    return size;
597}
598
599status_t MPEG4Extractor::parseDrmSINF(
600        off64_t * /* offset */, off64_t data_offset) {
601    uint8_t updateIdTag;
602    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
603        return ERROR_IO;
604    }
605    data_offset ++;
606
607    if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
608        return ERROR_MALFORMED;
609    }
610
611    uint8_t numOfBytes;
612    int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
613    if (size < 0) {
614        return ERROR_IO;
615    }
616    data_offset += numOfBytes;
617
618    while(size >= 11 ) {
619        uint8_t descriptorTag;
620        if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
621            return ERROR_IO;
622        }
623        data_offset ++;
624
625        if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
626            return ERROR_MALFORMED;
627        }
628
629        uint8_t buffer[8];
630        //ObjectDescriptorID and ObjectDescriptor url flag
631        if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
632            return ERROR_IO;
633        }
634        data_offset += 2;
635
636        if ((buffer[1] >> 5) & 0x0001) { //url flag is set
637            return ERROR_MALFORMED;
638        }
639
640        if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
641            return ERROR_IO;
642        }
643        data_offset += 8;
644
645        if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
646                || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
647            return ERROR_MALFORMED;
648        }
649
650        SINF *sinf = new SINF;
651        sinf->trackID = U16_AT(&buffer[3]);
652        sinf->IPMPDescriptorID = buffer[7];
653        sinf->next = mFirstSINF;
654        mFirstSINF = sinf;
655
656        size -= (8 + 2 + 1);
657    }
658
659    if (size != 0) {
660        return ERROR_MALFORMED;
661    }
662
663    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
664        return ERROR_IO;
665    }
666    data_offset ++;
667
668    if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
669        return ERROR_MALFORMED;
670    }
671
672    size = readSize(data_offset, mDataSource, &numOfBytes);
673    if (size < 0) {
674        return ERROR_IO;
675    }
676    data_offset += numOfBytes;
677
678    while (size > 0) {
679        uint8_t tag;
680        int32_t dataLen;
681        if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
682            return ERROR_IO;
683        }
684        data_offset ++;
685
686        if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
687            uint8_t id;
688            dataLen = readSize(data_offset, mDataSource, &numOfBytes);
689            if (dataLen < 0) {
690                return ERROR_IO;
691            } else if (dataLen < 4) {
692                return ERROR_MALFORMED;
693            }
694            data_offset += numOfBytes;
695
696            if (mDataSource->readAt(data_offset, &id, 1) < 1) {
697                return ERROR_IO;
698            }
699            data_offset ++;
700
701            SINF *sinf = mFirstSINF;
702            while (sinf && (sinf->IPMPDescriptorID != id)) {
703                sinf = sinf->next;
704            }
705            if (sinf == NULL) {
706                return ERROR_MALFORMED;
707            }
708            sinf->len = dataLen - 3;
709            sinf->IPMPData = new (std::nothrow) char[sinf->len];
710            if (sinf->IPMPData == NULL) {
711                return ERROR_MALFORMED;
712            }
713            data_offset += 2;
714
715            if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) {
716                return ERROR_IO;
717            }
718            data_offset += sinf->len;
719
720            size -= (dataLen + numOfBytes + 1);
721        }
722    }
723
724    if (size != 0) {
725        return ERROR_MALFORMED;
726    }
727
728    return UNKNOWN_ERROR;  // Return a dummy error.
729}
730
731struct PathAdder {
732    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
733        : mPath(path) {
734        mPath->push(chunkType);
735    }
736
737    ~PathAdder() {
738        mPath->pop();
739    }
740
741private:
742    Vector<uint32_t> *mPath;
743
744    PathAdder(const PathAdder &);
745    PathAdder &operator=(const PathAdder &);
746};
747
748static bool underMetaDataPath(const Vector<uint32_t> &path) {
749    return path.size() >= 5
750        && path[0] == FOURCC('m', 'o', 'o', 'v')
751        && path[1] == FOURCC('u', 'd', 't', 'a')
752        && path[2] == FOURCC('m', 'e', 't', 'a')
753        && path[3] == FOURCC('i', 'l', 's', 't');
754}
755
756// Given a time in seconds since Jan 1 1904, produce a human-readable string.
757static void convertTimeToDate(int64_t time_1904, String8 *s) {
758    time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600);
759
760    char tmp[32];
761    strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970));
762
763    s->setTo(tmp);
764}
765
766status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
767    ALOGV("entering parseChunk %lld/%d", *offset, depth);
768    uint32_t hdr[2];
769    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
770        return ERROR_IO;
771    }
772    uint64_t chunk_size = ntohl(hdr[0]);
773    int32_t chunk_type = ntohl(hdr[1]);
774    off64_t data_offset = *offset + 8;
775
776    if (chunk_size == 1) {
777        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
778            return ERROR_IO;
779        }
780        chunk_size = ntoh64(chunk_size);
781        data_offset += 8;
782
783        if (chunk_size < 16) {
784            // The smallest valid chunk is 16 bytes long in this case.
785            return ERROR_MALFORMED;
786        }
787    } else if (chunk_size == 0) {
788        if (depth == 0) {
789            // atom extends to end of file
790            off64_t sourceSize;
791            if (mDataSource->getSize(&sourceSize) == OK) {
792                chunk_size = (sourceSize - *offset);
793            } else {
794                // XXX could we just pick a "sufficiently large" value here?
795                ALOGE("atom size is 0, and data source has no size");
796                return ERROR_MALFORMED;
797            }
798        } else {
799            // not allowed for non-toplevel atoms, skip it
800            *offset += 4;
801            return OK;
802        }
803    } else if (chunk_size < 8) {
804        // The smallest valid chunk is 8 bytes long.
805        ALOGE("invalid chunk size: %" PRIu64, chunk_size);
806        return ERROR_MALFORMED;
807    }
808
809    char chunk[5];
810    MakeFourCCString(chunk_type, chunk);
811    ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth);
812
813    if (kUseHexDump) {
814        static const char kWhitespace[] = "                                        ";
815        const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
816        printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
817
818        char buffer[256];
819        size_t n = chunk_size;
820        if (n > sizeof(buffer)) {
821            n = sizeof(buffer);
822        }
823        if (mDataSource->readAt(*offset, buffer, n)
824                < (ssize_t)n) {
825            return ERROR_IO;
826        }
827
828        hexdump(buffer, n);
829    }
830
831    PathAdder autoAdder(&mPath, chunk_type);
832
833    off64_t chunk_data_size = *offset + chunk_size - data_offset;
834
835    if (chunk_type != FOURCC('c', 'p', 'r', 't')
836            && chunk_type != FOURCC('c', 'o', 'v', 'r')
837            && mPath.size() == 5 && underMetaDataPath(mPath)) {
838        off64_t stop_offset = *offset + chunk_size;
839        *offset = data_offset;
840        while (*offset < stop_offset) {
841            status_t err = parseChunk(offset, depth + 1);
842            if (err != OK) {
843                return err;
844            }
845        }
846
847        if (*offset != stop_offset) {
848            return ERROR_MALFORMED;
849        }
850
851        return OK;
852    }
853
854    switch(chunk_type) {
855        case FOURCC('m', 'o', 'o', 'v'):
856        case FOURCC('t', 'r', 'a', 'k'):
857        case FOURCC('m', 'd', 'i', 'a'):
858        case FOURCC('m', 'i', 'n', 'f'):
859        case FOURCC('d', 'i', 'n', 'f'):
860        case FOURCC('s', 't', 'b', 'l'):
861        case FOURCC('m', 'v', 'e', 'x'):
862        case FOURCC('m', 'o', 'o', 'f'):
863        case FOURCC('t', 'r', 'a', 'f'):
864        case FOURCC('m', 'f', 'r', 'a'):
865        case FOURCC('u', 'd', 't', 'a'):
866        case FOURCC('i', 'l', 's', 't'):
867        case FOURCC('s', 'i', 'n', 'f'):
868        case FOURCC('s', 'c', 'h', 'i'):
869        case FOURCC('e', 'd', 't', 's'):
870        {
871            if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
872                ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
873
874                if (mDataSource->flags()
875                        & (DataSource::kWantsPrefetching
876                            | DataSource::kIsCachingDataSource)) {
877                    sp<MPEG4DataSource> cachedSource =
878                        new MPEG4DataSource(mDataSource);
879
880                    if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
881                        mDataSource = cachedSource;
882                    }
883                }
884
885                mLastTrack->sampleTable = new SampleTable(mDataSource);
886            }
887
888            bool isTrack = false;
889            if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
890                isTrack = true;
891
892                Track *track = new Track;
893                track->next = NULL;
894                if (mLastTrack) {
895                    mLastTrack->next = track;
896                } else {
897                    mFirstTrack = track;
898                }
899                mLastTrack = track;
900
901                track->meta = new MetaData;
902                track->includes_expensive_metadata = false;
903                track->skipTrack = false;
904                track->timescale = 0;
905                track->meta->setCString(kKeyMIMEType, "application/octet-stream");
906            }
907
908            off64_t stop_offset = *offset + chunk_size;
909            *offset = data_offset;
910            while (*offset < stop_offset) {
911                status_t err = parseChunk(offset, depth + 1);
912                if (err != OK) {
913                    return err;
914                }
915            }
916
917            if (*offset != stop_offset) {
918                return ERROR_MALFORMED;
919            }
920
921            if (isTrack) {
922                if (mLastTrack->skipTrack) {
923                    Track *cur = mFirstTrack;
924
925                    if (cur == mLastTrack) {
926                        delete cur;
927                        mFirstTrack = mLastTrack = NULL;
928                    } else {
929                        while (cur && cur->next != mLastTrack) {
930                            cur = cur->next;
931                        }
932                        cur->next = NULL;
933                        delete mLastTrack;
934                        mLastTrack = cur;
935                    }
936
937                    return OK;
938                }
939
940                status_t err = verifyTrack(mLastTrack);
941
942                if (err != OK) {
943                    return err;
944                }
945            } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
946                mInitCheck = OK;
947
948                if (!mIsDrm) {
949                    return UNKNOWN_ERROR;  // Return a dummy error.
950                } else {
951                    return OK;
952                }
953            }
954            break;
955        }
956
957        case FOURCC('e', 'l', 's', 't'):
958        {
959            *offset += chunk_size;
960
961            // See 14496-12 8.6.6
962            uint8_t version;
963            if (mDataSource->readAt(data_offset, &version, 1) < 1) {
964                return ERROR_IO;
965            }
966
967            uint32_t entry_count;
968            if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
969                return ERROR_IO;
970            }
971
972            if (entry_count != 1) {
973                // we only support a single entry at the moment, for gapless playback
974                ALOGW("ignoring edit list with %d entries", entry_count);
975            } else if (mHeaderTimescale == 0) {
976                ALOGW("ignoring edit list because timescale is 0");
977            } else {
978                off64_t entriesoffset = data_offset + 8;
979                uint64_t segment_duration;
980                int64_t media_time;
981
982                if (version == 1) {
983                    if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
984                            !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
985                        return ERROR_IO;
986                    }
987                } else if (version == 0) {
988                    uint32_t sd;
989                    int32_t mt;
990                    if (!mDataSource->getUInt32(entriesoffset, &sd) ||
991                            !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
992                        return ERROR_IO;
993                    }
994                    segment_duration = sd;
995                    media_time = mt;
996                } else {
997                    return ERROR_IO;
998                }
999
1000                uint64_t halfscale = mHeaderTimescale / 2;
1001                segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale;
1002                media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale;
1003
1004                int64_t duration;
1005                int32_t samplerate;
1006                if (!mLastTrack) {
1007                    return ERROR_MALFORMED;
1008                }
1009                if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
1010                        mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {
1011
1012                    int64_t delay = (media_time  * samplerate + 500000) / 1000000;
1013                    mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
1014
1015                    int64_t paddingus = duration - (segment_duration + media_time);
1016                    if (paddingus < 0) {
1017                        // track duration from media header (which is what kKeyDuration is) might
1018                        // be slightly shorter than the segment duration, which would make the
1019                        // padding negative. Clamp to zero.
1020                        paddingus = 0;
1021                    }
1022                    int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
1023                    mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
1024                }
1025            }
1026            break;
1027        }
1028
1029        case FOURCC('f', 'r', 'm', 'a'):
1030        {
1031            *offset += chunk_size;
1032
1033            uint32_t original_fourcc;
1034            if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1035                return ERROR_IO;
1036            }
1037            original_fourcc = ntohl(original_fourcc);
1038            ALOGV("read original format: %d", original_fourcc);
1039            mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
1040            uint32_t num_channels = 0;
1041            uint32_t sample_rate = 0;
1042            if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1043                mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1044                mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1045            }
1046            break;
1047        }
1048
1049        case FOURCC('t', 'e', 'n', 'c'):
1050        {
1051            *offset += chunk_size;
1052
1053            if (chunk_size < 32) {
1054                return ERROR_MALFORMED;
1055            }
1056
1057            // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1058            // default IV size, 16 bytes default KeyID
1059            // (ISO 23001-7)
1060            char buf[4];
1061            memset(buf, 0, 4);
1062            if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1063                return ERROR_IO;
1064            }
1065            uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1066            if (defaultAlgorithmId > 1) {
1067                // only 0 (clear) and 1 (AES-128) are valid
1068                return ERROR_MALFORMED;
1069            }
1070
1071            memset(buf, 0, 4);
1072            if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1073                return ERROR_IO;
1074            }
1075            uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1076
1077            if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
1078                    (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
1079                // only unencrypted data must have 0 IV size
1080                return ERROR_MALFORMED;
1081            } else if (defaultIVSize != 0 &&
1082                    defaultIVSize != 8 &&
1083                    defaultIVSize != 16) {
1084                // only supported sizes are 0, 8 and 16
1085                return ERROR_MALFORMED;
1086            }
1087
1088            uint8_t defaultKeyId[16];
1089
1090            if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1091                return ERROR_IO;
1092            }
1093
1094            mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
1095            mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1096            mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1097            break;
1098        }
1099
1100        case FOURCC('t', 'k', 'h', 'd'):
1101        {
1102            *offset += chunk_size;
1103
1104            status_t err;
1105            if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1106                return err;
1107            }
1108
1109            break;
1110        }
1111
1112        case FOURCC('p', 's', 's', 'h'):
1113        {
1114            *offset += chunk_size;
1115
1116            PsshInfo pssh;
1117
1118            if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1119                return ERROR_IO;
1120            }
1121
1122            uint32_t psshdatalen = 0;
1123            if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1124                return ERROR_IO;
1125            }
1126            pssh.datalen = ntohl(psshdatalen);
1127            ALOGV("pssh data size: %d", pssh.datalen);
1128            if (pssh.datalen + 20 > chunk_size) {
1129                // pssh data length exceeds size of containing box
1130                return ERROR_MALFORMED;
1131            }
1132
1133            pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1134            if (pssh.data == NULL) {
1135                return ERROR_MALFORMED;
1136            }
1137            ALOGV("allocated pssh @ %p", pssh.data);
1138            ssize_t requested = (ssize_t) pssh.datalen;
1139            if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1140                return ERROR_IO;
1141            }
1142            mPssh.push_back(pssh);
1143
1144            break;
1145        }
1146
1147        case FOURCC('m', 'd', 'h', 'd'):
1148        {
1149            *offset += chunk_size;
1150
1151            if (chunk_data_size < 4 || mLastTrack == NULL) {
1152                return ERROR_MALFORMED;
1153            }
1154
1155            uint8_t version;
1156            if (mDataSource->readAt(
1157                        data_offset, &version, sizeof(version))
1158                    < (ssize_t)sizeof(version)) {
1159                return ERROR_IO;
1160            }
1161
1162            off64_t timescale_offset;
1163
1164            if (version == 1) {
1165                timescale_offset = data_offset + 4 + 16;
1166            } else if (version == 0) {
1167                timescale_offset = data_offset + 4 + 8;
1168            } else {
1169                return ERROR_IO;
1170            }
1171
1172            uint32_t timescale;
1173            if (mDataSource->readAt(
1174                        timescale_offset, &timescale, sizeof(timescale))
1175                    < (ssize_t)sizeof(timescale)) {
1176                return ERROR_IO;
1177            }
1178
1179            if (!timescale) {
1180                ALOGE("timescale should not be ZERO.");
1181                return ERROR_MALFORMED;
1182            }
1183
1184            mLastTrack->timescale = ntohl(timescale);
1185
1186            // 14496-12 says all ones means indeterminate, but some files seem to use
1187            // 0 instead. We treat both the same.
1188            int64_t duration = 0;
1189            if (version == 1) {
1190                if (mDataSource->readAt(
1191                            timescale_offset + 4, &duration, sizeof(duration))
1192                        < (ssize_t)sizeof(duration)) {
1193                    return ERROR_IO;
1194                }
1195                if (duration != -1) {
1196                    duration = ntoh64(duration);
1197                }
1198            } else {
1199                uint32_t duration32;
1200                if (mDataSource->readAt(
1201                            timescale_offset + 4, &duration32, sizeof(duration32))
1202                        < (ssize_t)sizeof(duration32)) {
1203                    return ERROR_IO;
1204                }
1205                if (duration32 != 0xffffffff) {
1206                    duration = ntohl(duration32);
1207                }
1208            }
1209            if (duration != 0) {
1210                mLastTrack->meta->setInt64(
1211                        kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1212            }
1213
1214            uint8_t lang[2];
1215            off64_t lang_offset;
1216            if (version == 1) {
1217                lang_offset = timescale_offset + 4 + 8;
1218            } else if (version == 0) {
1219                lang_offset = timescale_offset + 4 + 4;
1220            } else {
1221                return ERROR_IO;
1222            }
1223
1224            if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1225                    < (ssize_t)sizeof(lang)) {
1226                return ERROR_IO;
1227            }
1228
1229            // To get the ISO-639-2/T three character language code
1230            // 1 bit pad followed by 3 5-bits characters. Each character
1231            // is packed as the difference between its ASCII value and 0x60.
1232            char lang_code[4];
1233            lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1234            lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1235            lang_code[2] = (lang[1] & 0x1f) + 0x60;
1236            lang_code[3] = '\0';
1237
1238            mLastTrack->meta->setCString(
1239                    kKeyMediaLanguage, lang_code);
1240
1241            break;
1242        }
1243
1244        case FOURCC('s', 't', 's', 'd'):
1245        {
1246            if (chunk_data_size < 8) {
1247                return ERROR_MALFORMED;
1248            }
1249
1250            uint8_t buffer[8];
1251            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1252                return ERROR_MALFORMED;
1253            }
1254
1255            if (mDataSource->readAt(
1256                        data_offset, buffer, 8) < 8) {
1257                return ERROR_IO;
1258            }
1259
1260            if (U32_AT(buffer) != 0) {
1261                // Should be version 0, flags 0.
1262                return ERROR_MALFORMED;
1263            }
1264
1265            uint32_t entry_count = U32_AT(&buffer[4]);
1266
1267            if (entry_count > 1) {
1268                // For 3GPP timed text, there could be multiple tx3g boxes containing
1269                // multiple text display formats. These formats will be used to
1270                // display the timed text.
1271                // For encrypted files, there may also be more than one entry.
1272                const char *mime;
1273                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1274                if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1275                        strcasecmp(mime, "application/octet-stream")) {
1276                    // For now we only support a single type of media per track.
1277                    mLastTrack->skipTrack = true;
1278                    *offset += chunk_size;
1279                    break;
1280                }
1281            }
1282            off64_t stop_offset = *offset + chunk_size;
1283            *offset = data_offset + 8;
1284            for (uint32_t i = 0; i < entry_count; ++i) {
1285                status_t err = parseChunk(offset, depth + 1);
1286                if (err != OK) {
1287                    return err;
1288                }
1289            }
1290
1291            if (*offset != stop_offset) {
1292                return ERROR_MALFORMED;
1293            }
1294            break;
1295        }
1296
1297        case FOURCC('m', 'p', '4', 'a'):
1298        case FOURCC('e', 'n', 'c', 'a'):
1299        case FOURCC('s', 'a', 'm', 'r'):
1300        case FOURCC('s', 'a', 'w', 'b'):
1301        {
1302            uint8_t buffer[8 + 20];
1303            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1304                // Basic AudioSampleEntry size.
1305                return ERROR_MALFORMED;
1306            }
1307
1308            if (mDataSource->readAt(
1309                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1310                return ERROR_IO;
1311            }
1312
1313            uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1314            uint32_t num_channels = U16_AT(&buffer[16]);
1315
1316            uint16_t sample_size = U16_AT(&buffer[18]);
1317            uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1318
1319            if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1320                // if the chunk type is enca, we'll get the type from the sinf/frma box later
1321                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1322                AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1323            }
1324            ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1325                   chunk, num_channels, sample_size, sample_rate);
1326            mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1327            mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1328
1329            off64_t stop_offset = *offset + chunk_size;
1330            *offset = data_offset + sizeof(buffer);
1331            while (*offset < stop_offset) {
1332                status_t err = parseChunk(offset, depth + 1);
1333                if (err != OK) {
1334                    return err;
1335                }
1336            }
1337
1338            if (*offset != stop_offset) {
1339                return ERROR_MALFORMED;
1340            }
1341            break;
1342        }
1343
1344        case FOURCC('m', 'p', '4', 'v'):
1345        case FOURCC('e', 'n', 'c', 'v'):
1346        case FOURCC('s', '2', '6', '3'):
1347        case FOURCC('H', '2', '6', '3'):
1348        case FOURCC('h', '2', '6', '3'):
1349        case FOURCC('a', 'v', 'c', '1'):
1350        case FOURCC('h', 'v', 'c', '1'):
1351        case FOURCC('h', 'e', 'v', '1'):
1352        {
1353            mHasVideo = true;
1354
1355            uint8_t buffer[78];
1356            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1357                // Basic VideoSampleEntry size.
1358                return ERROR_MALFORMED;
1359            }
1360
1361            if (mDataSource->readAt(
1362                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1363                return ERROR_IO;
1364            }
1365
1366            uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1367            uint16_t width = U16_AT(&buffer[6 + 18]);
1368            uint16_t height = U16_AT(&buffer[6 + 20]);
1369
1370            // The video sample is not standard-compliant if it has invalid dimension.
1371            // Use some default width and height value, and
1372            // let the decoder figure out the actual width and height (and thus
1373            // be prepared for INFO_FORMAT_CHANGED event).
1374            if (width == 0)  width  = 352;
1375            if (height == 0) height = 288;
1376
1377            // printf("*** coding='%s' width=%d height=%d\n",
1378            //        chunk, width, height);
1379
1380            if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1381                // if the chunk type is encv, we'll get the type from the sinf/frma box later
1382                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1383            }
1384            mLastTrack->meta->setInt32(kKeyWidth, width);
1385            mLastTrack->meta->setInt32(kKeyHeight, height);
1386
1387            off64_t stop_offset = *offset + chunk_size;
1388            *offset = data_offset + sizeof(buffer);
1389            while (*offset < stop_offset) {
1390                status_t err = parseChunk(offset, depth + 1);
1391                if (err != OK) {
1392                    return err;
1393                }
1394            }
1395
1396            if (*offset != stop_offset) {
1397                return ERROR_MALFORMED;
1398            }
1399            break;
1400        }
1401
1402        case FOURCC('s', 't', 'c', 'o'):
1403        case FOURCC('c', 'o', '6', '4'):
1404        {
1405            status_t err =
1406                mLastTrack->sampleTable->setChunkOffsetParams(
1407                        chunk_type, data_offset, chunk_data_size);
1408
1409            *offset += chunk_size;
1410
1411            if (err != OK) {
1412                return err;
1413            }
1414
1415            break;
1416        }
1417
1418        case FOURCC('s', 't', 's', 'c'):
1419        {
1420            status_t err =
1421                mLastTrack->sampleTable->setSampleToChunkParams(
1422                        data_offset, chunk_data_size);
1423
1424            *offset += chunk_size;
1425
1426            if (err != OK) {
1427                return err;
1428            }
1429
1430            break;
1431        }
1432
1433        case FOURCC('s', 't', 's', 'z'):
1434        case FOURCC('s', 't', 'z', '2'):
1435        {
1436            status_t err =
1437                mLastTrack->sampleTable->setSampleSizeParams(
1438                        chunk_type, data_offset, chunk_data_size);
1439
1440            *offset += chunk_size;
1441
1442            if (err != OK) {
1443                return err;
1444            }
1445
1446            size_t max_size;
1447            err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1448
1449            if (err != OK) {
1450                return err;
1451            }
1452
1453            if (max_size != 0) {
1454                // Assume that a given buffer only contains at most 10 chunks,
1455                // each chunk originally prefixed with a 2 byte length will
1456                // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1457                // and thus will grow by 2 bytes per chunk.
1458                if (max_size > SIZE_MAX - 10 * 2) {
1459                    ALOGE("max sample size too big: %zu", max_size);
1460                    return ERROR_MALFORMED;
1461                }
1462                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1463            } else {
1464                // No size was specified. Pick a conservatively large size.
1465                uint32_t width, height;
1466                if (!mLastTrack->meta->findInt32(kKeyWidth, (int32_t*)&width) ||
1467                    !mLastTrack->meta->findInt32(kKeyHeight,(int32_t*) &height)) {
1468                    ALOGE("No width or height, assuming worst case 1080p");
1469                    width = 1920;
1470                    height = 1080;
1471                } else {
1472                    // A resolution was specified, check that it's not too big. The values below
1473                    // were chosen so that the calculations below don't cause overflows, they're
1474                    // not indicating that resolutions up to 32kx32k are actually supported.
1475                    if (width > 32768 || height > 32768) {
1476                        ALOGE("can't support %u x %u video", width, height);
1477                        return ERROR_MALFORMED;
1478                    }
1479                }
1480
1481                const char *mime;
1482                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1483                if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
1484                    // AVC requires compression ratio of at least 2, and uses
1485                    // macroblocks
1486                    max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1487                } else {
1488                    // For all other formats there is no minimum compression
1489                    // ratio. Use compression ratio of 1.
1490                    max_size = width * height * 3 / 2;
1491                }
1492                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size);
1493            }
1494
1495            // NOTE: setting another piece of metadata invalidates any pointers (such as the
1496            // mimetype) previously obtained, so don't cache them.
1497            const char *mime;
1498            CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1499            // Calculate average frame rate.
1500            if (!strncasecmp("video/", mime, 6)) {
1501                size_t nSamples = mLastTrack->sampleTable->countSamples();
1502                int64_t durationUs;
1503                if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1504                    if (durationUs > 0) {
1505                        int32_t frameRate = (nSamples * 1000000LL +
1506                                    (durationUs >> 1)) / durationUs;
1507                        mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1508                    }
1509                }
1510            }
1511
1512            break;
1513        }
1514
1515        case FOURCC('s', 't', 't', 's'):
1516        {
1517            *offset += chunk_size;
1518
1519            status_t err =
1520                mLastTrack->sampleTable->setTimeToSampleParams(
1521                        data_offset, chunk_data_size);
1522
1523            if (err != OK) {
1524                return err;
1525            }
1526
1527            break;
1528        }
1529
1530        case FOURCC('c', 't', 't', 's'):
1531        {
1532            *offset += chunk_size;
1533
1534            status_t err =
1535                mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1536                        data_offset, chunk_data_size);
1537
1538            if (err != OK) {
1539                return err;
1540            }
1541
1542            break;
1543        }
1544
1545        case FOURCC('s', 't', 's', 's'):
1546        {
1547            *offset += chunk_size;
1548
1549            status_t err =
1550                mLastTrack->sampleTable->setSyncSampleParams(
1551                        data_offset, chunk_data_size);
1552
1553            if (err != OK) {
1554                return err;
1555            }
1556
1557            break;
1558        }
1559
1560        // ©xyz
1561        case FOURCC(0xA9, 'x', 'y', 'z'):
1562        {
1563            *offset += chunk_size;
1564
1565            // Best case the total data length inside "©xyz" box
1566            // would be 8, for instance "©xyz" + "\x00\x04\x15\xc7" + "0+0/",
1567            // where "\x00\x04" is the text string length with value = 4,
1568            // "\x15\xc7" is the language code = en, and "0+0" is a
1569            // location (string) value with longitude = 0 and latitude = 0.
1570            if (chunk_data_size < 8) {
1571                return ERROR_MALFORMED;
1572            }
1573
1574            // Worst case the location string length would be 18,
1575            // for instance +90.0000-180.0000, without the trailing "/" and
1576            // the string length + language code.
1577            char buffer[18];
1578
1579            // Subtracting 5 from the data size is because the text string length +
1580            // language code takes 4 bytes, and the trailing slash "/" takes 1 byte.
1581            off64_t location_length = chunk_data_size - 5;
1582            if (location_length >= (off64_t) sizeof(buffer)) {
1583                return ERROR_MALFORMED;
1584            }
1585
1586            if (mDataSource->readAt(
1587                        data_offset + 4, buffer, location_length) < location_length) {
1588                return ERROR_IO;
1589            }
1590
1591            buffer[location_length] = '\0';
1592            mFileMetaData->setCString(kKeyLocation, buffer);
1593            break;
1594        }
1595
1596        case FOURCC('e', 's', 'd', 's'):
1597        {
1598            *offset += chunk_size;
1599
1600            if (chunk_data_size < 4) {
1601                return ERROR_MALFORMED;
1602            }
1603
1604            uint8_t buffer[256];
1605            if (chunk_data_size > (off64_t)sizeof(buffer)) {
1606                return ERROR_BUFFER_TOO_SMALL;
1607            }
1608
1609            if (mDataSource->readAt(
1610                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
1611                return ERROR_IO;
1612            }
1613
1614            if (U32_AT(buffer) != 0) {
1615                // Should be version 0, flags 0.
1616                return ERROR_MALFORMED;
1617            }
1618
1619            mLastTrack->meta->setData(
1620                    kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1621
1622            if (mPath.size() >= 2
1623                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1624                // Information from the ESDS must be relied on for proper
1625                // setup of sample rate and channel count for MPEG4 Audio.
1626                // The generic header appears to only contain generic
1627                // information...
1628
1629                status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1630                        &buffer[4], chunk_data_size - 4);
1631
1632                if (err != OK) {
1633                    return err;
1634                }
1635            }
1636
1637            break;
1638        }
1639
1640        case FOURCC('a', 'v', 'c', 'C'):
1641        {
1642            *offset += chunk_size;
1643
1644            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1645
1646            if (mDataSource->readAt(
1647                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1648                return ERROR_IO;
1649            }
1650
1651            mLastTrack->meta->setData(
1652                    kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
1653
1654            break;
1655        }
1656        case FOURCC('h', 'v', 'c', 'C'):
1657        {
1658            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1659
1660            if (mDataSource->readAt(
1661                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1662                return ERROR_IO;
1663            }
1664
1665            mLastTrack->meta->setData(
1666                    kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size);
1667
1668            *offset += chunk_size;
1669            break;
1670        }
1671
1672        case FOURCC('d', '2', '6', '3'):
1673        {
1674            *offset += chunk_size;
1675            /*
1676             * d263 contains a fixed 7 bytes part:
1677             *   vendor - 4 bytes
1678             *   version - 1 byte
1679             *   level - 1 byte
1680             *   profile - 1 byte
1681             * optionally, "d263" box itself may contain a 16-byte
1682             * bit rate box (bitr)
1683             *   average bit rate - 4 bytes
1684             *   max bit rate - 4 bytes
1685             */
1686            char buffer[23];
1687            if (chunk_data_size != 7 &&
1688                chunk_data_size != 23) {
1689                ALOGE("Incorrect D263 box size %lld", chunk_data_size);
1690                return ERROR_MALFORMED;
1691            }
1692
1693            if (mDataSource->readAt(
1694                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1695                return ERROR_IO;
1696            }
1697
1698            mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1699
1700            break;
1701        }
1702
1703        case FOURCC('m', 'e', 't', 'a'):
1704        {
1705            uint8_t buffer[4];
1706            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1707                *offset += chunk_size;
1708                return ERROR_MALFORMED;
1709            }
1710
1711            if (mDataSource->readAt(
1712                        data_offset, buffer, 4) < 4) {
1713                *offset += chunk_size;
1714                return ERROR_IO;
1715            }
1716
1717            if (U32_AT(buffer) != 0) {
1718                // Should be version 0, flags 0.
1719
1720                // If it's not, let's assume this is one of those
1721                // apparently malformed chunks that don't have flags
1722                // and completely different semantics than what's
1723                // in the MPEG4 specs and skip it.
1724                *offset += chunk_size;
1725                return OK;
1726            }
1727
1728            off64_t stop_offset = *offset + chunk_size;
1729            *offset = data_offset + sizeof(buffer);
1730            while (*offset < stop_offset) {
1731                status_t err = parseChunk(offset, depth + 1);
1732                if (err != OK) {
1733                    return err;
1734                }
1735            }
1736
1737            if (*offset != stop_offset) {
1738                return ERROR_MALFORMED;
1739            }
1740            break;
1741        }
1742
1743        case FOURCC('m', 'e', 'a', 'n'):
1744        case FOURCC('n', 'a', 'm', 'e'):
1745        case FOURCC('d', 'a', 't', 'a'):
1746        {
1747            *offset += chunk_size;
1748
1749            if (mPath.size() == 6 && underMetaDataPath(mPath)) {
1750                status_t err = parseITunesMetaData(data_offset, chunk_data_size);
1751
1752                if (err != OK) {
1753                    return err;
1754                }
1755            }
1756
1757            break;
1758        }
1759
1760        case FOURCC('m', 'v', 'h', 'd'):
1761        {
1762            *offset += chunk_size;
1763
1764            if (chunk_data_size < 32) {
1765                return ERROR_MALFORMED;
1766            }
1767
1768            uint8_t header[32];
1769            if (mDataSource->readAt(
1770                        data_offset, header, sizeof(header))
1771                    < (ssize_t)sizeof(header)) {
1772                return ERROR_IO;
1773            }
1774
1775            uint64_t creationTime;
1776            uint64_t duration = 0;
1777            if (header[0] == 1) {
1778                creationTime = U64_AT(&header[4]);
1779                mHeaderTimescale = U32_AT(&header[20]);
1780                duration = U64_AT(&header[24]);
1781                if (duration == 0xffffffffffffffff) {
1782                    duration = 0;
1783                }
1784            } else if (header[0] != 0) {
1785                return ERROR_MALFORMED;
1786            } else {
1787                creationTime = U32_AT(&header[4]);
1788                mHeaderTimescale = U32_AT(&header[12]);
1789                uint32_t d32 = U32_AT(&header[16]);
1790                if (d32 == 0xffffffff) {
1791                    d32 = 0;
1792                }
1793                duration = d32;
1794            }
1795            if (duration != 0) {
1796                mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
1797            }
1798
1799            String8 s;
1800            convertTimeToDate(creationTime, &s);
1801
1802            mFileMetaData->setCString(kKeyDate, s.string());
1803
1804            break;
1805        }
1806
1807        case FOURCC('m', 'e', 'h', 'd'):
1808        {
1809            *offset += chunk_size;
1810
1811            if (chunk_data_size < 8) {
1812                return ERROR_MALFORMED;
1813            }
1814
1815            uint8_t flags[4];
1816            if (mDataSource->readAt(
1817                        data_offset, flags, sizeof(flags))
1818                    < (ssize_t)sizeof(flags)) {
1819                return ERROR_IO;
1820            }
1821
1822            uint64_t duration = 0;
1823            if (flags[0] == 1) {
1824                // 64 bit
1825                if (chunk_data_size < 12) {
1826                    return ERROR_MALFORMED;
1827                }
1828                mDataSource->getUInt64(data_offset + 4, &duration);
1829                if (duration == 0xffffffffffffffff) {
1830                    duration = 0;
1831                }
1832            } else if (flags[0] == 0) {
1833                // 32 bit
1834                uint32_t d32;
1835                mDataSource->getUInt32(data_offset + 4, &d32);
1836                if (d32 == 0xffffffff) {
1837                    d32 = 0;
1838                }
1839                duration = d32;
1840            } else {
1841                return ERROR_MALFORMED;
1842            }
1843
1844            if (duration != 0) {
1845                mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
1846            }
1847
1848            break;
1849        }
1850
1851        case FOURCC('m', 'd', 'a', 't'):
1852        {
1853            ALOGV("mdat chunk, drm: %d", mIsDrm);
1854            if (!mIsDrm) {
1855                *offset += chunk_size;
1856                break;
1857            }
1858
1859            if (chunk_size < 8) {
1860                return ERROR_MALFORMED;
1861            }
1862
1863            return parseDrmSINF(offset, data_offset);
1864        }
1865
1866        case FOURCC('h', 'd', 'l', 'r'):
1867        {
1868            *offset += chunk_size;
1869
1870            uint32_t buffer;
1871            if (mDataSource->readAt(
1872                        data_offset + 8, &buffer, 4) < 4) {
1873                return ERROR_IO;
1874            }
1875
1876            uint32_t type = ntohl(buffer);
1877            // For the 3GPP file format, the handler-type within the 'hdlr' box
1878            // shall be 'text'. We also want to support 'sbtl' handler type
1879            // for a practical reason as various MPEG4 containers use it.
1880            if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
1881                mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
1882            }
1883
1884            break;
1885        }
1886
1887        case FOURCC('t', 'r', 'e', 'x'):
1888        {
1889            *offset += chunk_size;
1890
1891            if (chunk_data_size < 24) {
1892                return ERROR_IO;
1893            }
1894            Trex trex;
1895            if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
1896                !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
1897                !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
1898                !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
1899                !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
1900                return ERROR_IO;
1901            }
1902            mTrex.add(trex);
1903            break;
1904        }
1905
1906        case FOURCC('t', 'x', '3', 'g'):
1907        {
1908            uint32_t type;
1909            const void *data;
1910            size_t size = 0;
1911            if (!mLastTrack->meta->findData(
1912                    kKeyTextFormatData, &type, &data, &size)) {
1913                size = 0;
1914            }
1915
1916            if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) {
1917                return ERROR_MALFORMED;
1918            }
1919
1920            uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size];
1921            if (buffer == NULL) {
1922                return ERROR_MALFORMED;
1923            }
1924
1925            if (size > 0) {
1926                memcpy(buffer, data, size);
1927            }
1928
1929            if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
1930                    < chunk_size) {
1931                delete[] buffer;
1932                buffer = NULL;
1933
1934                // advance read pointer so we don't end up reading this again
1935                *offset += chunk_size;
1936                return ERROR_IO;
1937            }
1938
1939            mLastTrack->meta->setData(
1940                    kKeyTextFormatData, 0, buffer, size + chunk_size);
1941
1942            delete[] buffer;
1943
1944            *offset += chunk_size;
1945            break;
1946        }
1947
1948        case FOURCC('c', 'o', 'v', 'r'):
1949        {
1950            *offset += chunk_size;
1951
1952            if (mFileMetaData != NULL) {
1953                ALOGV("chunk_data_size = %lld and data_offset = %lld",
1954                        chunk_data_size, data_offset);
1955
1956                if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
1957                    return ERROR_MALFORMED;
1958                }
1959                sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1);
1960                if (mDataSource->readAt(
1961                    data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) {
1962                    return ERROR_IO;
1963                }
1964                const int kSkipBytesOfDataBox = 16;
1965                if (chunk_data_size <= kSkipBytesOfDataBox) {
1966                    return ERROR_MALFORMED;
1967                }
1968
1969                mFileMetaData->setData(
1970                    kKeyAlbumArt, MetaData::TYPE_NONE,
1971                    buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
1972            }
1973
1974            break;
1975        }
1976
1977        case FOURCC('t', 'i', 't', 'l'):
1978        case FOURCC('p', 'e', 'r', 'f'):
1979        case FOURCC('a', 'u', 't', 'h'):
1980        case FOURCC('g', 'n', 'r', 'e'):
1981        case FOURCC('a', 'l', 'b', 'm'):
1982        case FOURCC('y', 'r', 'r', 'c'):
1983        {
1984            *offset += chunk_size;
1985
1986            status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
1987
1988            if (err != OK) {
1989                return err;
1990            }
1991
1992            break;
1993        }
1994
1995        case FOURCC('I', 'D', '3', '2'):
1996        {
1997            *offset += chunk_size;
1998
1999            if (chunk_data_size < 6) {
2000                return ERROR_MALFORMED;
2001            }
2002
2003            parseID3v2MetaData(data_offset + 6);
2004
2005            break;
2006        }
2007
2008        case FOURCC('-', '-', '-', '-'):
2009        {
2010            mLastCommentMean.clear();
2011            mLastCommentName.clear();
2012            mLastCommentData.clear();
2013            *offset += chunk_size;
2014            break;
2015        }
2016
2017        case FOURCC('s', 'i', 'd', 'x'):
2018        {
2019            parseSegmentIndex(data_offset, chunk_data_size);
2020            *offset += chunk_size;
2021            return UNKNOWN_ERROR; // stop parsing after sidx
2022        }
2023
2024        default:
2025        {
2026            *offset += chunk_size;
2027            break;
2028        }
2029    }
2030
2031    return OK;
2032}
2033
2034status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
2035  ALOGV("MPEG4Extractor::parseSegmentIndex");
2036
2037    if (size < 12) {
2038      return -EINVAL;
2039    }
2040
2041    uint32_t flags;
2042    if (!mDataSource->getUInt32(offset, &flags)) {
2043        return ERROR_MALFORMED;
2044    }
2045
2046    uint32_t version = flags >> 24;
2047    flags &= 0xffffff;
2048
2049    ALOGV("sidx version %d", version);
2050
2051    uint32_t referenceId;
2052    if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
2053        return ERROR_MALFORMED;
2054    }
2055
2056    uint32_t timeScale;
2057    if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
2058        return ERROR_MALFORMED;
2059    }
2060    ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
2061
2062    uint64_t earliestPresentationTime;
2063    uint64_t firstOffset;
2064
2065    offset += 12;
2066    size -= 12;
2067
2068    if (version == 0) {
2069        if (size < 8) {
2070            return -EINVAL;
2071        }
2072        uint32_t tmp;
2073        if (!mDataSource->getUInt32(offset, &tmp)) {
2074            return ERROR_MALFORMED;
2075        }
2076        earliestPresentationTime = tmp;
2077        if (!mDataSource->getUInt32(offset + 4, &tmp)) {
2078            return ERROR_MALFORMED;
2079        }
2080        firstOffset = tmp;
2081        offset += 8;
2082        size -= 8;
2083    } else {
2084        if (size < 16) {
2085            return -EINVAL;
2086        }
2087        if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
2088            return ERROR_MALFORMED;
2089        }
2090        if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
2091            return ERROR_MALFORMED;
2092        }
2093        offset += 16;
2094        size -= 16;
2095    }
2096    ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
2097
2098    if (size < 4) {
2099        return -EINVAL;
2100    }
2101
2102    uint16_t referenceCount;
2103    if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
2104        return ERROR_MALFORMED;
2105    }
2106    offset += 4;
2107    size -= 4;
2108    ALOGV("refcount: %d", referenceCount);
2109
2110    if (size < referenceCount * 12) {
2111        return -EINVAL;
2112    }
2113
2114    uint64_t total_duration = 0;
2115    for (unsigned int i = 0; i < referenceCount; i++) {
2116        uint32_t d1, d2, d3;
2117
2118        if (!mDataSource->getUInt32(offset, &d1) ||     // size
2119            !mDataSource->getUInt32(offset + 4, &d2) || // duration
2120            !mDataSource->getUInt32(offset + 8, &d3)) { // flags
2121            return ERROR_MALFORMED;
2122        }
2123
2124        if (d1 & 0x80000000) {
2125            ALOGW("sub-sidx boxes not supported yet");
2126        }
2127        bool sap = d3 & 0x80000000;
2128        uint32_t saptype = (d3 >> 28) & 7;
2129        if (!sap || (saptype != 1 && saptype != 2)) {
2130            // type 1 and 2 are sync samples
2131            ALOGW("not a stream access point, or unsupported type: %08x", d3);
2132        }
2133        total_duration += d2;
2134        offset += 12;
2135        ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
2136        SidxEntry se;
2137        se.mSize = d1 & 0x7fffffff;
2138        se.mDurationUs = 1000000LL * d2 / timeScale;
2139        mSidxEntries.add(se);
2140    }
2141
2142    uint64_t sidxDuration = total_duration * 1000000 / timeScale;
2143
2144    int64_t metaDuration;
2145    if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
2146        mLastTrack->meta->setInt64(kKeyDuration, sidxDuration);
2147    }
2148    return OK;
2149}
2150
2151
2152
2153status_t MPEG4Extractor::parseTrackHeader(
2154        off64_t data_offset, off64_t data_size) {
2155    if (data_size < 4) {
2156        return ERROR_MALFORMED;
2157    }
2158
2159    uint8_t version;
2160    if (mDataSource->readAt(data_offset, &version, 1) < 1) {
2161        return ERROR_IO;
2162    }
2163
2164    size_t dynSize = (version == 1) ? 36 : 24;
2165
2166    uint8_t buffer[36 + 60];
2167
2168    if (data_size != (off64_t)dynSize + 60) {
2169        return ERROR_MALFORMED;
2170    }
2171
2172    if (mDataSource->readAt(
2173                data_offset, buffer, data_size) < (ssize_t)data_size) {
2174        return ERROR_IO;
2175    }
2176
2177    uint64_t ctime __unused, mtime __unused, duration __unused;
2178    int32_t id;
2179
2180    if (version == 1) {
2181        ctime = U64_AT(&buffer[4]);
2182        mtime = U64_AT(&buffer[12]);
2183        id = U32_AT(&buffer[20]);
2184        duration = U64_AT(&buffer[28]);
2185    } else if (version == 0) {
2186        ctime = U32_AT(&buffer[4]);
2187        mtime = U32_AT(&buffer[8]);
2188        id = U32_AT(&buffer[12]);
2189        duration = U32_AT(&buffer[20]);
2190    } else {
2191        return ERROR_UNSUPPORTED;
2192    }
2193
2194    mLastTrack->meta->setInt32(kKeyTrackID, id);
2195
2196    size_t matrixOffset = dynSize + 16;
2197    int32_t a00 = U32_AT(&buffer[matrixOffset]);
2198    int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
2199    int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
2200    int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
2201
2202#if 0
2203    int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
2204    int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
2205
2206    ALOGI("x' = %.2f * x + %.2f * y + %.2f",
2207         a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
2208    ALOGI("y' = %.2f * x + %.2f * y + %.2f",
2209         a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
2210#endif
2211
2212    uint32_t rotationDegrees;
2213
2214    static const int32_t kFixedOne = 0x10000;
2215    if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
2216        // Identity, no rotation
2217        rotationDegrees = 0;
2218    } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2219        rotationDegrees = 90;
2220    } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2221        rotationDegrees = 270;
2222    } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2223        rotationDegrees = 180;
2224    } else {
2225        ALOGW("We only support 0,90,180,270 degree rotation matrices");
2226        rotationDegrees = 0;
2227    }
2228
2229    if (rotationDegrees != 0) {
2230        mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
2231    }
2232
2233    // Handle presentation display size, which could be different
2234    // from the image size indicated by kKeyWidth and kKeyHeight.
2235    uint32_t width = U32_AT(&buffer[dynSize + 52]);
2236    uint32_t height = U32_AT(&buffer[dynSize + 56]);
2237    mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
2238    mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
2239
2240    return OK;
2241}
2242
2243status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
2244    if (size < 4 || size == SIZE_MAX) {
2245        return ERROR_MALFORMED;
2246    }
2247
2248    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2249    if (buffer == NULL) {
2250        return ERROR_MALFORMED;
2251    }
2252    if (mDataSource->readAt(
2253                offset, buffer, size) != (ssize_t)size) {
2254        delete[] buffer;
2255        buffer = NULL;
2256
2257        return ERROR_IO;
2258    }
2259
2260    uint32_t flags = U32_AT(buffer);
2261
2262    uint32_t metadataKey = 0;
2263    char chunk[5];
2264    MakeFourCCString(mPath[4], chunk);
2265    ALOGV("meta: %s @ %lld", chunk, offset);
2266    switch ((int32_t)mPath[4]) {
2267        case FOURCC(0xa9, 'a', 'l', 'b'):
2268        {
2269            metadataKey = kKeyAlbum;
2270            break;
2271        }
2272        case FOURCC(0xa9, 'A', 'R', 'T'):
2273        {
2274            metadataKey = kKeyArtist;
2275            break;
2276        }
2277        case FOURCC('a', 'A', 'R', 'T'):
2278        {
2279            metadataKey = kKeyAlbumArtist;
2280            break;
2281        }
2282        case FOURCC(0xa9, 'd', 'a', 'y'):
2283        {
2284            metadataKey = kKeyYear;
2285            break;
2286        }
2287        case FOURCC(0xa9, 'n', 'a', 'm'):
2288        {
2289            metadataKey = kKeyTitle;
2290            break;
2291        }
2292        case FOURCC(0xa9, 'w', 'r', 't'):
2293        {
2294            metadataKey = kKeyWriter;
2295            break;
2296        }
2297        case FOURCC('c', 'o', 'v', 'r'):
2298        {
2299            metadataKey = kKeyAlbumArt;
2300            break;
2301        }
2302        case FOURCC('g', 'n', 'r', 'e'):
2303        {
2304            metadataKey = kKeyGenre;
2305            break;
2306        }
2307        case FOURCC(0xa9, 'g', 'e', 'n'):
2308        {
2309            metadataKey = kKeyGenre;
2310            break;
2311        }
2312        case FOURCC('c', 'p', 'i', 'l'):
2313        {
2314            if (size == 9 && flags == 21) {
2315                char tmp[16];
2316                sprintf(tmp, "%d",
2317                        (int)buffer[size - 1]);
2318
2319                mFileMetaData->setCString(kKeyCompilation, tmp);
2320            }
2321            break;
2322        }
2323        case FOURCC('t', 'r', 'k', 'n'):
2324        {
2325            if (size == 16 && flags == 0) {
2326                char tmp[16];
2327                uint16_t* pTrack = (uint16_t*)&buffer[10];
2328                uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2329                sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2330
2331                mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2332            }
2333            break;
2334        }
2335        case FOURCC('d', 'i', 's', 'k'):
2336        {
2337            if ((size == 14 || size == 16) && flags == 0) {
2338                char tmp[16];
2339                uint16_t* pDisc = (uint16_t*)&buffer[10];
2340                uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2341                sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2342
2343                mFileMetaData->setCString(kKeyDiscNumber, tmp);
2344            }
2345            break;
2346        }
2347        case FOURCC('-', '-', '-', '-'):
2348        {
2349            buffer[size] = '\0';
2350            switch (mPath[5]) {
2351                case FOURCC('m', 'e', 'a', 'n'):
2352                    mLastCommentMean.setTo((const char *)buffer + 4);
2353                    break;
2354                case FOURCC('n', 'a', 'm', 'e'):
2355                    mLastCommentName.setTo((const char *)buffer + 4);
2356                    break;
2357                case FOURCC('d', 'a', 't', 'a'):
2358                    mLastCommentData.setTo((const char *)buffer + 8);
2359                    break;
2360            }
2361
2362            // Once we have a set of mean/name/data info, go ahead and process
2363            // it to see if its something we are interested in.  Whether or not
2364            // were are interested in the specific tag, make sure to clear out
2365            // the set so we can be ready to process another tuple should one
2366            // show up later in the file.
2367            if ((mLastCommentMean.length() != 0) &&
2368                (mLastCommentName.length() != 0) &&
2369                (mLastCommentData.length() != 0)) {
2370
2371                if (mLastCommentMean == "com.apple.iTunes"
2372                        && mLastCommentName == "iTunSMPB") {
2373                    int32_t delay, padding;
2374                    if (sscanf(mLastCommentData,
2375                               " %*x %x %x %*x", &delay, &padding) == 2) {
2376                        mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2377                        mLastTrack->meta->setInt32(kKeyEncoderPadding, padding);
2378                    }
2379                }
2380
2381                mLastCommentMean.clear();
2382                mLastCommentName.clear();
2383                mLastCommentData.clear();
2384            }
2385            break;
2386        }
2387
2388        default:
2389            break;
2390    }
2391
2392    if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) {
2393        if (metadataKey == kKeyAlbumArt) {
2394            mFileMetaData->setData(
2395                    kKeyAlbumArt, MetaData::TYPE_NONE,
2396                    buffer + 8, size - 8);
2397        } else if (metadataKey == kKeyGenre) {
2398            if (flags == 0) {
2399                // uint8_t genre code, iTunes genre codes are
2400                // the standard id3 codes, except they start
2401                // at 1 instead of 0 (e.g. Pop is 14, not 13)
2402                // We use standard id3 numbering, so subtract 1.
2403                int genrecode = (int)buffer[size - 1];
2404                genrecode--;
2405                if (genrecode < 0) {
2406                    genrecode = 255; // reserved for 'unknown genre'
2407                }
2408                char genre[10];
2409                sprintf(genre, "%d", genrecode);
2410
2411                mFileMetaData->setCString(metadataKey, genre);
2412            } else if (flags == 1) {
2413                // custom genre string
2414                buffer[size] = '\0';
2415
2416                mFileMetaData->setCString(
2417                        metadataKey, (const char *)buffer + 8);
2418            }
2419        } else {
2420            buffer[size] = '\0';
2421
2422            mFileMetaData->setCString(
2423                    metadataKey, (const char *)buffer + 8);
2424        }
2425    }
2426
2427    delete[] buffer;
2428    buffer = NULL;
2429
2430    return OK;
2431}
2432
2433status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
2434    if (size < 4 || size == SIZE_MAX) {
2435        return ERROR_MALFORMED;
2436    }
2437
2438    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2439    if (buffer == NULL) {
2440        return ERROR_MALFORMED;
2441    }
2442    if (mDataSource->readAt(
2443                offset, buffer, size) != (ssize_t)size) {
2444        delete[] buffer;
2445        buffer = NULL;
2446
2447        return ERROR_IO;
2448    }
2449
2450    uint32_t metadataKey = 0;
2451    switch (mPath[depth]) {
2452        case FOURCC('t', 'i', 't', 'l'):
2453        {
2454            metadataKey = kKeyTitle;
2455            break;
2456        }
2457        case FOURCC('p', 'e', 'r', 'f'):
2458        {
2459            metadataKey = kKeyArtist;
2460            break;
2461        }
2462        case FOURCC('a', 'u', 't', 'h'):
2463        {
2464            metadataKey = kKeyWriter;
2465            break;
2466        }
2467        case FOURCC('g', 'n', 'r', 'e'):
2468        {
2469            metadataKey = kKeyGenre;
2470            break;
2471        }
2472        case FOURCC('a', 'l', 'b', 'm'):
2473        {
2474            if (buffer[size - 1] != '\0') {
2475              char tmp[4];
2476              sprintf(tmp, "%u", buffer[size - 1]);
2477
2478              mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2479            }
2480
2481            metadataKey = kKeyAlbum;
2482            break;
2483        }
2484        case FOURCC('y', 'r', 'r', 'c'):
2485        {
2486            char tmp[5];
2487            uint16_t year = U16_AT(&buffer[4]);
2488
2489            if (year < 10000) {
2490                sprintf(tmp, "%u", year);
2491
2492                mFileMetaData->setCString(kKeyYear, tmp);
2493            }
2494            break;
2495        }
2496
2497        default:
2498            break;
2499    }
2500
2501    if (metadataKey > 0) {
2502        bool isUTF8 = true; // Common case
2503        char16_t *framedata = NULL;
2504        int len16 = 0; // Number of UTF-16 characters
2505
2506        // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
2507        if (size < 6) {
2508            return ERROR_MALFORMED;
2509        }
2510
2511        if (size - 6 >= 4) {
2512            len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
2513            framedata = (char16_t *)(buffer + 6);
2514            if (0xfffe == *framedata) {
2515                // endianness marker (BOM) doesn't match host endianness
2516                for (int i = 0; i < len16; i++) {
2517                    framedata[i] = bswap_16(framedata[i]);
2518                }
2519                // BOM is now swapped to 0xfeff, we will execute next block too
2520            }
2521
2522            if (0xfeff == *framedata) {
2523                // Remove the BOM
2524                framedata++;
2525                len16--;
2526                isUTF8 = false;
2527            }
2528            // else normal non-zero-length UTF-8 string
2529            // we can't handle UTF-16 without BOM as there is no other
2530            // indication of encoding.
2531        }
2532
2533        if (isUTF8) {
2534            buffer[size] = 0;
2535            mFileMetaData->setCString(metadataKey, (const char *)buffer + 6);
2536        } else {
2537            // Convert from UTF-16 string to UTF-8 string.
2538            String8 tmpUTF8str(framedata, len16);
2539            mFileMetaData->setCString(metadataKey, tmpUTF8str.string());
2540        }
2541    }
2542
2543    delete[] buffer;
2544    buffer = NULL;
2545
2546    return OK;
2547}
2548
2549void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
2550    ID3 id3(mDataSource, true /* ignorev1 */, offset);
2551
2552    if (id3.isValid()) {
2553        struct Map {
2554            int key;
2555            const char *tag1;
2556            const char *tag2;
2557        };
2558        static const Map kMap[] = {
2559            { kKeyAlbum, "TALB", "TAL" },
2560            { kKeyArtist, "TPE1", "TP1" },
2561            { kKeyAlbumArtist, "TPE2", "TP2" },
2562            { kKeyComposer, "TCOM", "TCM" },
2563            { kKeyGenre, "TCON", "TCO" },
2564            { kKeyTitle, "TIT2", "TT2" },
2565            { kKeyYear, "TYE", "TYER" },
2566            { kKeyAuthor, "TXT", "TEXT" },
2567            { kKeyCDTrackNumber, "TRK", "TRCK" },
2568            { kKeyDiscNumber, "TPA", "TPOS" },
2569            { kKeyCompilation, "TCP", "TCMP" },
2570        };
2571        static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
2572
2573        for (size_t i = 0; i < kNumMapEntries; ++i) {
2574            if (!mFileMetaData->hasData(kMap[i].key)) {
2575                ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
2576                if (it->done()) {
2577                    delete it;
2578                    it = new ID3::Iterator(id3, kMap[i].tag2);
2579                }
2580
2581                if (it->done()) {
2582                    delete it;
2583                    continue;
2584                }
2585
2586                String8 s;
2587                it->getString(&s);
2588                delete it;
2589
2590                mFileMetaData->setCString(kMap[i].key, s);
2591            }
2592        }
2593
2594        size_t dataSize;
2595        String8 mime;
2596        const void *data = id3.getAlbumArt(&dataSize, &mime);
2597
2598        if (data) {
2599            mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
2600            mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string());
2601        }
2602    }
2603}
2604
2605sp<MediaSource> MPEG4Extractor::getTrack(size_t index) {
2606    status_t err;
2607    if ((err = readMetaData()) != OK) {
2608        return NULL;
2609    }
2610
2611    Track *track = mFirstTrack;
2612    while (index > 0) {
2613        if (track == NULL) {
2614            return NULL;
2615        }
2616
2617        track = track->next;
2618        --index;
2619    }
2620
2621    if (track == NULL) {
2622        return NULL;
2623    }
2624
2625
2626    Trex *trex = NULL;
2627    int32_t trackId;
2628    if (track->meta->findInt32(kKeyTrackID, &trackId)) {
2629        for (size_t i = 0; i < mTrex.size(); i++) {
2630            Trex *t = &mTrex.editItemAt(index);
2631            if (t->track_ID == (uint32_t) trackId) {
2632                trex = t;
2633                break;
2634            }
2635        }
2636    }
2637
2638    ALOGV("getTrack called, pssh: %zu", mPssh.size());
2639
2640    return new MPEG4Source(this,
2641            track->meta, mDataSource, track->timescale, track->sampleTable,
2642            mSidxEntries, trex, mMoofOffset);
2643}
2644
2645// static
2646status_t MPEG4Extractor::verifyTrack(Track *track) {
2647    const char *mime;
2648    CHECK(track->meta->findCString(kKeyMIMEType, &mime));
2649
2650    uint32_t type;
2651    const void *data;
2652    size_t size;
2653    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
2654        if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
2655                || type != kTypeAVCC) {
2656            return ERROR_MALFORMED;
2657        }
2658    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
2659        if (!track->meta->findData(kKeyHVCC, &type, &data, &size)
2660                    || type != kTypeHVCC) {
2661            return ERROR_MALFORMED;
2662        }
2663    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
2664            || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
2665        if (!track->meta->findData(kKeyESDS, &type, &data, &size)
2666                || type != kTypeESDS) {
2667            return ERROR_MALFORMED;
2668        }
2669    }
2670
2671    if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
2672        // Make sure we have all the metadata we need.
2673        ALOGE("stbl atom missing/invalid.");
2674        return ERROR_MALFORMED;
2675    }
2676
2677    if (track->timescale == 0) {
2678        ALOGE("timescale invalid.");
2679        return ERROR_MALFORMED;
2680    }
2681
2682    return OK;
2683}
2684
// Audio object type identifiers. Only the enumerators that are not
// commented out are defined; the commented-out entries are kept to show
// the rest of the numbering.
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1
    // AOT_NULL_OBJECT      = 0
    // AOT_AAC_MAIN         = 1   Main profile
    AOT_AAC_LC      = 2,    // Low Complexity object
    // AOT_AAC_SSR          = 3
    // AOT_AAC_LTP          = 4
    AOT_SBR         = 5,
    // AOT_AAC_SCAL         = 6
    // AOT_TWIN_VQ          = 7
    // AOT_CELP             = 8
    // AOT_HVXC             = 9
    // AOT_RSVD_10          = 10  (reserved)
    // AOT_RSVD_11          = 11  (reserved)
    // AOT_TTSI             = 12  TTSI Object
    // AOT_MAIN_SYNTH       = 13  Main Synthetic object
    // AOT_WAV_TAB_SYNTH    = 14  Wavetable Synthesis object
    // AOT_GEN_MIDI         = 15  General MIDI object
    // AOT_ALG_SYNTH_AUD_FX = 16  Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC   = 17,   // Error Resilient(ER) AAC Low Complexity
    // AOT_RSVD_18          = 18  (reserved)
    // AOT_ER_AAC_LTP       = 19  Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL = 20,   // Error Resilient(ER) AAC Scalable object
    // AOT_ER_TWIN_VQ       = 21  Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC     = 22,   // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD   = 23,   // Error Resilient(ER) AAC LowDelay object
    // AOT_ER_CELP          = 24  Error Resilient(ER) CELP object
    // AOT_ER_HVXC          = 25  Error Resilient(ER) HVXC object
    // AOT_ER_HILN          = 26  Error Resilient(ER) HILN object
    // AOT_ER_PARA          = 27  Error Resilient(ER) Parametric object
    // AOT_RSVD_28          = 28  might become SSC
    AOT_PS          = 29,   // PS, Parametric Stereo (includes SBR)
    // AOT_MPEGS            = 30  MPEG Surround

    AOT_ESCAPE      = 31,   // Signal AOT uses more than 5 bits

    // AOT_MP3ONMP4_L1      = 32  MPEG-Layer1 in mp4
    // AOT_MP3ONMP4_L2      = 33  MPEG-Layer2 in mp4
    // AOT_MP3ONMP4_L3      = 34  MPEG-Layer3 in mp4
    // AOT_RSVD_35          = 35  might become DST
    // AOT_RSVD_36          = 36  might become ALS
    // AOT_AAC_SLS          = 37  AAC + SLS
    // AOT_SLS              = 38  SLS
    // AOT_ER_AAC_ELD       = 39  AAC Enhanced Low Delay

    // AOT_USAC             = 42  USAC
    // AOT_SAOC             = 43  SAOC
    // AOT_LD_MPEGS         = 44  Low Delay MPEG Surround

    // AOT_RSVD50           = 50  Interim AOT for Rsvd50
};
2736
2737status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
2738        const void *esds_data, size_t esds_size) {
2739    ESDS esds(esds_data, esds_size);
2740
2741    uint8_t objectTypeIndication;
2742    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
2743        return ERROR_MALFORMED;
2744    }
2745
2746    if (objectTypeIndication == 0xe1) {
2747        // This isn't MPEG4 audio at all, it's QCELP 14k...
2748        mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
2749        return OK;
2750    }
2751
2752    if (objectTypeIndication  == 0x6b) {
2753        // The media subtype is MP3 audio
2754        // Our software MP3 audio decoder may not be able to handle
2755        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
2756        ALOGE("MP3 track in MP4/3GPP file is not supported");
2757        return ERROR_UNSUPPORTED;
2758    }
2759
2760    const uint8_t *csd;
2761    size_t csd_size;
2762    if (esds.getCodecSpecificInfo(
2763                (const void **)&csd, &csd_size) != OK) {
2764        return ERROR_MALFORMED;
2765    }
2766
2767    if (kUseHexDump) {
2768        printf("ESD of size %d\n", csd_size);
2769        hexdump(csd, csd_size);
2770    }
2771
2772    if (csd_size == 0) {
2773        // There's no further information, i.e. no codec specific data
2774        // Let's assume that the information provided in the mpeg4 headers
2775        // is accurate and hope for the best.
2776
2777        return OK;
2778    }
2779
2780    if (csd_size < 2) {
2781        return ERROR_MALFORMED;
2782    }
2783
2784    static uint32_t kSamplingRate[] = {
2785        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
2786        16000, 12000, 11025, 8000, 7350
2787    };
2788
2789    ABitReader br(csd, csd_size);
2790    uint32_t objectType = br.getBits(5);
2791
2792    if (objectType == 31) {  // AAC-ELD => additional 6 bits
2793        objectType = 32 + br.getBits(6);
2794    }
2795
2796    //keep AOT type
2797    mLastTrack->meta->setInt32(kKeyAACAOT, objectType);
2798
2799    uint32_t freqIndex = br.getBits(4);
2800
2801    int32_t sampleRate = 0;
2802    int32_t numChannels = 0;
2803    if (freqIndex == 15) {
2804        if (csd_size < 5) {
2805            return ERROR_MALFORMED;
2806        }
2807        sampleRate = br.getBits(24);
2808        numChannels = br.getBits(4);
2809    } else {
2810        numChannels = br.getBits(4);
2811
2812        if (freqIndex == 13 || freqIndex == 14) {
2813            return ERROR_MALFORMED;
2814        }
2815
2816        sampleRate = kSamplingRate[freqIndex];
2817    }
2818
2819    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
2820        uint32_t extFreqIndex = br.getBits(4);
2821        int32_t extSampleRate __unused;
2822        if (extFreqIndex == 15) {
2823            if (csd_size < 8) {
2824                return ERROR_MALFORMED;
2825            }
2826            extSampleRate = br.getBits(24);
2827        } else {
2828            if (extFreqIndex == 13 || extFreqIndex == 14) {
2829                return ERROR_MALFORMED;
2830            }
2831            extSampleRate = kSamplingRate[extFreqIndex];
2832        }
2833        //TODO: save the extension sampling rate value in meta data =>
2834        //      mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
2835    }
2836
2837    switch (numChannels) {
2838        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
2839        case 0:
2840        case 1:// FC
2841        case 2:// FL FR
2842        case 3:// FC, FL FR
2843        case 4:// FC, FL FR, RC
2844        case 5:// FC, FL FR, SL SR
2845        case 6:// FC, FL FR, SL SR, LFE
2846            //numChannels already contains the right value
2847            break;
2848        case 11:// FC, FL FR, SL SR, RC, LFE
2849            numChannels = 7;
2850            break;
2851        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
2852        case 12:// FC, FL  FR,  SL SR, RL RR, LFE
2853        case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
2854            numChannels = 8;
2855            break;
2856        default:
2857            return ERROR_UNSUPPORTED;
2858    }
2859
2860    {
2861        if (objectType == AOT_SBR || objectType == AOT_PS) {
2862            objectType = br.getBits(5);
2863
2864            if (objectType == AOT_ESCAPE) {
2865                objectType = 32 + br.getBits(6);
2866            }
2867        }
2868        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
2869                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
2870                objectType == AOT_ER_BSAC) {
2871            const int32_t frameLengthFlag __unused = br.getBits(1);
2872
2873            const int32_t dependsOnCoreCoder = br.getBits(1);
2874
2875            if (dependsOnCoreCoder ) {
2876                const int32_t coreCoderDelay __unused = br.getBits(14);
2877            }
2878
2879            int32_t extensionFlag = -1;
2880            if (br.numBitsLeft() > 0) {
2881                extensionFlag = br.getBits(1);
2882            } else {
2883                switch (objectType) {
2884                // 14496-3 4.5.1.1 extensionFlag
2885                case AOT_AAC_LC:
2886                    extensionFlag = 0;
2887                    break;
2888                case AOT_ER_AAC_LC:
2889                case AOT_ER_AAC_SCAL:
2890                case AOT_ER_BSAC:
2891                case AOT_ER_AAC_LD:
2892                    extensionFlag = 1;
2893                    break;
2894                default:
2895                    TRESPASS();
2896                    break;
2897                }
2898                ALOGW("csd missing extension flag; assuming %d for object type %u.",
2899                        extensionFlag, objectType);
2900            }
2901
2902            if (numChannels == 0) {
2903                int32_t channelsEffectiveNum = 0;
2904                int32_t channelsNum = 0;
2905                const int32_t ElementInstanceTag __unused = br.getBits(4);
2906                const int32_t Profile __unused = br.getBits(2);
2907                const int32_t SamplingFrequencyIndex __unused = br.getBits(4);
2908                const int32_t NumFrontChannelElements = br.getBits(4);
2909                const int32_t NumSideChannelElements = br.getBits(4);
2910                const int32_t NumBackChannelElements = br.getBits(4);
2911                const int32_t NumLfeChannelElements = br.getBits(2);
2912                const int32_t NumAssocDataElements __unused = br.getBits(3);
2913                const int32_t NumValidCcElements __unused = br.getBits(4);
2914
2915                const int32_t MonoMixdownPresent = br.getBits(1);
2916                if (MonoMixdownPresent != 0) {
2917                    const int32_t MonoMixdownElementNumber __unused = br.getBits(4);
2918                }
2919
2920                const int32_t StereoMixdownPresent = br.getBits(1);
2921                if (StereoMixdownPresent != 0) {
2922                    const int32_t StereoMixdownElementNumber __unused = br.getBits(4);
2923                }
2924
2925                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
2926                if (MatrixMixdownIndexPresent != 0) {
2927                    const int32_t MatrixMixdownIndex __unused = br.getBits(2);
2928                    const int32_t PseudoSurroundEnable __unused = br.getBits(1);
2929                }
2930
2931                int i;
2932                for (i=0; i < NumFrontChannelElements; i++) {
2933                    const int32_t FrontElementIsCpe = br.getBits(1);
2934                    const int32_t FrontElementTagSelect __unused = br.getBits(4);
2935                    channelsNum += FrontElementIsCpe ? 2 : 1;
2936                }
2937
2938                for (i=0; i < NumSideChannelElements; i++) {
2939                    const int32_t SideElementIsCpe = br.getBits(1);
2940                    const int32_t SideElementTagSelect __unused = br.getBits(4);
2941                    channelsNum += SideElementIsCpe ? 2 : 1;
2942                }
2943
2944                for (i=0; i < NumBackChannelElements; i++) {
2945                    const int32_t BackElementIsCpe = br.getBits(1);
2946                    const int32_t BackElementTagSelect __unused = br.getBits(4);
2947                    channelsNum += BackElementIsCpe ? 2 : 1;
2948                }
2949                channelsEffectiveNum = channelsNum;
2950
2951                for (i=0; i < NumLfeChannelElements; i++) {
2952                    const int32_t LfeElementTagSelect __unused = br.getBits(4);
2953                    channelsNum += 1;
2954                }
2955                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
2956                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
2957                numChannels = channelsNum;
2958            }
2959        }
2960    }
2961
2962    if (numChannels == 0) {
2963        return ERROR_UNSUPPORTED;
2964    }
2965
2966    int32_t prevSampleRate;
2967    CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));
2968
2969    if (prevSampleRate != sampleRate) {
2970        ALOGV("mpeg4 audio sample rate different from previous setting. "
2971             "was: %d, now: %d", prevSampleRate, sampleRate);
2972    }
2973
2974    mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);
2975
2976    int32_t prevChannelCount;
2977    CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));
2978
2979    if (prevChannelCount != numChannels) {
2980        ALOGV("mpeg4 audio channel count different from previous setting. "
2981             "was: %d, now: %d", prevChannelCount, numChannels);
2982    }
2983
2984    mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);
2985
2986    return OK;
2987}
2988
2989////////////////////////////////////////////////////////////////////////////////
2990
2991MPEG4Source::MPEG4Source(
2992        const sp<MPEG4Extractor> &owner,
2993        const sp<MetaData> &format,
2994        const sp<DataSource> &dataSource,
2995        int32_t timeScale,
2996        const sp<SampleTable> &sampleTable,
2997        Vector<SidxEntry> &sidx,
2998        const Trex *trex,
2999        off64_t firstMoofOffset)
3000    : mOwner(owner),
3001      mFormat(format),
3002      mDataSource(dataSource),
3003      mTimescale(timeScale),
3004      mSampleTable(sampleTable),
3005      mCurrentSampleIndex(0),
3006      mCurrentFragmentIndex(0),
3007      mSegments(sidx),
3008      mTrex(trex),
3009      mFirstMoofOffset(firstMoofOffset),
3010      mCurrentMoofOffset(firstMoofOffset),
3011      mCurrentTime(0),
3012      mCurrentSampleInfoAllocSize(0),
3013      mCurrentSampleInfoSizes(NULL),
3014      mCurrentSampleInfoOffsetsAllocSize(0),
3015      mCurrentSampleInfoOffsets(NULL),
3016      mIsAVC(false),
3017      mIsHEVC(false),
3018      mNALLengthSize(0),
3019      mStarted(false),
3020      mGroup(NULL),
3021      mBuffer(NULL),
3022      mWantsNALFragments(false),
3023      mSrcBuffer(NULL) {
3024
3025    memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
3026
3027    mFormat->findInt32(kKeyCryptoMode, &mCryptoMode);
3028    mDefaultIVSize = 0;
3029    mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
3030    uint32_t keytype;
3031    const void *key;
3032    size_t keysize;
3033    if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
3034        CHECK(keysize <= 16);
3035        memset(mCryptoKey, 0, 16);
3036        memcpy(mCryptoKey, key, keysize);
3037    }
3038
3039    const char *mime;
3040    bool success = mFormat->findCString(kKeyMIMEType, &mime);
3041    CHECK(success);
3042
3043    mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
3044    mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC);
3045
3046    if (mIsAVC) {
3047        uint32_t type;
3048        const void *data;
3049        size_t size;
3050        CHECK(format->findData(kKeyAVCC, &type, &data, &size));
3051
3052        const uint8_t *ptr = (const uint8_t *)data;
3053
3054        CHECK(size >= 7);
3055        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
3056
3057        // The number of bytes used to encode the length of a NAL unit.
3058        mNALLengthSize = 1 + (ptr[4] & 3);
3059    } else if (mIsHEVC) {
3060        uint32_t type;
3061        const void *data;
3062        size_t size;
3063        CHECK(format->findData(kKeyHVCC, &type, &data, &size));
3064
3065        const uint8_t *ptr = (const uint8_t *)data;
3066
3067        CHECK(size >= 7);
3068        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
3069
3070        mNALLengthSize = 1 + (ptr[14 + 7] & 3);
3071    }
3072
3073    CHECK(format->findInt32(kKeyTrackID, &mTrackId));
3074
3075    if (mFirstMoofOffset != 0) {
3076        off64_t offset = mFirstMoofOffset;
3077        parseChunk(&offset);
3078    }
3079}
3080
3081MPEG4Source::~MPEG4Source() {
3082    if (mStarted) {
3083        stop();
3084    }
3085    free(mCurrentSampleInfoSizes);
3086    free(mCurrentSampleInfoOffsets);
3087}
3088
3089status_t MPEG4Source::start(MetaData *params) {
3090    Mutex::Autolock autoLock(mLock);
3091
3092    CHECK(!mStarted);
3093
3094    int32_t val;
3095    if (params && params->findInt32(kKeyWantsNALFragments, &val)
3096        && val != 0) {
3097        mWantsNALFragments = true;
3098    } else {
3099        mWantsNALFragments = false;
3100    }
3101
3102    mGroup = new MediaBufferGroup;
3103
3104    int32_t max_size;
3105    CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size));
3106
3107    mGroup->add_buffer(new MediaBuffer(max_size));
3108
3109    mSrcBuffer = new (std::nothrow) uint8_t[max_size];
3110    if (mSrcBuffer == NULL) {
3111        // file probably specified a bad max size
3112        return ERROR_MALFORMED;
3113    }
3114
3115    mStarted = true;
3116
3117    return OK;
3118}
3119
3120status_t MPEG4Source::stop() {
3121    Mutex::Autolock autoLock(mLock);
3122
3123    CHECK(mStarted);
3124
3125    if (mBuffer != NULL) {
3126        mBuffer->release();
3127        mBuffer = NULL;
3128    }
3129
3130    delete[] mSrcBuffer;
3131    mSrcBuffer = NULL;
3132
3133    delete mGroup;
3134    mGroup = NULL;
3135
3136    mStarted = false;
3137    mCurrentSampleIndex = 0;
3138
3139    return OK;
3140}
3141
3142status_t MPEG4Source::parseChunk(off64_t *offset) {
3143    uint32_t hdr[2];
3144    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3145        return ERROR_IO;
3146    }
3147    uint64_t chunk_size = ntohl(hdr[0]);
3148    uint32_t chunk_type = ntohl(hdr[1]);
3149    off64_t data_offset = *offset + 8;
3150
3151    if (chunk_size == 1) {
3152        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
3153            return ERROR_IO;
3154        }
3155        chunk_size = ntoh64(chunk_size);
3156        data_offset += 8;
3157
3158        if (chunk_size < 16) {
3159            // The smallest valid chunk is 16 bytes long in this case.
3160            return ERROR_MALFORMED;
3161        }
3162    } else if (chunk_size < 8) {
3163        // The smallest valid chunk is 8 bytes long.
3164        return ERROR_MALFORMED;
3165    }
3166
3167    char chunk[5];
3168    MakeFourCCString(chunk_type, chunk);
3169    ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset);
3170
3171    off64_t chunk_data_size = *offset + chunk_size - data_offset;
3172
3173    switch(chunk_type) {
3174
3175        case FOURCC('t', 'r', 'a', 'f'):
3176        case FOURCC('m', 'o', 'o', 'f'): {
3177            off64_t stop_offset = *offset + chunk_size;
3178            *offset = data_offset;
3179            while (*offset < stop_offset) {
3180                status_t err = parseChunk(offset);
3181                if (err != OK) {
3182                    return err;
3183                }
3184            }
3185            if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3186                // *offset points to the box following this moof. Find the next moof from there.
3187
3188                while (true) {
3189                    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3190                        return ERROR_END_OF_STREAM;
3191                    }
3192                    chunk_size = ntohl(hdr[0]);
3193                    chunk_type = ntohl(hdr[1]);
3194                    if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3195                        mNextMoofOffset = *offset;
3196                        break;
3197                    }
3198                    *offset += chunk_size;
3199                }
3200            }
3201            break;
3202        }
3203
3204        case FOURCC('t', 'f', 'h', 'd'): {
3205                status_t err;
3206                if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
3207                    return err;
3208                }
3209                *offset += chunk_size;
3210                break;
3211        }
3212
3213        case FOURCC('t', 'r', 'u', 'n'): {
3214                status_t err;
3215                if (mLastParsedTrackId == mTrackId) {
3216                    if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
3217                        return err;
3218                    }
3219                }
3220
3221                *offset += chunk_size;
3222                break;
3223        }
3224
3225        case FOURCC('s', 'a', 'i', 'z'): {
3226            status_t err;
3227            if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
3228                return err;
3229            }
3230            *offset += chunk_size;
3231            break;
3232        }
3233        case FOURCC('s', 'a', 'i', 'o'): {
3234            status_t err;
3235            if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
3236                return err;
3237            }
3238            *offset += chunk_size;
3239            break;
3240        }
3241
3242        case FOURCC('m', 'd', 'a', 't'): {
3243            // parse DRM info if present
3244            ALOGV("MPEG4Source::parseChunk mdat");
3245            // if saiz/saoi was previously observed, do something with the sampleinfos
3246            *offset += chunk_size;
3247            break;
3248        }
3249
3250        default: {
3251            *offset += chunk_size;
3252            break;
3253        }
3254    }
3255    return OK;
3256}
3257
3258status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
3259        off64_t offset, off64_t /* size */) {
3260    ALOGV("parseSampleAuxiliaryInformationSizes");
3261    // 14496-12 8.7.12
3262    uint8_t version;
3263    if (mDataSource->readAt(
3264            offset, &version, sizeof(version))
3265            < (ssize_t)sizeof(version)) {
3266        return ERROR_IO;
3267    }
3268
3269    if (version != 0) {
3270        return ERROR_UNSUPPORTED;
3271    }
3272    offset++;
3273
3274    uint32_t flags;
3275    if (!mDataSource->getUInt24(offset, &flags)) {
3276        return ERROR_IO;
3277    }
3278    offset += 3;
3279
3280    if (flags & 1) {
3281        uint32_t tmp;
3282        if (!mDataSource->getUInt32(offset, &tmp)) {
3283            return ERROR_MALFORMED;
3284        }
3285        mCurrentAuxInfoType = tmp;
3286        offset += 4;
3287        if (!mDataSource->getUInt32(offset, &tmp)) {
3288            return ERROR_MALFORMED;
3289        }
3290        mCurrentAuxInfoTypeParameter = tmp;
3291        offset += 4;
3292    }
3293
3294    uint8_t defsize;
3295    if (mDataSource->readAt(offset, &defsize, 1) != 1) {
3296        return ERROR_MALFORMED;
3297    }
3298    mCurrentDefaultSampleInfoSize = defsize;
3299    offset++;
3300
3301    uint32_t smplcnt;
3302    if (!mDataSource->getUInt32(offset, &smplcnt)) {
3303        return ERROR_MALFORMED;
3304    }
3305    mCurrentSampleInfoCount = smplcnt;
3306    offset += 4;
3307
3308    if (mCurrentDefaultSampleInfoSize != 0) {
3309        ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
3310        return OK;
3311    }
3312    if (smplcnt > mCurrentSampleInfoAllocSize) {
3313        mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
3314        mCurrentSampleInfoAllocSize = smplcnt;
3315    }
3316
3317    mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
3318    return OK;
3319}
3320
3321status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
3322        off64_t offset, off64_t /* size */) {
3323    ALOGV("parseSampleAuxiliaryInformationOffsets");
3324    // 14496-12 8.7.13
3325    uint8_t version;
3326    if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
3327        return ERROR_IO;
3328    }
3329    offset++;
3330
3331    uint32_t flags;
3332    if (!mDataSource->getUInt24(offset, &flags)) {
3333        return ERROR_IO;
3334    }
3335    offset += 3;
3336
3337    uint32_t entrycount;
3338    if (!mDataSource->getUInt32(offset, &entrycount)) {
3339        return ERROR_IO;
3340    }
3341    offset += 4;
3342    if (entrycount == 0) {
3343        return OK;
3344    }
3345    if (entrycount > UINT32_MAX / 8) {
3346        return ERROR_MALFORMED;
3347    }
3348
3349    if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
3350        uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
3351        if (newPtr == NULL) {
3352            return NO_MEMORY;
3353        }
3354        mCurrentSampleInfoOffsets = newPtr;
3355        mCurrentSampleInfoOffsetsAllocSize = entrycount;
3356    }
3357    mCurrentSampleInfoOffsetCount = entrycount;
3358
3359    if (mCurrentSampleInfoOffsets == NULL) {
3360        return OK;
3361    }
3362
3363    for (size_t i = 0; i < entrycount; i++) {
3364        if (version == 0) {
3365            uint32_t tmp;
3366            if (!mDataSource->getUInt32(offset, &tmp)) {
3367                return ERROR_IO;
3368            }
3369            mCurrentSampleInfoOffsets[i] = tmp;
3370            offset += 4;
3371        } else {
3372            uint64_t tmp;
3373            if (!mDataSource->getUInt64(offset, &tmp)) {
3374                return ERROR_IO;
3375            }
3376            mCurrentSampleInfoOffsets[i] = tmp;
3377            offset += 8;
3378        }
3379    }
3380
3381    // parse clear/encrypted data
3382
3383    off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
3384
3385    drmoffset += mCurrentMoofOffset;
3386    int ivlength;
3387    CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength));
3388
3389    // only 0, 8 and 16 byte initialization vectors are supported
3390    if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
3391        ALOGW("unsupported IV length: %d", ivlength);
3392        return ERROR_MALFORMED;
3393    }
3394    // read CencSampleAuxiliaryDataFormats
3395    for (size_t i = 0; i < mCurrentSampleInfoCount; i++) {
3396        if (i >= mCurrentSamples.size()) {
3397            ALOGW("too few samples");
3398            break;
3399        }
3400        Sample *smpl = &mCurrentSamples.editItemAt(i);
3401
3402        memset(smpl->iv, 0, 16);
3403        if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) {
3404            return ERROR_IO;
3405        }
3406
3407        drmoffset += ivlength;
3408
3409        int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
3410        if (smplinfosize == 0) {
3411            smplinfosize = mCurrentSampleInfoSizes[i];
3412        }
3413        if (smplinfosize > ivlength) {
3414            uint16_t numsubsamples;
3415            if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) {
3416                return ERROR_IO;
3417            }
3418            drmoffset += 2;
3419            for (size_t j = 0; j < numsubsamples; j++) {
3420                uint16_t numclear;
3421                uint32_t numencrypted;
3422                if (!mDataSource->getUInt16(drmoffset, &numclear)) {
3423                    return ERROR_IO;
3424                }
3425                drmoffset += 2;
3426                if (!mDataSource->getUInt32(drmoffset, &numencrypted)) {
3427                    return ERROR_IO;
3428                }
3429                drmoffset += 4;
3430                smpl->clearsizes.add(numclear);
3431                smpl->encryptedsizes.add(numencrypted);
3432            }
3433        } else {
3434            smpl->clearsizes.add(0);
3435            smpl->encryptedsizes.add(smpl->size);
3436        }
3437    }
3438
3439
3440    return OK;
3441}
3442
3443status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
3444
3445    if (size < 8) {
3446        return -EINVAL;
3447    }
3448
3449    uint32_t flags;
3450    if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
3451        return ERROR_MALFORMED;
3452    }
3453
3454    if (flags & 0xff000000) {
3455        return -EINVAL;
3456    }
3457
3458    if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
3459        return ERROR_MALFORMED;
3460    }
3461
3462    if (mLastParsedTrackId != mTrackId) {
3463        // this is not the right track, skip it
3464        return OK;
3465    }
3466
3467    mTrackFragmentHeaderInfo.mFlags = flags;
3468    mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
3469    offset += 8;
3470    size -= 8;
3471
3472    ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
3473
3474    if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
3475        if (size < 8) {
3476            return -EINVAL;
3477        }
3478
3479        if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
3480            return ERROR_MALFORMED;
3481        }
3482        offset += 8;
3483        size -= 8;
3484    }
3485
3486    if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
3487        if (size < 4) {
3488            return -EINVAL;
3489        }
3490
3491        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
3492            return ERROR_MALFORMED;
3493        }
3494        offset += 4;
3495        size -= 4;
3496    }
3497
3498    if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3499        if (size < 4) {
3500            return -EINVAL;
3501        }
3502
3503        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
3504            return ERROR_MALFORMED;
3505        }
3506        offset += 4;
3507        size -= 4;
3508    }
3509
3510    if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3511        if (size < 4) {
3512            return -EINVAL;
3513        }
3514
3515        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
3516            return ERROR_MALFORMED;
3517        }
3518        offset += 4;
3519        size -= 4;
3520    }
3521
3522    if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3523        if (size < 4) {
3524            return -EINVAL;
3525        }
3526
3527        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
3528            return ERROR_MALFORMED;
3529        }
3530        offset += 4;
3531        size -= 4;
3532    }
3533
3534    if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
3535        mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
3536    }
3537
3538    mTrackFragmentHeaderInfo.mDataOffset = 0;
3539    return OK;
3540}
3541
3542status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
3543
3544    ALOGV("MPEG4Extractor::parseTrackFragmentRun");
3545    if (size < 8) {
3546        return -EINVAL;
3547    }
3548
3549    enum {
3550        kDataOffsetPresent                  = 0x01,
3551        kFirstSampleFlagsPresent            = 0x04,
3552        kSampleDurationPresent              = 0x100,
3553        kSampleSizePresent                  = 0x200,
3554        kSampleFlagsPresent                 = 0x400,
3555        kSampleCompositionTimeOffsetPresent = 0x800,
3556    };
3557
3558    uint32_t flags;
3559    if (!mDataSource->getUInt32(offset, &flags)) {
3560        return ERROR_MALFORMED;
3561    }
3562    ALOGV("fragment run flags: %08x", flags);
3563
3564    if (flags & 0xff000000) {
3565        return -EINVAL;
3566    }
3567
3568    if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
3569        // These two shall not be used together.
3570        return -EINVAL;
3571    }
3572
3573    uint32_t sampleCount;
3574    if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
3575        return ERROR_MALFORMED;
3576    }
3577    offset += 8;
3578    size -= 8;
3579
3580    uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
3581
3582    uint32_t firstSampleFlags = 0;
3583
3584    if (flags & kDataOffsetPresent) {
3585        if (size < 4) {
3586            return -EINVAL;
3587        }
3588
3589        int32_t dataOffsetDelta;
3590        if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
3591            return ERROR_MALFORMED;
3592        }
3593
3594        dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
3595
3596        offset += 4;
3597        size -= 4;
3598    }
3599
3600    if (flags & kFirstSampleFlagsPresent) {
3601        if (size < 4) {
3602            return -EINVAL;
3603        }
3604
3605        if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
3606            return ERROR_MALFORMED;
3607        }
3608        offset += 4;
3609        size -= 4;
3610    }
3611
3612    uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
3613             sampleCtsOffset = 0;
3614
3615    size_t bytesPerSample = 0;
3616    if (flags & kSampleDurationPresent) {
3617        bytesPerSample += 4;
3618    } else if (mTrackFragmentHeaderInfo.mFlags
3619            & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3620        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
3621    } else if (mTrex) {
3622        sampleDuration = mTrex->default_sample_duration;
3623    }
3624
3625    if (flags & kSampleSizePresent) {
3626        bytesPerSample += 4;
3627    } else if (mTrackFragmentHeaderInfo.mFlags
3628            & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3629        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
3630    } else {
3631        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
3632    }
3633
3634    if (flags & kSampleFlagsPresent) {
3635        bytesPerSample += 4;
3636    } else if (mTrackFragmentHeaderInfo.mFlags
3637            & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3638        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
3639    } else {
3640        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
3641    }
3642
3643    if (flags & kSampleCompositionTimeOffsetPresent) {
3644        bytesPerSample += 4;
3645    } else {
3646        sampleCtsOffset = 0;
3647    }
3648
3649    if (size < (off64_t)sampleCount * bytesPerSample) {
3650        return -EINVAL;
3651    }
3652
3653    Sample tmp;
3654    for (uint32_t i = 0; i < sampleCount; ++i) {
3655        if (flags & kSampleDurationPresent) {
3656            if (!mDataSource->getUInt32(offset, &sampleDuration)) {
3657                return ERROR_MALFORMED;
3658            }
3659            offset += 4;
3660        }
3661
3662        if (flags & kSampleSizePresent) {
3663            if (!mDataSource->getUInt32(offset, &sampleSize)) {
3664                return ERROR_MALFORMED;
3665            }
3666            offset += 4;
3667        }
3668
3669        if (flags & kSampleFlagsPresent) {
3670            if (!mDataSource->getUInt32(offset, &sampleFlags)) {
3671                return ERROR_MALFORMED;
3672            }
3673            offset += 4;
3674        }
3675
3676        if (flags & kSampleCompositionTimeOffsetPresent) {
3677            if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
3678                return ERROR_MALFORMED;
3679            }
3680            offset += 4;
3681        }
3682
3683        ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
3684              " flags 0x%08x", i + 1,
3685                dataOffset, sampleSize, sampleDuration,
3686                (flags & kFirstSampleFlagsPresent) && i == 0
3687                    ? firstSampleFlags : sampleFlags);
3688        tmp.offset = dataOffset;
3689        tmp.size = sampleSize;
3690        tmp.duration = sampleDuration;
3691        tmp.compositionOffset = sampleCtsOffset;
3692        mCurrentSamples.add(tmp);
3693
3694        dataOffset += sampleSize;
3695    }
3696
3697    mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
3698
3699    return OK;
3700}
3701
3702sp<MetaData> MPEG4Source::getFormat() {
3703    Mutex::Autolock autoLock(mLock);
3704
3705    return mFormat;
3706}
3707
3708size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
3709    switch (mNALLengthSize) {
3710        case 1:
3711            return *data;
3712        case 2:
3713            return U16_AT(data);
3714        case 3:
3715            return ((size_t)data[0] << 16) | U16_AT(&data[1]);
3716        case 4:
3717            return U32_AT(data);
3718    }
3719
3720    // This cannot happen, mNALLengthSize springs to life by adding 1 to
3721    // a 2-bit integer.
3722    CHECK(!"Should not be here.");
3723
3724    return 0;
3725}
3726
3727status_t MPEG4Source::read(
3728        MediaBuffer **out, const ReadOptions *options) {
3729    Mutex::Autolock autoLock(mLock);
3730
3731    CHECK(mStarted);
3732
3733    if (mFirstMoofOffset > 0) {
3734        return fragmentedRead(out, options);
3735    }
3736
3737    *out = NULL;
3738
3739    int64_t targetSampleTimeUs = -1;
3740
3741    int64_t seekTimeUs;
3742    ReadOptions::SeekMode mode;
3743    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3744        uint32_t findFlags = 0;
3745        switch (mode) {
3746            case ReadOptions::SEEK_PREVIOUS_SYNC:
3747                findFlags = SampleTable::kFlagBefore;
3748                break;
3749            case ReadOptions::SEEK_NEXT_SYNC:
3750                findFlags = SampleTable::kFlagAfter;
3751                break;
3752            case ReadOptions::SEEK_CLOSEST_SYNC:
3753            case ReadOptions::SEEK_CLOSEST:
3754                findFlags = SampleTable::kFlagClosest;
3755                break;
3756            default:
3757                CHECK(!"Should not be here.");
3758                break;
3759        }
3760
3761        uint32_t sampleIndex;
3762        status_t err = mSampleTable->findSampleAtTime(
3763                seekTimeUs, 1000000, mTimescale,
3764                &sampleIndex, findFlags);
3765
3766        if (mode == ReadOptions::SEEK_CLOSEST) {
3767            // We found the closest sample already, now we want the sync
3768            // sample preceding it (or the sample itself of course), even
3769            // if the subsequent sync sample is closer.
3770            findFlags = SampleTable::kFlagBefore;
3771        }
3772
3773        uint32_t syncSampleIndex;
3774        if (err == OK) {
3775            err = mSampleTable->findSyncSampleNear(
3776                    sampleIndex, &syncSampleIndex, findFlags);
3777        }
3778
3779        uint32_t sampleTime;
3780        if (err == OK) {
3781            err = mSampleTable->getMetaDataForSample(
3782                    sampleIndex, NULL, NULL, &sampleTime);
3783        }
3784
3785        if (err != OK) {
3786            if (err == ERROR_OUT_OF_RANGE) {
3787                // An attempt to seek past the end of the stream would
3788                // normally cause this ERROR_OUT_OF_RANGE error. Propagating
3789                // this all the way to the MediaPlayer would cause abnormal
3790                // termination. Legacy behaviour appears to be to behave as if
3791                // we had seeked to the end of stream, ending normally.
3792                err = ERROR_END_OF_STREAM;
3793            }
3794            ALOGV("end of stream");
3795            return err;
3796        }
3797
3798        if (mode == ReadOptions::SEEK_CLOSEST) {
3799            targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
3800        }
3801
3802#if 0
3803        uint32_t syncSampleTime;
3804        CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
3805                    syncSampleIndex, NULL, NULL, &syncSampleTime));
3806
3807        ALOGI("seek to time %lld us => sample at time %lld us, "
3808             "sync sample at time %lld us",
3809             seekTimeUs,
3810             sampleTime * 1000000ll / mTimescale,
3811             syncSampleTime * 1000000ll / mTimescale);
3812#endif
3813
3814        mCurrentSampleIndex = syncSampleIndex;
3815        if (mBuffer != NULL) {
3816            mBuffer->release();
3817            mBuffer = NULL;
3818        }
3819
3820        // fall through
3821    }
3822
3823    off64_t offset;
3824    size_t size;
3825    uint32_t cts, stts;
3826    bool isSyncSample;
3827    bool newBuffer = false;
3828    if (mBuffer == NULL) {
3829        newBuffer = true;
3830
3831        status_t err =
3832            mSampleTable->getMetaDataForSample(
3833                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
3834
3835        if (err != OK) {
3836            return err;
3837        }
3838
3839        err = mGroup->acquire_buffer(&mBuffer);
3840
3841        if (err != OK) {
3842            CHECK(mBuffer == NULL);
3843            return err;
3844        }
3845        if (size > mBuffer->size()) {
3846            ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
3847            return ERROR_BUFFER_TOO_SMALL;
3848        }
3849    }
3850
3851    if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) {
3852        if (newBuffer) {
3853            ssize_t num_bytes_read =
3854                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3855
3856            if (num_bytes_read < (ssize_t)size) {
3857                mBuffer->release();
3858                mBuffer = NULL;
3859
3860                return ERROR_IO;
3861            }
3862
3863            CHECK(mBuffer != NULL);
3864            mBuffer->set_range(0, size);
3865            mBuffer->meta_data()->clear();
3866            mBuffer->meta_data()->setInt64(
3867                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3868            mBuffer->meta_data()->setInt64(
3869                    kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
3870
3871            if (targetSampleTimeUs >= 0) {
3872                mBuffer->meta_data()->setInt64(
3873                        kKeyTargetTime, targetSampleTimeUs);
3874            }
3875
3876            if (isSyncSample) {
3877                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3878            }
3879
3880            ++mCurrentSampleIndex;
3881        }
3882
3883        if (!mIsAVC && !mIsHEVC) {
3884            *out = mBuffer;
3885            mBuffer = NULL;
3886
3887            return OK;
3888        }
3889
3890        // Each NAL unit is split up into its constituent fragments and
3891        // each one of them returned in its own buffer.
3892
3893        CHECK(mBuffer->range_length() >= mNALLengthSize);
3894
3895        const uint8_t *src =
3896            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3897
3898        size_t nal_size = parseNALSize(src);
3899        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3900            ALOGE("incomplete NAL unit.");
3901
3902            mBuffer->release();
3903            mBuffer = NULL;
3904
3905            return ERROR_MALFORMED;
3906        }
3907
3908        MediaBuffer *clone = mBuffer->clone();
3909        CHECK(clone != NULL);
3910        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3911
3912        CHECK(mBuffer != NULL);
3913        mBuffer->set_range(
3914                mBuffer->range_offset() + mNALLengthSize + nal_size,
3915                mBuffer->range_length() - mNALLengthSize - nal_size);
3916
3917        if (mBuffer->range_length() == 0) {
3918            mBuffer->release();
3919            mBuffer = NULL;
3920        }
3921
3922        *out = clone;
3923
3924        return OK;
3925    } else {
3926        // Whole NAL units are returned but each fragment is prefixed by
3927        // the start code (0x00 00 00 01).
3928        ssize_t num_bytes_read = 0;
3929        int32_t drm = 0;
3930        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3931        if (usesDRM) {
3932            num_bytes_read =
3933                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3934        } else {
3935            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3936        }
3937
3938        if (num_bytes_read < (ssize_t)size) {
3939            mBuffer->release();
3940            mBuffer = NULL;
3941
3942            return ERROR_IO;
3943        }
3944
3945        if (usesDRM) {
3946            CHECK(mBuffer != NULL);
3947            mBuffer->set_range(0, size);
3948
3949        } else {
3950            uint8_t *dstData = (uint8_t *)mBuffer->data();
3951            size_t srcOffset = 0;
3952            size_t dstOffset = 0;
3953
3954            while (srcOffset < size) {
3955                bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
3956                size_t nalLength = 0;
3957                if (!isMalFormed) {
3958                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
3959                    srcOffset += mNALLengthSize;
3960                    isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
3961                }
3962
3963                if (isMalFormed) {
3964                    ALOGE("Video is malformed");
3965                    mBuffer->release();
3966                    mBuffer = NULL;
3967                    return ERROR_MALFORMED;
3968                }
3969
3970                if (nalLength == 0) {
3971                    continue;
3972                }
3973
3974                CHECK(dstOffset + 4 <= mBuffer->size());
3975
3976                dstData[dstOffset++] = 0;
3977                dstData[dstOffset++] = 0;
3978                dstData[dstOffset++] = 0;
3979                dstData[dstOffset++] = 1;
3980                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
3981                srcOffset += nalLength;
3982                dstOffset += nalLength;
3983            }
3984            CHECK_EQ(srcOffset, size);
3985            CHECK(mBuffer != NULL);
3986            mBuffer->set_range(0, dstOffset);
3987        }
3988
3989        mBuffer->meta_data()->clear();
3990        mBuffer->meta_data()->setInt64(
3991                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3992        mBuffer->meta_data()->setInt64(
3993                kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
3994
3995        if (targetSampleTimeUs >= 0) {
3996            mBuffer->meta_data()->setInt64(
3997                    kKeyTargetTime, targetSampleTimeUs);
3998        }
3999
4000        if (isSyncSample) {
4001            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4002        }
4003
4004        ++mCurrentSampleIndex;
4005
4006        *out = mBuffer;
4007        mBuffer = NULL;
4008
4009        return OK;
4010    }
4011}
4012
4013status_t MPEG4Source::fragmentedRead(
4014        MediaBuffer **out, const ReadOptions *options) {
4015
4016    ALOGV("MPEG4Source::fragmentedRead");
4017
4018    CHECK(mStarted);
4019
4020    *out = NULL;
4021
4022    int64_t targetSampleTimeUs = -1;
4023
4024    int64_t seekTimeUs;
4025    ReadOptions::SeekMode mode;
4026    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
4027
4028        int numSidxEntries = mSegments.size();
4029        if (numSidxEntries != 0) {
4030            int64_t totalTime = 0;
4031            off64_t totalOffset = mFirstMoofOffset;
4032            for (int i = 0; i < numSidxEntries; i++) {
4033                const SidxEntry *se = &mSegments[i];
4034                if (totalTime + se->mDurationUs > seekTimeUs) {
4035                    // The requested time is somewhere in this segment
4036                    if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
4037                        (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
4038                        (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
4039                        // requested next sync, or closest sync and it was closer to the end of
4040                        // this segment
4041                        totalTime += se->mDurationUs;
4042                        totalOffset += se->mSize;
4043                    }
4044                    break;
4045                }
4046                totalTime += se->mDurationUs;
4047                totalOffset += se->mSize;
4048            }
4049            mCurrentMoofOffset = totalOffset;
4050            mCurrentSamples.clear();
4051            mCurrentSampleIndex = 0;
4052            parseChunk(&totalOffset);
4053            mCurrentTime = totalTime * mTimescale / 1000000ll;
4054        } else {
4055            // without sidx boxes, we can only seek to 0
4056            mCurrentMoofOffset = mFirstMoofOffset;
4057            mCurrentSamples.clear();
4058            mCurrentSampleIndex = 0;
4059            off64_t tmp = mCurrentMoofOffset;
4060            parseChunk(&tmp);
4061            mCurrentTime = 0;
4062        }
4063
4064        if (mBuffer != NULL) {
4065            mBuffer->release();
4066            mBuffer = NULL;
4067        }
4068
4069        // fall through
4070    }
4071
4072    off64_t offset = 0;
4073    size_t size = 0;
4074    uint32_t cts = 0;
4075    bool isSyncSample = false;
4076    bool newBuffer = false;
4077    if (mBuffer == NULL) {
4078        newBuffer = true;
4079
4080        if (mCurrentSampleIndex >= mCurrentSamples.size()) {
4081            // move to next fragment if there is one
4082            if (mNextMoofOffset <= mCurrentMoofOffset) {
4083                return ERROR_END_OF_STREAM;
4084            }
4085            off64_t nextMoof = mNextMoofOffset;
4086            mCurrentMoofOffset = nextMoof;
4087            mCurrentSamples.clear();
4088            mCurrentSampleIndex = 0;
4089            parseChunk(&nextMoof);
4090            if (mCurrentSampleIndex >= mCurrentSamples.size()) {
4091                return ERROR_END_OF_STREAM;
4092            }
4093        }
4094
4095        const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
4096        offset = smpl->offset;
4097        size = smpl->size;
4098        cts = mCurrentTime + smpl->compositionOffset;
4099        mCurrentTime += smpl->duration;
4100        isSyncSample = (mCurrentSampleIndex == 0); // XXX
4101
4102        status_t err = mGroup->acquire_buffer(&mBuffer);
4103
4104        if (err != OK) {
4105            CHECK(mBuffer == NULL);
4106            ALOGV("acquire_buffer returned %d", err);
4107            return err;
4108        }
4109        if (size > mBuffer->size()) {
4110            ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
4111            return ERROR_BUFFER_TOO_SMALL;
4112        }
4113    }
4114
4115    const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
4116    const sp<MetaData> bufmeta = mBuffer->meta_data();
4117    bufmeta->clear();
4118    if (smpl->encryptedsizes.size()) {
4119        // store clear/encrypted lengths in metadata
4120        bufmeta->setData(kKeyPlainSizes, 0,
4121                smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
4122        bufmeta->setData(kKeyEncryptedSizes, 0,
4123                smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
4124        bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size?
4125        bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
4126        bufmeta->setInt32(kKeyCryptoMode, mCryptoMode);
4127        bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16);
4128    }
4129
4130    if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) {
4131        if (newBuffer) {
4132            if (!isInRange((size_t)0u, mBuffer->size(), size)) {
4133                mBuffer->release();
4134                mBuffer = NULL;
4135
4136                ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
4137                return ERROR_MALFORMED;
4138            }
4139
4140            ssize_t num_bytes_read =
4141                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
4142
4143            if (num_bytes_read < (ssize_t)size) {
4144                mBuffer->release();
4145                mBuffer = NULL;
4146
4147                ALOGE("i/o error");
4148                return ERROR_IO;
4149            }
4150
4151            CHECK(mBuffer != NULL);
4152            mBuffer->set_range(0, size);
4153            mBuffer->meta_data()->setInt64(
4154                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4155            mBuffer->meta_data()->setInt64(
4156                    kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
4157
4158            if (targetSampleTimeUs >= 0) {
4159                mBuffer->meta_data()->setInt64(
4160                        kKeyTargetTime, targetSampleTimeUs);
4161            }
4162
4163            if (isSyncSample) {
4164                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4165            }
4166
4167            ++mCurrentSampleIndex;
4168        }
4169
4170        if (!mIsAVC && !mIsHEVC) {
4171            *out = mBuffer;
4172            mBuffer = NULL;
4173
4174            return OK;
4175        }
4176
4177        // Each NAL unit is split up into its constituent fragments and
4178        // each one of them returned in its own buffer.
4179
4180        CHECK(mBuffer->range_length() >= mNALLengthSize);
4181
4182        const uint8_t *src =
4183            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
4184
4185        size_t nal_size = parseNALSize(src);
4186        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
4187            ALOGE("incomplete NAL unit.");
4188
4189            mBuffer->release();
4190            mBuffer = NULL;
4191
4192            return ERROR_MALFORMED;
4193        }
4194
4195        MediaBuffer *clone = mBuffer->clone();
4196        CHECK(clone != NULL);
4197        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
4198
4199        CHECK(mBuffer != NULL);
4200        mBuffer->set_range(
4201                mBuffer->range_offset() + mNALLengthSize + nal_size,
4202                mBuffer->range_length() - mNALLengthSize - nal_size);
4203
4204        if (mBuffer->range_length() == 0) {
4205            mBuffer->release();
4206            mBuffer = NULL;
4207        }
4208
4209        *out = clone;
4210
4211        return OK;
4212    } else {
4213        ALOGV("whole NAL");
4214        // Whole NAL units are returned but each fragment is prefixed by
4215        // the start code (0x00 00 00 01).
4216        ssize_t num_bytes_read = 0;
4217        int32_t drm = 0;
4218        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
4219        void *data = NULL;
4220        bool isMalFormed = false;
4221        if (usesDRM) {
4222            if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) {
4223                isMalFormed = true;
4224            } else {
4225                data = mBuffer->data();
4226            }
4227        } else {
4228            int32_t max_size;
4229            if (mFormat == NULL
4230                    || !mFormat->findInt32(kKeyMaxInputSize, &max_size)
4231                    || !isInRange((size_t)0u, (size_t)max_size, size)) {
4232                isMalFormed = true;
4233            } else {
4234                data = mSrcBuffer;
4235            }
4236        }
4237
4238        if (isMalFormed || data == NULL) {
4239            ALOGE("isMalFormed size %zu", size);
4240            if (mBuffer != NULL) {
4241                mBuffer->release();
4242                mBuffer = NULL;
4243            }
4244            return ERROR_MALFORMED;
4245        }
4246        num_bytes_read = mDataSource->readAt(offset, data, size);
4247
4248        if (num_bytes_read < (ssize_t)size) {
4249            mBuffer->release();
4250            mBuffer = NULL;
4251
4252            ALOGE("i/o error");
4253            return ERROR_IO;
4254        }
4255
4256        if (usesDRM) {
4257            CHECK(mBuffer != NULL);
4258            mBuffer->set_range(0, size);
4259
4260        } else {
4261            uint8_t *dstData = (uint8_t *)mBuffer->data();
4262            size_t srcOffset = 0;
4263            size_t dstOffset = 0;
4264
4265            while (srcOffset < size) {
4266                isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
4267                size_t nalLength = 0;
4268                if (!isMalFormed) {
4269                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
4270                    srcOffset += mNALLengthSize;
4271                    isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
4272                            || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
4273                            || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
4274                }
4275
4276                if (isMalFormed) {
4277                    ALOGE("Video is malformed; nalLength %zu", nalLength);
4278                    mBuffer->release();
4279                    mBuffer = NULL;
4280                    return ERROR_MALFORMED;
4281                }
4282
4283                if (nalLength == 0) {
4284                    continue;
4285                }
4286
4287                CHECK(dstOffset + 4 <= mBuffer->size());
4288
4289                dstData[dstOffset++] = 0;
4290                dstData[dstOffset++] = 0;
4291                dstData[dstOffset++] = 0;
4292                dstData[dstOffset++] = 1;
4293                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
4294                srcOffset += nalLength;
4295                dstOffset += nalLength;
4296            }
4297            CHECK_EQ(srcOffset, size);
4298            CHECK(mBuffer != NULL);
4299            mBuffer->set_range(0, dstOffset);
4300        }
4301
4302        mBuffer->meta_data()->setInt64(
4303                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4304        mBuffer->meta_data()->setInt64(
4305                kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
4306
4307        if (targetSampleTimeUs >= 0) {
4308            mBuffer->meta_data()->setInt64(
4309                    kKeyTargetTime, targetSampleTimeUs);
4310        }
4311
4312        if (isSyncSample) {
4313            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4314        }
4315
4316        ++mCurrentSampleIndex;
4317
4318        *out = mBuffer;
4319        mBuffer = NULL;
4320
4321        return OK;
4322    }
4323}
4324
4325MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
4326        const char *mimePrefix) {
4327    for (Track *track = mFirstTrack; track != NULL; track = track->next) {
4328        const char *mime;
4329        if (track->meta != NULL
4330                && track->meta->findCString(kKeyMIMEType, &mime)
4331                && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
4332            return track;
4333        }
4334    }
4335
4336    return NULL;
4337}
4338
4339static bool LegacySniffMPEG4(
4340        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
4341    uint8_t header[8];
4342
4343    ssize_t n = source->readAt(4, header, sizeof(header));
4344    if (n < (ssize_t)sizeof(header)) {
4345        return false;
4346    }
4347
4348    if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
4349        || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
4350        || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
4351        || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
4352        || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
4353        || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) {
4354        *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4355        *confidence = 0.4;
4356
4357        return true;
4358    }
4359
4360    return false;
4361}
4362
4363static bool isCompatibleBrand(uint32_t fourcc) {
4364    static const uint32_t kCompatibleBrands[] = {
4365        FOURCC('i', 's', 'o', 'm'),
4366        FOURCC('i', 's', 'o', '2'),
4367        FOURCC('a', 'v', 'c', '1'),
4368        FOURCC('h', 'v', 'c', '1'),
4369        FOURCC('h', 'e', 'v', '1'),
4370        FOURCC('3', 'g', 'p', '4'),
4371        FOURCC('m', 'p', '4', '1'),
4372        FOURCC('m', 'p', '4', '2'),
4373
4374        // Won't promise that the following file types can be played.
4375        // Just give these file types a chance.
4376        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
4377        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
4378
4379        FOURCC('3', 'g', '2', 'a'),  // 3GPP2
4380        FOURCC('3', 'g', '2', 'b'),
4381    };
4382
4383    for (size_t i = 0;
4384         i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
4385         ++i) {
4386        if (kCompatibleBrands[i] == fourcc) {
4387            return true;
4388        }
4389    }
4390
4391    return false;
4392}
4393
4394// Attempt to actually parse the 'ftyp' atom and determine if a suitable
4395// compatible brand is present.
4396// Also try to identify where this file's metadata ends
4397// (end of the 'moov' atom) and report it to the caller as part of
4398// the metadata.
4399static bool BetterSniffMPEG4(
4400        const sp<DataSource> &source, String8 *mimeType, float *confidence,
4401        sp<AMessage> *meta) {
4402    // We scan up to 128 bytes to identify this file as an MP4.
4403    static const off64_t kMaxScanOffset = 128ll;
4404
4405    off64_t offset = 0ll;
4406    bool foundGoodFileType = false;
4407    off64_t moovAtomEndOffset = -1ll;
4408    bool done = false;
4409
4410    while (!done && offset < kMaxScanOffset) {
4411        uint32_t hdr[2];
4412        if (source->readAt(offset, hdr, 8) < 8) {
4413            return false;
4414        }
4415
4416        uint64_t chunkSize = ntohl(hdr[0]);
4417        uint32_t chunkType = ntohl(hdr[1]);
4418        off64_t chunkDataOffset = offset + 8;
4419
4420        if (chunkSize == 1) {
4421            if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
4422                return false;
4423            }
4424
4425            chunkSize = ntoh64(chunkSize);
4426            chunkDataOffset += 8;
4427
4428            if (chunkSize < 16) {
4429                // The smallest valid chunk is 16 bytes long in this case.
4430                return false;
4431            }
4432        } else if (chunkSize < 8) {
4433            // The smallest valid chunk is 8 bytes long.
4434            return false;
4435        }
4436
4437        off64_t chunkDataSize = offset + chunkSize - chunkDataOffset;
4438
4439        char chunkstring[5];
4440        MakeFourCCString(chunkType, chunkstring);
4441        ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, offset);
4442        switch (chunkType) {
4443            case FOURCC('f', 't', 'y', 'p'):
4444            {
4445                if (chunkDataSize < 8) {
4446                    return false;
4447                }
4448
4449                uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
4450                for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
4451                    if (i == 1) {
4452                        // Skip this index, it refers to the minorVersion,
4453                        // not a brand.
4454                        continue;
4455                    }
4456
4457                    uint32_t brand;
4458                    if (source->readAt(
4459                                chunkDataOffset + 4 * i, &brand, 4) < 4) {
4460                        return false;
4461                    }
4462
4463                    brand = ntohl(brand);
4464
4465                    if (isCompatibleBrand(brand)) {
4466                        foundGoodFileType = true;
4467                        break;
4468                    }
4469                }
4470
4471                if (!foundGoodFileType) {
4472                    return false;
4473                }
4474
4475                break;
4476            }
4477
4478            case FOURCC('m', 'o', 'o', 'v'):
4479            {
4480                moovAtomEndOffset = offset + chunkSize;
4481
4482                done = true;
4483                break;
4484            }
4485
4486            default:
4487                break;
4488        }
4489
4490        offset += chunkSize;
4491    }
4492
4493    if (!foundGoodFileType) {
4494        return false;
4495    }
4496
4497    *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4498    *confidence = 0.4f;
4499
4500    if (moovAtomEndOffset >= 0) {
4501        *meta = new AMessage;
4502        (*meta)->setInt64("meta-data-size", moovAtomEndOffset);
4503
4504        ALOGV("found metadata size: %lld", moovAtomEndOffset);
4505    }
4506
4507    return true;
4508}
4509
4510bool SniffMPEG4(
4511        const sp<DataSource> &source, String8 *mimeType, float *confidence,
4512        sp<AMessage> *meta) {
4513    if (BetterSniffMPEG4(source, mimeType, confidence, meta)) {
4514        return true;
4515    }
4516
4517    if (LegacySniffMPEG4(source, mimeType, confidence)) {
4518        ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
4519        return true;
4520    }
4521
4522    return false;
4523}
4524
4525}  // namespace android
4526