MPEG4Extractor.cpp revision 75f6043574256313de760628806df441566fd114
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MPEG4Extractor"
19
20#include <ctype.h>
21#include <inttypes.h>
22#include <stdint.h>
23#include <stdlib.h>
24#include <string.h>
25
26#include <utils/Log.h>
27
28#include "include/MPEG4Extractor.h"
29#include "include/SampleTable.h"
30#include "include/ESDS.h"
31
32#include <media/stagefright/foundation/ABitReader.h>
33#include <media/stagefright/foundation/ABuffer.h>
34#include <media/stagefright/foundation/ADebug.h>
35#include <media/stagefright/foundation/AMessage.h>
36#include <media/stagefright/foundation/AUtils.h>
37#include <media/stagefright/MediaBuffer.h>
38#include <media/stagefright/MediaBufferGroup.h>
39#include <media/stagefright/MediaDefs.h>
40#include <media/stagefright/MediaSource.h>
41#include <media/stagefright/MetaData.h>
42#include <utils/String8.h>
43
44#include <byteswap.h>
45#include "include/ID3.h"
46
47namespace android {
48
49class MPEG4Source : public MediaSource {
50public:
51    // Caller retains ownership of both "dataSource" and "sampleTable".
52    MPEG4Source(const sp<MPEG4Extractor> &owner,
53                const sp<MetaData> &format,
54                const sp<DataSource> &dataSource,
55                int32_t timeScale,
56                const sp<SampleTable> &sampleTable,
57                Vector<SidxEntry> &sidx,
58                const Trex *trex,
59                off64_t firstMoofOffset);
60
61    virtual status_t start(MetaData *params = NULL);
62    virtual status_t stop();
63
64    virtual sp<MetaData> getFormat();
65
66    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
67    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);
68
69protected:
70    virtual ~MPEG4Source();
71
72private:
73    Mutex mLock;
74
75    // keep the MPEG4Extractor around, since we're referencing its data
76    sp<MPEG4Extractor> mOwner;
77    sp<MetaData> mFormat;
78    sp<DataSource> mDataSource;
79    int32_t mTimescale;
80    sp<SampleTable> mSampleTable;
81    uint32_t mCurrentSampleIndex;
82    uint32_t mCurrentFragmentIndex;
83    Vector<SidxEntry> &mSegments;
84    const Trex *mTrex;
85    off64_t mFirstMoofOffset;
86    off64_t mCurrentMoofOffset;
87    off64_t mNextMoofOffset;
88    uint32_t mCurrentTime;
89    int32_t mLastParsedTrackId;
90    int32_t mTrackId;
91
92    int32_t mCryptoMode;    // passed in from extractor
93    int32_t mDefaultIVSize; // passed in from extractor
94    uint8_t mCryptoKey[16]; // passed in from extractor
95    uint32_t mCurrentAuxInfoType;
96    uint32_t mCurrentAuxInfoTypeParameter;
97    int32_t mCurrentDefaultSampleInfoSize;
98    uint32_t mCurrentSampleInfoCount;
99    uint32_t mCurrentSampleInfoAllocSize;
100    uint8_t* mCurrentSampleInfoSizes;
101    uint32_t mCurrentSampleInfoOffsetCount;
102    uint32_t mCurrentSampleInfoOffsetsAllocSize;
103    uint64_t* mCurrentSampleInfoOffsets;
104
105    bool mIsAVC;
106    bool mIsHEVC;
107    size_t mNALLengthSize;
108
109    bool mStarted;
110
111    MediaBufferGroup *mGroup;
112
113    MediaBuffer *mBuffer;
114
115    bool mWantsNALFragments;
116
117    uint8_t *mSrcBuffer;
118
119    size_t parseNALSize(const uint8_t *data) const;
120    status_t parseChunk(off64_t *offset);
121    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
122    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
123    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
124    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
125
126    struct TrackFragmentHeaderInfo {
127        enum Flags {
128            kBaseDataOffsetPresent         = 0x01,
129            kSampleDescriptionIndexPresent = 0x02,
130            kDefaultSampleDurationPresent  = 0x08,
131            kDefaultSampleSizePresent      = 0x10,
132            kDefaultSampleFlagsPresent     = 0x20,
133            kDurationIsEmpty               = 0x10000,
134        };
135
136        uint32_t mTrackID;
137        uint32_t mFlags;
138        uint64_t mBaseDataOffset;
139        uint32_t mSampleDescriptionIndex;
140        uint32_t mDefaultSampleDuration;
141        uint32_t mDefaultSampleSize;
142        uint32_t mDefaultSampleFlags;
143
144        uint64_t mDataOffset;
145    };
146    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;
147
148    struct Sample {
149        off64_t offset;
150        size_t size;
151        uint32_t duration;
152        int32_t compositionOffset;
153        uint8_t iv[16];
154        Vector<size_t> clearsizes;
155        Vector<size_t> encryptedsizes;
156    };
157    Vector<Sample> mCurrentSamples;
158
159    MPEG4Source(const MPEG4Source &);
160    MPEG4Source &operator=(const MPEG4Source &);
161};
162
163// This custom data source wraps an existing one and satisfies requests
164// falling entirely within a cached range from the cache while forwarding
165// all remaining requests to the wrapped datasource.
166// This is used to cache the full sampletable metadata for a single track,
167// possibly wrapping multiple times to cover all tracks, i.e.
168// Each MPEG4DataSource caches the sampletable metadata for a single track.
169
170struct MPEG4DataSource : public DataSource {
171    MPEG4DataSource(const sp<DataSource> &source);
172
173    virtual status_t initCheck() const;
174    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
175    virtual status_t getSize(off64_t *size);
176    virtual uint32_t flags();
177
178    status_t setCachedRange(off64_t offset, size_t size);
179
180protected:
181    virtual ~MPEG4DataSource();
182
183private:
184    Mutex mLock;
185
186    sp<DataSource> mSource;
187    off64_t mCachedOffset;
188    size_t mCachedSize;
189    uint8_t *mCache;
190
191    void clearCache();
192
193    MPEG4DataSource(const MPEG4DataSource &);
194    MPEG4DataSource &operator=(const MPEG4DataSource &);
195};
196
197MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
198    : mSource(source),
199      mCachedOffset(0),
200      mCachedSize(0),
201      mCache(NULL) {
202}
203
204MPEG4DataSource::~MPEG4DataSource() {
205    clearCache();
206}
207
208void MPEG4DataSource::clearCache() {
209    if (mCache) {
210        free(mCache);
211        mCache = NULL;
212    }
213
214    mCachedOffset = 0;
215    mCachedSize = 0;
216}
217
218status_t MPEG4DataSource::initCheck() const {
219    return mSource->initCheck();
220}
221
222ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
223    Mutex::Autolock autoLock(mLock);
224
225    if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
226        memcpy(data, &mCache[offset - mCachedOffset], size);
227        return size;
228    }
229
230    return mSource->readAt(offset, data, size);
231}
232
233status_t MPEG4DataSource::getSize(off64_t *size) {
234    return mSource->getSize(size);
235}
236
237uint32_t MPEG4DataSource::flags() {
238    return mSource->flags();
239}
240
241status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
242    Mutex::Autolock autoLock(mLock);
243
244    clearCache();
245
246    mCache = (uint8_t *)malloc(size);
247
248    if (mCache == NULL) {
249        return -ENOMEM;
250    }
251
252    mCachedOffset = offset;
253    mCachedSize = size;
254
255    ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
256
257    if (err < (ssize_t)size) {
258        clearCache();
259
260        return ERROR_IO;
261    }
262
263    return OK;
264}
265
266////////////////////////////////////////////////////////////////////////////////
267
// Compile-time switch for dumping chunk headers with hexdump() while parsing.
static const bool kUseHexDump = false;

// Prints "size" bytes starting at "_data" to stdout, 16 bytes per row. Each
// row shows the row offset, the bytes in hex (with an extra gap after the
// eighth byte) and the printable characters, using '.' for everything else.
static void hexdump(const void *_data, size_t size) {
    const uint8_t *bytes = static_cast<const uint8_t *>(_data);

    for (size_t rowStart = 0; rowStart < size; rowStart += 16) {
        printf("0x%04zx  ", rowStart);

        // Number of valid bytes in this row (the last row may be short).
        const size_t remaining = size - rowStart;
        const size_t rowLen = (remaining > 16) ? 16 : remaining;

        // Hex column: always 16 slots wide so the text column lines up.
        for (size_t col = 0; col < 16; ++col) {
            if (col == 8) {
                printf(" ");
            }

            if (col < rowLen) {
                printf("%02x ", bytes[rowStart + col]);
            } else {
                printf("   ");
            }
        }

        printf(" ");

        // Text column.
        for (size_t col = 0; col < rowLen; ++col) {
            const uint8_t value = bytes[rowStart + col];
            if (isprint(value)) {
                printf("%c", value);
            } else {
                printf(".");
            }
        }

        printf("\n");
    }
}
308
309static const char *FourCC2MIME(uint32_t fourcc) {
310    switch (fourcc) {
311        case FOURCC('m', 'p', '4', 'a'):
312            return MEDIA_MIMETYPE_AUDIO_AAC;
313
314        case FOURCC('s', 'a', 'm', 'r'):
315            return MEDIA_MIMETYPE_AUDIO_AMR_NB;
316
317        case FOURCC('s', 'a', 'w', 'b'):
318            return MEDIA_MIMETYPE_AUDIO_AMR_WB;
319
320        case FOURCC('m', 'p', '4', 'v'):
321            return MEDIA_MIMETYPE_VIDEO_MPEG4;
322
323        case FOURCC('s', '2', '6', '3'):
324        case FOURCC('h', '2', '6', '3'):
325        case FOURCC('H', '2', '6', '3'):
326            return MEDIA_MIMETYPE_VIDEO_H263;
327
328        case FOURCC('a', 'v', 'c', '1'):
329            return MEDIA_MIMETYPE_VIDEO_AVC;
330
331        case FOURCC('h', 'v', 'c', '1'):
332        case FOURCC('h', 'e', 'v', '1'):
333            return MEDIA_MIMETYPE_VIDEO_HEVC;
334        default:
335            CHECK(!"should not be here.");
336            return NULL;
337    }
338}
339
340static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
341    if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
342        // AMR NB audio is always mono, 8kHz
343        *channels = 1;
344        *rate = 8000;
345        return true;
346    } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
347        // AMR WB audio is always mono, 16kHz
348        *channels = 1;
349        *rate = 16000;
350        return true;
351    }
352    return false;
353}
354
355MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
356    : mMoofOffset(0),
357      mDataSource(source),
358      mInitCheck(NO_INIT),
359      mHasVideo(false),
360      mHeaderTimescale(0),
361      mFirstTrack(NULL),
362      mLastTrack(NULL),
363      mFileMetaData(new MetaData),
364      mFirstSINF(NULL),
365      mIsDrm(false) {
366}
367
368MPEG4Extractor::~MPEG4Extractor() {
369    Track *track = mFirstTrack;
370    while (track) {
371        Track *next = track->next;
372
373        delete track;
374        track = next;
375    }
376    mFirstTrack = mLastTrack = NULL;
377
378    SINF *sinf = mFirstSINF;
379    while (sinf) {
380        SINF *next = sinf->next;
381        delete[] sinf->IPMPData;
382        delete sinf;
383        sinf = next;
384    }
385    mFirstSINF = NULL;
386
387    for (size_t i = 0; i < mPssh.size(); i++) {
388        delete [] mPssh[i].data;
389    }
390}
391
392uint32_t MPEG4Extractor::flags() const {
393    return CAN_PAUSE |
394            ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
395                    (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
396}
397
398sp<MetaData> MPEG4Extractor::getMetaData() {
399    status_t err;
400    if ((err = readMetaData()) != OK) {
401        return new MetaData;
402    }
403
404    return mFileMetaData;
405}
406
407size_t MPEG4Extractor::countTracks() {
408    status_t err;
409    if ((err = readMetaData()) != OK) {
410        ALOGV("MPEG4Extractor::countTracks: no tracks");
411        return 0;
412    }
413
414    size_t n = 0;
415    Track *track = mFirstTrack;
416    while (track) {
417        ++n;
418        track = track->next;
419    }
420
421    ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
422    return n;
423}
424
425sp<MetaData> MPEG4Extractor::getTrackMetaData(
426        size_t index, uint32_t flags) {
427    status_t err;
428    if ((err = readMetaData()) != OK) {
429        return NULL;
430    }
431
432    Track *track = mFirstTrack;
433    while (index > 0) {
434        if (track == NULL) {
435            return NULL;
436        }
437
438        track = track->next;
439        --index;
440    }
441
442    if (track == NULL) {
443        return NULL;
444    }
445
446    if ((flags & kIncludeExtensiveMetaData)
447            && !track->includes_expensive_metadata) {
448        track->includes_expensive_metadata = true;
449
450        const char *mime;
451        CHECK(track->meta->findCString(kKeyMIMEType, &mime));
452        if (!strncasecmp("video/", mime, 6)) {
453            if (mMoofOffset > 0) {
454                int64_t duration;
455                if (track->meta->findInt64(kKeyDuration, &duration)) {
456                    // nothing fancy, just pick a frame near 1/4th of the duration
457                    track->meta->setInt64(
458                            kKeyThumbnailTime, duration / 4);
459                }
460            } else {
461                uint32_t sampleIndex;
462                uint32_t sampleTime;
463                if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK
464                        && track->sampleTable->getMetaDataForSample(
465                            sampleIndex, NULL /* offset */, NULL /* size */,
466                            &sampleTime) == OK) {
467                    track->meta->setInt64(
468                            kKeyThumbnailTime,
469                            ((int64_t)sampleTime * 1000000) / track->timescale);
470                }
471            }
472        }
473    }
474
475    return track->meta;
476}
477
// Writes the four bytes of "x", most significant byte first, into "s" and
// NUL-terminates the result. "s" must have room for at least 5 characters.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int i = 0; i < 4; ++i) {
        const int shift = 24 - 8 * i;
        s[i] = (x >> shift) & 0xff;
    }
    s[4] = '\0';
}
485
486status_t MPEG4Extractor::readMetaData() {
487    if (mInitCheck != NO_INIT) {
488        return mInitCheck;
489    }
490
491    off64_t offset = 0;
492    status_t err;
493    while (true) {
494        off64_t orig_offset = offset;
495        err = parseChunk(&offset, 0);
496
497        if (err != OK && err != UNKNOWN_ERROR) {
498            break;
499        } else if (offset <= orig_offset) {
500            // only continue parsing if the offset was advanced,
501            // otherwise we might end up in an infinite loop
502            ALOGE("did not advance: 0x%lld->0x%lld", orig_offset, offset);
503            err = ERROR_MALFORMED;
504            break;
505        } else if (err == OK) {
506            continue;
507        }
508
509        uint32_t hdr[2];
510        if (mDataSource->readAt(offset, hdr, 8) < 8) {
511            break;
512        }
513        uint32_t chunk_type = ntohl(hdr[1]);
514        if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
515            // store the offset of the first segment
516            mMoofOffset = offset;
517        } else if (chunk_type != FOURCC('m', 'd', 'a', 't')) {
518            // keep parsing until we get to the data
519            continue;
520        }
521        break;
522    }
523
524    if (mInitCheck == OK) {
525        if (mHasVideo) {
526            mFileMetaData->setCString(
527                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
528        } else {
529            mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
530        }
531    } else {
532        mInitCheck = err;
533    }
534
535    CHECK_NE(err, (status_t)NO_INIT);
536
537    // copy pssh data into file metadata
538    int psshsize = 0;
539    for (size_t i = 0; i < mPssh.size(); i++) {
540        psshsize += 20 + mPssh[i].datalen;
541    }
542    if (psshsize) {
543        char *buf = (char*)malloc(psshsize);
544        char *ptr = buf;
545        for (size_t i = 0; i < mPssh.size(); i++) {
546            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
547            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
548            ptr += (20 + mPssh[i].datalen);
549        }
550        mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize);
551        free(buf);
552    }
553    return mInitCheck;
554}
555
556char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
557    if (mFirstSINF == NULL) {
558        return NULL;
559    }
560
561    SINF *sinf = mFirstSINF;
562    while (sinf && (trackID != sinf->trackID)) {
563        sinf = sinf->next;
564    }
565
566    if (sinf == NULL) {
567        return NULL;
568    }
569
570    *len = sinf->len;
571    return sinf->IPMPData;
572}
573
574// Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
575static int32_t readSize(off64_t offset,
576        const sp<DataSource> DataSource, uint8_t *numOfBytes) {
577    uint32_t size = 0;
578    uint8_t data;
579    bool moreData = true;
580    *numOfBytes = 0;
581
582    while (moreData) {
583        if (DataSource->readAt(offset, &data, 1) < 1) {
584            return -1;
585        }
586        offset ++;
587        moreData = (data >= 128) ? true : false;
588        size = (size << 7) | (data & 0x7f); // Take last 7 bits
589        (*numOfBytes) ++;
590    }
591
592    return size;
593}
594
595status_t MPEG4Extractor::parseDrmSINF(
596        off64_t * /* offset */, off64_t data_offset) {
597    uint8_t updateIdTag;
598    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
599        return ERROR_IO;
600    }
601    data_offset ++;
602
603    if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
604        return ERROR_MALFORMED;
605    }
606
607    uint8_t numOfBytes;
608    int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
609    if (size < 0) {
610        return ERROR_IO;
611    }
612    data_offset += numOfBytes;
613
614    while(size >= 11 ) {
615        uint8_t descriptorTag;
616        if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
617            return ERROR_IO;
618        }
619        data_offset ++;
620
621        if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
622            return ERROR_MALFORMED;
623        }
624
625        uint8_t buffer[8];
626        //ObjectDescriptorID and ObjectDescriptor url flag
627        if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
628            return ERROR_IO;
629        }
630        data_offset += 2;
631
632        if ((buffer[1] >> 5) & 0x0001) { //url flag is set
633            return ERROR_MALFORMED;
634        }
635
636        if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
637            return ERROR_IO;
638        }
639        data_offset += 8;
640
641        if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
642                || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
643            return ERROR_MALFORMED;
644        }
645
646        SINF *sinf = new SINF;
647        sinf->trackID = U16_AT(&buffer[3]);
648        sinf->IPMPDescriptorID = buffer[7];
649        sinf->next = mFirstSINF;
650        mFirstSINF = sinf;
651
652        size -= (8 + 2 + 1);
653    }
654
655    if (size != 0) {
656        return ERROR_MALFORMED;
657    }
658
659    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
660        return ERROR_IO;
661    }
662    data_offset ++;
663
664    if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
665        return ERROR_MALFORMED;
666    }
667
668    size = readSize(data_offset, mDataSource, &numOfBytes);
669    if (size < 0) {
670        return ERROR_IO;
671    }
672    data_offset += numOfBytes;
673
674    while (size > 0) {
675        uint8_t tag;
676        int32_t dataLen;
677        if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
678            return ERROR_IO;
679        }
680        data_offset ++;
681
682        if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
683            uint8_t id;
684            dataLen = readSize(data_offset, mDataSource, &numOfBytes);
685            if (dataLen < 0) {
686                return ERROR_IO;
687            } else if (dataLen < 4) {
688                return ERROR_MALFORMED;
689            }
690            data_offset += numOfBytes;
691
692            if (mDataSource->readAt(data_offset, &id, 1) < 1) {
693                return ERROR_IO;
694            }
695            data_offset ++;
696
697            SINF *sinf = mFirstSINF;
698            while (sinf && (sinf->IPMPDescriptorID != id)) {
699                sinf = sinf->next;
700            }
701            if (sinf == NULL) {
702                return ERROR_MALFORMED;
703            }
704            sinf->len = dataLen - 3;
705            sinf->IPMPData = new (std::nothrow) char[sinf->len];
706            if (sinf->IPMPData == NULL) {
707                return ERROR_MALFORMED;
708            }
709            data_offset += 2;
710
711            if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) {
712                return ERROR_IO;
713            }
714            data_offset += sinf->len;
715
716            size -= (dataLen + numOfBytes + 1);
717        }
718    }
719
720    if (size != 0) {
721        return ERROR_MALFORMED;
722    }
723
724    return UNKNOWN_ERROR;  // Return a dummy error.
725}
726
727struct PathAdder {
728    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
729        : mPath(path) {
730        mPath->push(chunkType);
731    }
732
733    ~PathAdder() {
734        mPath->pop();
735    }
736
737private:
738    Vector<uint32_t> *mPath;
739
740    PathAdder(const PathAdder &);
741    PathAdder &operator=(const PathAdder &);
742};
743
744static bool underMetaDataPath(const Vector<uint32_t> &path) {
745    return path.size() >= 5
746        && path[0] == FOURCC('m', 'o', 'o', 'v')
747        && path[1] == FOURCC('u', 'd', 't', 'a')
748        && path[2] == FOURCC('m', 'e', 't', 'a')
749        && path[3] == FOURCC('i', 'l', 's', 't');
750}
751
752// Given a time in seconds since Jan 1 1904, produce a human-readable string.
753static void convertTimeToDate(int64_t time_1904, String8 *s) {
754    time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600);
755
756    char tmp[32];
757    strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970));
758
759    s->setTo(tmp);
760}
761
762status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
763    ALOGV("entering parseChunk %lld/%d", *offset, depth);
764    uint32_t hdr[2];
765    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
766        return ERROR_IO;
767    }
768    uint64_t chunk_size = ntohl(hdr[0]);
769    int32_t chunk_type = ntohl(hdr[1]);
770    off64_t data_offset = *offset + 8;
771
772    if (chunk_size == 1) {
773        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
774            return ERROR_IO;
775        }
776        chunk_size = ntoh64(chunk_size);
777        data_offset += 8;
778
779        if (chunk_size < 16) {
780            // The smallest valid chunk is 16 bytes long in this case.
781            return ERROR_MALFORMED;
782        }
783    } else if (chunk_size == 0) {
784        if (depth == 0) {
785            // atom extends to end of file
786            off64_t sourceSize;
787            if (mDataSource->getSize(&sourceSize) == OK) {
788                chunk_size = (sourceSize - *offset);
789            } else {
790                // XXX could we just pick a "sufficiently large" value here?
791                ALOGE("atom size is 0, and data source has no size");
792                return ERROR_MALFORMED;
793            }
794        } else {
795            // not allowed for non-toplevel atoms, skip it
796            *offset += 4;
797            return OK;
798        }
799    } else if (chunk_size < 8) {
800        // The smallest valid chunk is 8 bytes long.
801        ALOGE("invalid chunk size: %" PRIu64, chunk_size);
802        return ERROR_MALFORMED;
803    }
804
805    char chunk[5];
806    MakeFourCCString(chunk_type, chunk);
807    ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth);
808
809    if (kUseHexDump) {
810        static const char kWhitespace[] = "                                        ";
811        const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
812        printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
813
814        char buffer[256];
815        size_t n = chunk_size;
816        if (n > sizeof(buffer)) {
817            n = sizeof(buffer);
818        }
819        if (mDataSource->readAt(*offset, buffer, n)
820                < (ssize_t)n) {
821            return ERROR_IO;
822        }
823
824        hexdump(buffer, n);
825    }
826
827    PathAdder autoAdder(&mPath, chunk_type);
828
829    off64_t chunk_data_size = *offset + chunk_size - data_offset;
830
831    if (chunk_type != FOURCC('c', 'p', 'r', 't')
832            && chunk_type != FOURCC('c', 'o', 'v', 'r')
833            && mPath.size() == 5 && underMetaDataPath(mPath)) {
834        off64_t stop_offset = *offset + chunk_size;
835        *offset = data_offset;
836        while (*offset < stop_offset) {
837            status_t err = parseChunk(offset, depth + 1);
838            if (err != OK) {
839                return err;
840            }
841        }
842
843        if (*offset != stop_offset) {
844            return ERROR_MALFORMED;
845        }
846
847        return OK;
848    }
849
850    switch(chunk_type) {
851        case FOURCC('m', 'o', 'o', 'v'):
852        case FOURCC('t', 'r', 'a', 'k'):
853        case FOURCC('m', 'd', 'i', 'a'):
854        case FOURCC('m', 'i', 'n', 'f'):
855        case FOURCC('d', 'i', 'n', 'f'):
856        case FOURCC('s', 't', 'b', 'l'):
857        case FOURCC('m', 'v', 'e', 'x'):
858        case FOURCC('m', 'o', 'o', 'f'):
859        case FOURCC('t', 'r', 'a', 'f'):
860        case FOURCC('m', 'f', 'r', 'a'):
861        case FOURCC('u', 'd', 't', 'a'):
862        case FOURCC('i', 'l', 's', 't'):
863        case FOURCC('s', 'i', 'n', 'f'):
864        case FOURCC('s', 'c', 'h', 'i'):
865        case FOURCC('e', 'd', 't', 's'):
866        {
867            if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
868                ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
869
870                if (mDataSource->flags()
871                        & (DataSource::kWantsPrefetching
872                            | DataSource::kIsCachingDataSource)) {
873                    sp<MPEG4DataSource> cachedSource =
874                        new MPEG4DataSource(mDataSource);
875
876                    if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
877                        mDataSource = cachedSource;
878                    }
879                }
880
881                mLastTrack->sampleTable = new SampleTable(mDataSource);
882            }
883
884            bool isTrack = false;
885            if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
886                isTrack = true;
887
888                Track *track = new Track;
889                track->next = NULL;
890                if (mLastTrack) {
891                    mLastTrack->next = track;
892                } else {
893                    mFirstTrack = track;
894                }
895                mLastTrack = track;
896
897                track->meta = new MetaData;
898                track->includes_expensive_metadata = false;
899                track->skipTrack = false;
900                track->timescale = 0;
901                track->meta->setCString(kKeyMIMEType, "application/octet-stream");
902            }
903
904            off64_t stop_offset = *offset + chunk_size;
905            *offset = data_offset;
906            while (*offset < stop_offset) {
907                status_t err = parseChunk(offset, depth + 1);
908                if (err != OK) {
909                    return err;
910                }
911            }
912
913            if (*offset != stop_offset) {
914                return ERROR_MALFORMED;
915            }
916
917            if (isTrack) {
918                if (mLastTrack->skipTrack) {
919                    Track *cur = mFirstTrack;
920
921                    if (cur == mLastTrack) {
922                        delete cur;
923                        mFirstTrack = mLastTrack = NULL;
924                    } else {
925                        while (cur && cur->next != mLastTrack) {
926                            cur = cur->next;
927                        }
928                        cur->next = NULL;
929                        delete mLastTrack;
930                        mLastTrack = cur;
931                    }
932
933                    return OK;
934                }
935
936                status_t err = verifyTrack(mLastTrack);
937
938                if (err != OK) {
939                    return err;
940                }
941            } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
942                mInitCheck = OK;
943
944                if (!mIsDrm) {
945                    return UNKNOWN_ERROR;  // Return a dummy error.
946                } else {
947                    return OK;
948                }
949            }
950            break;
951        }
952
953        case FOURCC('e', 'l', 's', 't'):
954        {
955            *offset += chunk_size;
956
957            // See 14496-12 8.6.6
958            uint8_t version;
959            if (mDataSource->readAt(data_offset, &version, 1) < 1) {
960                return ERROR_IO;
961            }
962
963            uint32_t entry_count;
964            if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
965                return ERROR_IO;
966            }
967
968            if (entry_count != 1) {
969                // we only support a single entry at the moment, for gapless playback
970                ALOGW("ignoring edit list with %d entries", entry_count);
971            } else if (mHeaderTimescale == 0) {
972                ALOGW("ignoring edit list because timescale is 0");
973            } else {
974                off64_t entriesoffset = data_offset + 8;
975                uint64_t segment_duration;
976                int64_t media_time;
977
978                if (version == 1) {
979                    if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
980                            !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
981                        return ERROR_IO;
982                    }
983                } else if (version == 0) {
984                    uint32_t sd;
985                    int32_t mt;
986                    if (!mDataSource->getUInt32(entriesoffset, &sd) ||
987                            !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
988                        return ERROR_IO;
989                    }
990                    segment_duration = sd;
991                    media_time = mt;
992                } else {
993                    return ERROR_IO;
994                }
995
996                uint64_t halfscale = mHeaderTimescale / 2;
997                segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale;
998                media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale;
999
1000                int64_t duration;
1001                int32_t samplerate;
1002                if (!mLastTrack) {
1003                    return ERROR_MALFORMED;
1004                }
1005                if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
1006                        mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {
1007
1008                    int64_t delay = (media_time  * samplerate + 500000) / 1000000;
1009                    mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
1010
1011                    int64_t paddingus = duration - (segment_duration + media_time);
1012                    if (paddingus < 0) {
1013                        // track duration from media header (which is what kKeyDuration is) might
1014                        // be slightly shorter than the segment duration, which would make the
1015                        // padding negative. Clamp to zero.
1016                        paddingus = 0;
1017                    }
1018                    int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
1019                    mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
1020                }
1021            }
1022            break;
1023        }
1024
1025        case FOURCC('f', 'r', 'm', 'a'):
1026        {
1027            *offset += chunk_size;
1028
1029            uint32_t original_fourcc;
1030            if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1031                return ERROR_IO;
1032            }
1033            original_fourcc = ntohl(original_fourcc);
1034            ALOGV("read original format: %d", original_fourcc);
1035            mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
1036            uint32_t num_channels = 0;
1037            uint32_t sample_rate = 0;
1038            if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1039                mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1040                mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1041            }
1042            break;
1043        }
1044
1045        case FOURCC('t', 'e', 'n', 'c'):
1046        {
1047            *offset += chunk_size;
1048
1049            if (chunk_size < 32) {
1050                return ERROR_MALFORMED;
1051            }
1052
1053            // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1054            // default IV size, 16 bytes default KeyID
1055            // (ISO 23001-7)
1056            char buf[4];
1057            memset(buf, 0, 4);
1058            if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1059                return ERROR_IO;
1060            }
1061            uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1062            if (defaultAlgorithmId > 1) {
1063                // only 0 (clear) and 1 (AES-128) are valid
1064                return ERROR_MALFORMED;
1065            }
1066
1067            memset(buf, 0, 4);
1068            if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1069                return ERROR_IO;
1070            }
1071            uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1072
1073            if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
1074                    (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
1075                // only unencrypted data must have 0 IV size
1076                return ERROR_MALFORMED;
1077            } else if (defaultIVSize != 0 &&
1078                    defaultIVSize != 8 &&
1079                    defaultIVSize != 16) {
1080                // only supported sizes are 0, 8 and 16
1081                return ERROR_MALFORMED;
1082            }
1083
1084            uint8_t defaultKeyId[16];
1085
1086            if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1087                return ERROR_IO;
1088            }
1089
1090            mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
1091            mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1092            mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1093            break;
1094        }
1095
1096        case FOURCC('t', 'k', 'h', 'd'):
1097        {
1098            *offset += chunk_size;
1099
1100            status_t err;
1101            if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1102                return err;
1103            }
1104
1105            break;
1106        }
1107
1108        case FOURCC('p', 's', 's', 'h'):
1109        {
1110            *offset += chunk_size;
1111
1112            PsshInfo pssh;
1113
1114            if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1115                return ERROR_IO;
1116            }
1117
1118            uint32_t psshdatalen = 0;
1119            if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1120                return ERROR_IO;
1121            }
1122            pssh.datalen = ntohl(psshdatalen);
1123            ALOGV("pssh data size: %d", pssh.datalen);
1124            if (pssh.datalen + 20 > chunk_size) {
1125                // pssh data length exceeds size of containing box
1126                return ERROR_MALFORMED;
1127            }
1128
1129            pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1130            if (pssh.data == NULL) {
1131                return ERROR_MALFORMED;
1132            }
1133            ALOGV("allocated pssh @ %p", pssh.data);
1134            ssize_t requested = (ssize_t) pssh.datalen;
1135            if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1136                return ERROR_IO;
1137            }
1138            mPssh.push_back(pssh);
1139
1140            break;
1141        }
1142
1143        case FOURCC('m', 'd', 'h', 'd'):
1144        {
1145            *offset += chunk_size;
1146
1147            if (chunk_data_size < 4 || mLastTrack == NULL) {
1148                return ERROR_MALFORMED;
1149            }
1150
1151            uint8_t version;
1152            if (mDataSource->readAt(
1153                        data_offset, &version, sizeof(version))
1154                    < (ssize_t)sizeof(version)) {
1155                return ERROR_IO;
1156            }
1157
1158            off64_t timescale_offset;
1159
1160            if (version == 1) {
1161                timescale_offset = data_offset + 4 + 16;
1162            } else if (version == 0) {
1163                timescale_offset = data_offset + 4 + 8;
1164            } else {
1165                return ERROR_IO;
1166            }
1167
1168            uint32_t timescale;
1169            if (mDataSource->readAt(
1170                        timescale_offset, &timescale, sizeof(timescale))
1171                    < (ssize_t)sizeof(timescale)) {
1172                return ERROR_IO;
1173            }
1174
1175            if (!timescale) {
1176                ALOGE("timescale should not be ZERO.");
1177                return ERROR_MALFORMED;
1178            }
1179
1180            mLastTrack->timescale = ntohl(timescale);
1181
1182            // 14496-12 says all ones means indeterminate, but some files seem to use
1183            // 0 instead. We treat both the same.
1184            int64_t duration = 0;
1185            if (version == 1) {
1186                if (mDataSource->readAt(
1187                            timescale_offset + 4, &duration, sizeof(duration))
1188                        < (ssize_t)sizeof(duration)) {
1189                    return ERROR_IO;
1190                }
1191                if (duration != -1) {
1192                    duration = ntoh64(duration);
1193                }
1194            } else {
1195                uint32_t duration32;
1196                if (mDataSource->readAt(
1197                            timescale_offset + 4, &duration32, sizeof(duration32))
1198                        < (ssize_t)sizeof(duration32)) {
1199                    return ERROR_IO;
1200                }
1201                if (duration32 != 0xffffffff) {
1202                    duration = ntohl(duration32);
1203                }
1204            }
1205            if (duration != 0) {
1206                mLastTrack->meta->setInt64(
1207                        kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1208            }
1209
1210            uint8_t lang[2];
1211            off64_t lang_offset;
1212            if (version == 1) {
1213                lang_offset = timescale_offset + 4 + 8;
1214            } else if (version == 0) {
1215                lang_offset = timescale_offset + 4 + 4;
1216            } else {
1217                return ERROR_IO;
1218            }
1219
1220            if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1221                    < (ssize_t)sizeof(lang)) {
1222                return ERROR_IO;
1223            }
1224
1225            // To get the ISO-639-2/T three character language code
1226            // 1 bit pad followed by 3 5-bits characters. Each character
1227            // is packed as the difference between its ASCII value and 0x60.
1228            char lang_code[4];
1229            lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1230            lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1231            lang_code[2] = (lang[1] & 0x1f) + 0x60;
1232            lang_code[3] = '\0';
1233
1234            mLastTrack->meta->setCString(
1235                    kKeyMediaLanguage, lang_code);
1236
1237            break;
1238        }
1239
1240        case FOURCC('s', 't', 's', 'd'):
1241        {
1242            if (chunk_data_size < 8) {
1243                return ERROR_MALFORMED;
1244            }
1245
1246            uint8_t buffer[8];
1247            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1248                return ERROR_MALFORMED;
1249            }
1250
1251            if (mDataSource->readAt(
1252                        data_offset, buffer, 8) < 8) {
1253                return ERROR_IO;
1254            }
1255
1256            if (U32_AT(buffer) != 0) {
1257                // Should be version 0, flags 0.
1258                return ERROR_MALFORMED;
1259            }
1260
1261            uint32_t entry_count = U32_AT(&buffer[4]);
1262
1263            if (entry_count > 1) {
1264                // For 3GPP timed text, there could be multiple tx3g boxes containing
1265                // multiple text display formats. These formats will be used to
1266                // display the timed text.
1267                // For encrypted files, there may also be more than one entry.
1268                const char *mime;
1269                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1270                if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1271                        strcasecmp(mime, "application/octet-stream")) {
1272                    // For now we only support a single type of media per track.
1273                    mLastTrack->skipTrack = true;
1274                    *offset += chunk_size;
1275                    break;
1276                }
1277            }
1278            off64_t stop_offset = *offset + chunk_size;
1279            *offset = data_offset + 8;
1280            for (uint32_t i = 0; i < entry_count; ++i) {
1281                status_t err = parseChunk(offset, depth + 1);
1282                if (err != OK) {
1283                    return err;
1284                }
1285            }
1286
1287            if (*offset != stop_offset) {
1288                return ERROR_MALFORMED;
1289            }
1290            break;
1291        }
1292
1293        case FOURCC('m', 'p', '4', 'a'):
1294        case FOURCC('e', 'n', 'c', 'a'):
1295        case FOURCC('s', 'a', 'm', 'r'):
1296        case FOURCC('s', 'a', 'w', 'b'):
1297        {
1298            uint8_t buffer[8 + 20];
1299            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1300                // Basic AudioSampleEntry size.
1301                return ERROR_MALFORMED;
1302            }
1303
1304            if (mDataSource->readAt(
1305                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1306                return ERROR_IO;
1307            }
1308
1309            uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1310            uint32_t num_channels = U16_AT(&buffer[16]);
1311
1312            uint16_t sample_size = U16_AT(&buffer[18]);
1313            uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1314
1315            if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1316                // if the chunk type is enca, we'll get the type from the sinf/frma box later
1317                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1318                AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1319            }
1320            ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1321                   chunk, num_channels, sample_size, sample_rate);
1322            mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1323            mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1324
1325            off64_t stop_offset = *offset + chunk_size;
1326            *offset = data_offset + sizeof(buffer);
1327            while (*offset < stop_offset) {
1328                status_t err = parseChunk(offset, depth + 1);
1329                if (err != OK) {
1330                    return err;
1331                }
1332            }
1333
1334            if (*offset != stop_offset) {
1335                return ERROR_MALFORMED;
1336            }
1337            break;
1338        }
1339
1340        case FOURCC('m', 'p', '4', 'v'):
1341        case FOURCC('e', 'n', 'c', 'v'):
1342        case FOURCC('s', '2', '6', '3'):
1343        case FOURCC('H', '2', '6', '3'):
1344        case FOURCC('h', '2', '6', '3'):
1345        case FOURCC('a', 'v', 'c', '1'):
1346        case FOURCC('h', 'v', 'c', '1'):
1347        case FOURCC('h', 'e', 'v', '1'):
1348        {
1349            mHasVideo = true;
1350
1351            uint8_t buffer[78];
1352            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1353                // Basic VideoSampleEntry size.
1354                return ERROR_MALFORMED;
1355            }
1356
1357            if (mDataSource->readAt(
1358                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1359                return ERROR_IO;
1360            }
1361
1362            uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1363            uint16_t width = U16_AT(&buffer[6 + 18]);
1364            uint16_t height = U16_AT(&buffer[6 + 20]);
1365
1366            // The video sample is not standard-compliant if it has invalid dimension.
1367            // Use some default width and height value, and
1368            // let the decoder figure out the actual width and height (and thus
1369            // be prepared for INFO_FORMAT_CHANGED event).
1370            if (width == 0)  width  = 352;
1371            if (height == 0) height = 288;
1372
1373            // printf("*** coding='%s' width=%d height=%d\n",
1374            //        chunk, width, height);
1375
1376            if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1377                // if the chunk type is encv, we'll get the type from the sinf/frma box later
1378                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1379            }
1380            mLastTrack->meta->setInt32(kKeyWidth, width);
1381            mLastTrack->meta->setInt32(kKeyHeight, height);
1382
1383            off64_t stop_offset = *offset + chunk_size;
1384            *offset = data_offset + sizeof(buffer);
1385            while (*offset < stop_offset) {
1386                status_t err = parseChunk(offset, depth + 1);
1387                if (err != OK) {
1388                    return err;
1389                }
1390            }
1391
1392            if (*offset != stop_offset) {
1393                return ERROR_MALFORMED;
1394            }
1395            break;
1396        }
1397
1398        case FOURCC('s', 't', 'c', 'o'):
1399        case FOURCC('c', 'o', '6', '4'):
1400        {
1401            status_t err =
1402                mLastTrack->sampleTable->setChunkOffsetParams(
1403                        chunk_type, data_offset, chunk_data_size);
1404
1405            *offset += chunk_size;
1406
1407            if (err != OK) {
1408                return err;
1409            }
1410
1411            break;
1412        }
1413
1414        case FOURCC('s', 't', 's', 'c'):
1415        {
1416            status_t err =
1417                mLastTrack->sampleTable->setSampleToChunkParams(
1418                        data_offset, chunk_data_size);
1419
1420            *offset += chunk_size;
1421
1422            if (err != OK) {
1423                return err;
1424            }
1425
1426            break;
1427        }
1428
1429        case FOURCC('s', 't', 's', 'z'):
1430        case FOURCC('s', 't', 'z', '2'):
1431        {
1432            status_t err =
1433                mLastTrack->sampleTable->setSampleSizeParams(
1434                        chunk_type, data_offset, chunk_data_size);
1435
1436            *offset += chunk_size;
1437
1438            if (err != OK) {
1439                return err;
1440            }
1441
1442            size_t max_size;
1443            err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1444
1445            if (err != OK) {
1446                return err;
1447            }
1448
1449            if (max_size != 0) {
1450                // Assume that a given buffer only contains at most 10 chunks,
1451                // each chunk originally prefixed with a 2 byte length will
1452                // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1453                // and thus will grow by 2 bytes per chunk.
1454                if (max_size > SIZE_MAX - 10 * 2) {
1455                    ALOGE("max sample size too big: %zu", max_size);
1456                    return ERROR_MALFORMED;
1457                }
1458                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1459            } else {
1460                // No size was specified. Pick a conservatively large size.
1461                uint32_t width, height;
1462                if (!mLastTrack->meta->findInt32(kKeyWidth, (int32_t*)&width) ||
1463                    !mLastTrack->meta->findInt32(kKeyHeight,(int32_t*) &height)) {
1464                    ALOGE("No width or height, assuming worst case 1080p");
1465                    width = 1920;
1466                    height = 1080;
1467                } else {
1468                    // A resolution was specified, check that it's not too big. The values below
1469                    // were chosen so that the calculations below don't cause overflows, they're
1470                    // not indicating that resolutions up to 32kx32k are actually supported.
1471                    if (width > 32768 || height > 32768) {
1472                        ALOGE("can't support %u x %u video", width, height);
1473                        return ERROR_MALFORMED;
1474                    }
1475                }
1476
1477                const char *mime;
1478                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1479                if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
1480                    // AVC requires compression ratio of at least 2, and uses
1481                    // macroblocks
1482                    max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1483                } else {
1484                    // For all other formats there is no minimum compression
1485                    // ratio. Use compression ratio of 1.
1486                    max_size = width * height * 3 / 2;
1487                }
1488                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size);
1489            }
1490
1491            // NOTE: setting another piece of metadata invalidates any pointers (such as the
1492            // mimetype) previously obtained, so don't cache them.
1493            const char *mime;
1494            CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1495            // Calculate average frame rate.
1496            if (!strncasecmp("video/", mime, 6)) {
1497                size_t nSamples = mLastTrack->sampleTable->countSamples();
1498                int64_t durationUs;
1499                if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1500                    if (durationUs > 0) {
1501                        int32_t frameRate = (nSamples * 1000000LL +
1502                                    (durationUs >> 1)) / durationUs;
1503                        mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1504                    }
1505                }
1506            }
1507
1508            break;
1509        }
1510
1511        case FOURCC('s', 't', 't', 's'):
1512        {
1513            *offset += chunk_size;
1514
1515            status_t err =
1516                mLastTrack->sampleTable->setTimeToSampleParams(
1517                        data_offset, chunk_data_size);
1518
1519            if (err != OK) {
1520                return err;
1521            }
1522
1523            break;
1524        }
1525
1526        case FOURCC('c', 't', 't', 's'):
1527        {
1528            *offset += chunk_size;
1529
1530            status_t err =
1531                mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1532                        data_offset, chunk_data_size);
1533
1534            if (err != OK) {
1535                return err;
1536            }
1537
1538            break;
1539        }
1540
1541        case FOURCC('s', 't', 's', 's'):
1542        {
1543            *offset += chunk_size;
1544
1545            status_t err =
1546                mLastTrack->sampleTable->setSyncSampleParams(
1547                        data_offset, chunk_data_size);
1548
1549            if (err != OK) {
1550                return err;
1551            }
1552
1553            break;
1554        }
1555
1556        // ©xyz
1557        case FOURCC(0xA9, 'x', 'y', 'z'):
1558        {
1559            *offset += chunk_size;
1560
1561            // Best case the total data length inside "©xyz" box
1562            // would be 8, for instance "©xyz" + "\x00\x04\x15\xc7" + "0+0/",
1563            // where "\x00\x04" is the text string length with value = 4,
1564            // "\x15\xc7" is the language code = en, and "0+0" is a
1565            // location (string) value with longitude = 0 and latitude = 0.
1566            if (chunk_data_size < 8) {
1567                return ERROR_MALFORMED;
1568            }
1569
1570            // Worst case the location string length would be 18,
1571            // for instance +90.0000-180.0000, without the trailing "/" and
1572            // the string length + language code.
1573            char buffer[18];
1574
1575            // Subtracting 5 from the data size is because the text string length +
1576            // language code takes 4 bytes, and the trailing slash "/" takes 1 byte.
1577            off64_t location_length = chunk_data_size - 5;
1578            if (location_length >= (off64_t) sizeof(buffer)) {
1579                return ERROR_MALFORMED;
1580            }
1581
1582            if (mDataSource->readAt(
1583                        data_offset + 4, buffer, location_length) < location_length) {
1584                return ERROR_IO;
1585            }
1586
1587            buffer[location_length] = '\0';
1588            mFileMetaData->setCString(kKeyLocation, buffer);
1589            break;
1590        }
1591
1592        case FOURCC('e', 's', 'd', 's'):
1593        {
1594            *offset += chunk_size;
1595
1596            if (chunk_data_size < 4) {
1597                return ERROR_MALFORMED;
1598            }
1599
1600            uint8_t buffer[256];
1601            if (chunk_data_size > (off64_t)sizeof(buffer)) {
1602                return ERROR_BUFFER_TOO_SMALL;
1603            }
1604
1605            if (mDataSource->readAt(
1606                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
1607                return ERROR_IO;
1608            }
1609
1610            if (U32_AT(buffer) != 0) {
1611                // Should be version 0, flags 0.
1612                return ERROR_MALFORMED;
1613            }
1614
1615            mLastTrack->meta->setData(
1616                    kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1617
1618            if (mPath.size() >= 2
1619                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1620                // Information from the ESDS must be relied on for proper
1621                // setup of sample rate and channel count for MPEG4 Audio.
1622                // The generic header appears to only contain generic
1623                // information...
1624
1625                status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1626                        &buffer[4], chunk_data_size - 4);
1627
1628                if (err != OK) {
1629                    return err;
1630                }
1631            }
1632
1633            break;
1634        }
1635
1636        case FOURCC('a', 'v', 'c', 'C'):
1637        {
1638            *offset += chunk_size;
1639
1640            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1641
1642            if (mDataSource->readAt(
1643                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1644                return ERROR_IO;
1645            }
1646
1647            mLastTrack->meta->setData(
1648                    kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
1649
1650            break;
1651        }
1652        case FOURCC('h', 'v', 'c', 'C'):
1653        {
1654            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1655
1656            if (mDataSource->readAt(
1657                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1658                return ERROR_IO;
1659            }
1660
1661            mLastTrack->meta->setData(
1662                    kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size);
1663
1664            *offset += chunk_size;
1665            break;
1666        }
1667
1668        case FOURCC('d', '2', '6', '3'):
1669        {
1670            *offset += chunk_size;
1671            /*
1672             * d263 contains a fixed 7 bytes part:
1673             *   vendor - 4 bytes
1674             *   version - 1 byte
1675             *   level - 1 byte
1676             *   profile - 1 byte
1677             * optionally, "d263" box itself may contain a 16-byte
1678             * bit rate box (bitr)
1679             *   average bit rate - 4 bytes
1680             *   max bit rate - 4 bytes
1681             */
1682            char buffer[23];
1683            if (chunk_data_size != 7 &&
1684                chunk_data_size != 23) {
1685                ALOGE("Incorrect D263 box size %lld", chunk_data_size);
1686                return ERROR_MALFORMED;
1687            }
1688
1689            if (mDataSource->readAt(
1690                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1691                return ERROR_IO;
1692            }
1693
1694            mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1695
1696            break;
1697        }
1698
1699        case FOURCC('m', 'e', 't', 'a'):
1700        {
1701            uint8_t buffer[4];
1702            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1703                *offset += chunk_size;
1704                return ERROR_MALFORMED;
1705            }
1706
1707            if (mDataSource->readAt(
1708                        data_offset, buffer, 4) < 4) {
1709                *offset += chunk_size;
1710                return ERROR_IO;
1711            }
1712
1713            if (U32_AT(buffer) != 0) {
1714                // Should be version 0, flags 0.
1715
1716                // If it's not, let's assume this is one of those
1717                // apparently malformed chunks that don't have flags
1718                // and completely different semantics than what's
1719                // in the MPEG4 specs and skip it.
1720                *offset += chunk_size;
1721                return OK;
1722            }
1723
1724            off64_t stop_offset = *offset + chunk_size;
1725            *offset = data_offset + sizeof(buffer);
1726            while (*offset < stop_offset) {
1727                status_t err = parseChunk(offset, depth + 1);
1728                if (err != OK) {
1729                    return err;
1730                }
1731            }
1732
1733            if (*offset != stop_offset) {
1734                return ERROR_MALFORMED;
1735            }
1736            break;
1737        }
1738
1739        case FOURCC('m', 'e', 'a', 'n'):
1740        case FOURCC('n', 'a', 'm', 'e'):
1741        case FOURCC('d', 'a', 't', 'a'):
1742        {
1743            *offset += chunk_size;
1744
1745            if (mPath.size() == 6 && underMetaDataPath(mPath)) {
1746                status_t err = parseITunesMetaData(data_offset, chunk_data_size);
1747
1748                if (err != OK) {
1749                    return err;
1750                }
1751            }
1752
1753            break;
1754        }
1755
1756        case FOURCC('m', 'v', 'h', 'd'):
1757        {
1758            *offset += chunk_size;
1759
1760            if (chunk_data_size < 32) {
1761                return ERROR_MALFORMED;
1762            }
1763
1764            uint8_t header[32];
1765            if (mDataSource->readAt(
1766                        data_offset, header, sizeof(header))
1767                    < (ssize_t)sizeof(header)) {
1768                return ERROR_IO;
1769            }
1770
1771            uint64_t creationTime;
1772            uint64_t duration = 0;
1773            if (header[0] == 1) {
1774                creationTime = U64_AT(&header[4]);
1775                mHeaderTimescale = U32_AT(&header[20]);
1776                duration = U64_AT(&header[24]);
1777                if (duration == 0xffffffffffffffff) {
1778                    duration = 0;
1779                }
1780            } else if (header[0] != 0) {
1781                return ERROR_MALFORMED;
1782            } else {
1783                creationTime = U32_AT(&header[4]);
1784                mHeaderTimescale = U32_AT(&header[12]);
1785                uint32_t d32 = U32_AT(&header[16]);
1786                if (d32 == 0xffffffff) {
1787                    d32 = 0;
1788                }
1789                duration = d32;
1790            }
1791            if (duration != 0) {
1792                mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
1793            }
1794
1795            String8 s;
1796            convertTimeToDate(creationTime, &s);
1797
1798            mFileMetaData->setCString(kKeyDate, s.string());
1799
1800            break;
1801        }
1802
1803        case FOURCC('m', 'e', 'h', 'd'):
1804        {
1805            *offset += chunk_size;
1806
1807            if (chunk_data_size < 8) {
1808                return ERROR_MALFORMED;
1809            }
1810
1811            uint8_t flags[4];
1812            if (mDataSource->readAt(
1813                        data_offset, flags, sizeof(flags))
1814                    < (ssize_t)sizeof(flags)) {
1815                return ERROR_IO;
1816            }
1817
1818            uint64_t duration = 0;
1819            if (flags[0] == 1) {
1820                // 64 bit
1821                if (chunk_data_size < 12) {
1822                    return ERROR_MALFORMED;
1823                }
1824                mDataSource->getUInt64(data_offset + 4, &duration);
1825                if (duration == 0xffffffffffffffff) {
1826                    duration = 0;
1827                }
1828            } else if (flags[0] == 0) {
1829                // 32 bit
1830                uint32_t d32;
1831                mDataSource->getUInt32(data_offset + 4, &d32);
1832                if (d32 == 0xffffffff) {
1833                    d32 = 0;
1834                }
1835                duration = d32;
1836            } else {
1837                return ERROR_MALFORMED;
1838            }
1839
1840            if (duration != 0) {
1841                mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
1842            }
1843
1844            break;
1845        }
1846
1847        case FOURCC('m', 'd', 'a', 't'):
1848        {
1849            ALOGV("mdat chunk, drm: %d", mIsDrm);
1850            if (!mIsDrm) {
1851                *offset += chunk_size;
1852                break;
1853            }
1854
1855            if (chunk_size < 8) {
1856                return ERROR_MALFORMED;
1857            }
1858
1859            return parseDrmSINF(offset, data_offset);
1860        }
1861
1862        case FOURCC('h', 'd', 'l', 'r'):
1863        {
1864            *offset += chunk_size;
1865
1866            uint32_t buffer;
1867            if (mDataSource->readAt(
1868                        data_offset + 8, &buffer, 4) < 4) {
1869                return ERROR_IO;
1870            }
1871
1872            uint32_t type = ntohl(buffer);
1873            // For the 3GPP file format, the handler-type within the 'hdlr' box
1874            // shall be 'text'. We also want to support 'sbtl' handler type
1875            // for a practical reason as various MPEG4 containers use it.
1876            if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
1877                mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
1878            }
1879
1880            break;
1881        }
1882
1883        case FOURCC('t', 'r', 'e', 'x'):
1884        {
1885            *offset += chunk_size;
1886
1887            if (chunk_data_size < 24) {
1888                return ERROR_IO;
1889            }
1890            Trex trex;
1891            if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
1892                !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
1893                !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
1894                !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
1895                !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
1896                return ERROR_IO;
1897            }
1898            mTrex.add(trex);
1899            break;
1900        }
1901
1902        case FOURCC('t', 'x', '3', 'g'):
1903        {
1904            uint32_t type;
1905            const void *data;
1906            size_t size = 0;
1907            if (!mLastTrack->meta->findData(
1908                    kKeyTextFormatData, &type, &data, &size)) {
1909                size = 0;
1910            }
1911
1912            if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) {
1913                return ERROR_MALFORMED;
1914            }
1915
1916            uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size];
1917            if (buffer == NULL) {
1918                return ERROR_MALFORMED;
1919            }
1920
1921            if (size > 0) {
1922                memcpy(buffer, data, size);
1923            }
1924
1925            if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
1926                    < chunk_size) {
1927                delete[] buffer;
1928                buffer = NULL;
1929
1930                // advance read pointer so we don't end up reading this again
1931                *offset += chunk_size;
1932                return ERROR_IO;
1933            }
1934
1935            mLastTrack->meta->setData(
1936                    kKeyTextFormatData, 0, buffer, size + chunk_size);
1937
1938            delete[] buffer;
1939
1940            *offset += chunk_size;
1941            break;
1942        }
1943
1944        case FOURCC('c', 'o', 'v', 'r'):
1945        {
1946            *offset += chunk_size;
1947
1948            if (mFileMetaData != NULL) {
1949                ALOGV("chunk_data_size = %lld and data_offset = %lld",
1950                        chunk_data_size, data_offset);
1951
1952                if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
1953                    return ERROR_MALFORMED;
1954                }
1955                sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1);
1956                if (mDataSource->readAt(
1957                    data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) {
1958                    return ERROR_IO;
1959                }
1960                const int kSkipBytesOfDataBox = 16;
1961                if (chunk_data_size <= kSkipBytesOfDataBox) {
1962                    return ERROR_MALFORMED;
1963                }
1964
1965                mFileMetaData->setData(
1966                    kKeyAlbumArt, MetaData::TYPE_NONE,
1967                    buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
1968            }
1969
1970            break;
1971        }
1972
1973        case FOURCC('t', 'i', 't', 'l'):
1974        case FOURCC('p', 'e', 'r', 'f'):
1975        case FOURCC('a', 'u', 't', 'h'):
1976        case FOURCC('g', 'n', 'r', 'e'):
1977        case FOURCC('a', 'l', 'b', 'm'):
1978        case FOURCC('y', 'r', 'r', 'c'):
1979        {
1980            *offset += chunk_size;
1981
1982            status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
1983
1984            if (err != OK) {
1985                return err;
1986            }
1987
1988            break;
1989        }
1990
1991        case FOURCC('I', 'D', '3', '2'):
1992        {
1993            *offset += chunk_size;
1994
1995            if (chunk_data_size < 6) {
1996                return ERROR_MALFORMED;
1997            }
1998
1999            parseID3v2MetaData(data_offset + 6);
2000
2001            break;
2002        }
2003
2004        case FOURCC('-', '-', '-', '-'):
2005        {
2006            mLastCommentMean.clear();
2007            mLastCommentName.clear();
2008            mLastCommentData.clear();
2009            *offset += chunk_size;
2010            break;
2011        }
2012
2013        case FOURCC('s', 'i', 'd', 'x'):
2014        {
2015            parseSegmentIndex(data_offset, chunk_data_size);
2016            *offset += chunk_size;
2017            return UNKNOWN_ERROR; // stop parsing after sidx
2018        }
2019
2020        default:
2021        {
2022            *offset += chunk_size;
2023            break;
2024        }
2025    }
2026
2027    return OK;
2028}
2029
2030status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
2031  ALOGV("MPEG4Extractor::parseSegmentIndex");
2032
2033    if (size < 12) {
2034      return -EINVAL;
2035    }
2036
2037    uint32_t flags;
2038    if (!mDataSource->getUInt32(offset, &flags)) {
2039        return ERROR_MALFORMED;
2040    }
2041
2042    uint32_t version = flags >> 24;
2043    flags &= 0xffffff;
2044
2045    ALOGV("sidx version %d", version);
2046
2047    uint32_t referenceId;
2048    if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
2049        return ERROR_MALFORMED;
2050    }
2051
2052    uint32_t timeScale;
2053    if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
2054        return ERROR_MALFORMED;
2055    }
2056    ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
2057
2058    uint64_t earliestPresentationTime;
2059    uint64_t firstOffset;
2060
2061    offset += 12;
2062    size -= 12;
2063
2064    if (version == 0) {
2065        if (size < 8) {
2066            return -EINVAL;
2067        }
2068        uint32_t tmp;
2069        if (!mDataSource->getUInt32(offset, &tmp)) {
2070            return ERROR_MALFORMED;
2071        }
2072        earliestPresentationTime = tmp;
2073        if (!mDataSource->getUInt32(offset + 4, &tmp)) {
2074            return ERROR_MALFORMED;
2075        }
2076        firstOffset = tmp;
2077        offset += 8;
2078        size -= 8;
2079    } else {
2080        if (size < 16) {
2081            return -EINVAL;
2082        }
2083        if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
2084            return ERROR_MALFORMED;
2085        }
2086        if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
2087            return ERROR_MALFORMED;
2088        }
2089        offset += 16;
2090        size -= 16;
2091    }
2092    ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
2093
2094    if (size < 4) {
2095        return -EINVAL;
2096    }
2097
2098    uint16_t referenceCount;
2099    if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
2100        return ERROR_MALFORMED;
2101    }
2102    offset += 4;
2103    size -= 4;
2104    ALOGV("refcount: %d", referenceCount);
2105
2106    if (size < referenceCount * 12) {
2107        return -EINVAL;
2108    }
2109
2110    uint64_t total_duration = 0;
2111    for (unsigned int i = 0; i < referenceCount; i++) {
2112        uint32_t d1, d2, d3;
2113
2114        if (!mDataSource->getUInt32(offset, &d1) ||     // size
2115            !mDataSource->getUInt32(offset + 4, &d2) || // duration
2116            !mDataSource->getUInt32(offset + 8, &d3)) { // flags
2117            return ERROR_MALFORMED;
2118        }
2119
2120        if (d1 & 0x80000000) {
2121            ALOGW("sub-sidx boxes not supported yet");
2122        }
2123        bool sap = d3 & 0x80000000;
2124        uint32_t saptype = (d3 >> 28) & 7;
2125        if (!sap || (saptype != 1 && saptype != 2)) {
2126            // type 1 and 2 are sync samples
2127            ALOGW("not a stream access point, or unsupported type: %08x", d3);
2128        }
2129        total_duration += d2;
2130        offset += 12;
2131        ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
2132        SidxEntry se;
2133        se.mSize = d1 & 0x7fffffff;
2134        se.mDurationUs = 1000000LL * d2 / timeScale;
2135        mSidxEntries.add(se);
2136    }
2137
2138    uint64_t sidxDuration = total_duration * 1000000 / timeScale;
2139
2140    int64_t metaDuration;
2141    if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
2142        mLastTrack->meta->setInt64(kKeyDuration, sidxDuration);
2143    }
2144    return OK;
2145}
2146
2147
2148
2149status_t MPEG4Extractor::parseTrackHeader(
2150        off64_t data_offset, off64_t data_size) {
2151    if (data_size < 4) {
2152        return ERROR_MALFORMED;
2153    }
2154
2155    uint8_t version;
2156    if (mDataSource->readAt(data_offset, &version, 1) < 1) {
2157        return ERROR_IO;
2158    }
2159
2160    size_t dynSize = (version == 1) ? 36 : 24;
2161
2162    uint8_t buffer[36 + 60];
2163
2164    if (data_size != (off64_t)dynSize + 60) {
2165        return ERROR_MALFORMED;
2166    }
2167
2168    if (mDataSource->readAt(
2169                data_offset, buffer, data_size) < (ssize_t)data_size) {
2170        return ERROR_IO;
2171    }
2172
2173    uint64_t ctime __unused, mtime __unused, duration __unused;
2174    int32_t id;
2175
2176    if (version == 1) {
2177        ctime = U64_AT(&buffer[4]);
2178        mtime = U64_AT(&buffer[12]);
2179        id = U32_AT(&buffer[20]);
2180        duration = U64_AT(&buffer[28]);
2181    } else if (version == 0) {
2182        ctime = U32_AT(&buffer[4]);
2183        mtime = U32_AT(&buffer[8]);
2184        id = U32_AT(&buffer[12]);
2185        duration = U32_AT(&buffer[20]);
2186    } else {
2187        return ERROR_UNSUPPORTED;
2188    }
2189
2190    mLastTrack->meta->setInt32(kKeyTrackID, id);
2191
2192    size_t matrixOffset = dynSize + 16;
2193    int32_t a00 = U32_AT(&buffer[matrixOffset]);
2194    int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
2195    int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
2196    int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
2197
2198#if 0
2199    int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
2200    int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
2201
2202    ALOGI("x' = %.2f * x + %.2f * y + %.2f",
2203         a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
2204    ALOGI("y' = %.2f * x + %.2f * y + %.2f",
2205         a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
2206#endif
2207
2208    uint32_t rotationDegrees;
2209
2210    static const int32_t kFixedOne = 0x10000;
2211    if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
2212        // Identity, no rotation
2213        rotationDegrees = 0;
2214    } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2215        rotationDegrees = 90;
2216    } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2217        rotationDegrees = 270;
2218    } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2219        rotationDegrees = 180;
2220    } else {
2221        ALOGW("We only support 0,90,180,270 degree rotation matrices");
2222        rotationDegrees = 0;
2223    }
2224
2225    if (rotationDegrees != 0) {
2226        mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
2227    }
2228
2229    // Handle presentation display size, which could be different
2230    // from the image size indicated by kKeyWidth and kKeyHeight.
2231    uint32_t width = U32_AT(&buffer[dynSize + 52]);
2232    uint32_t height = U32_AT(&buffer[dynSize + 56]);
2233    mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
2234    mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
2235
2236    return OK;
2237}
2238
2239status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
2240    if (size < 4 || size == SIZE_MAX) {
2241        return ERROR_MALFORMED;
2242    }
2243
2244    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2245    if (buffer == NULL) {
2246        return ERROR_MALFORMED;
2247    }
2248    if (mDataSource->readAt(
2249                offset, buffer, size) != (ssize_t)size) {
2250        delete[] buffer;
2251        buffer = NULL;
2252
2253        return ERROR_IO;
2254    }
2255
2256    uint32_t flags = U32_AT(buffer);
2257
2258    uint32_t metadataKey = 0;
2259    char chunk[5];
2260    MakeFourCCString(mPath[4], chunk);
2261    ALOGV("meta: %s @ %lld", chunk, offset);
2262    switch ((int32_t)mPath[4]) {
2263        case FOURCC(0xa9, 'a', 'l', 'b'):
2264        {
2265            metadataKey = kKeyAlbum;
2266            break;
2267        }
2268        case FOURCC(0xa9, 'A', 'R', 'T'):
2269        {
2270            metadataKey = kKeyArtist;
2271            break;
2272        }
2273        case FOURCC('a', 'A', 'R', 'T'):
2274        {
2275            metadataKey = kKeyAlbumArtist;
2276            break;
2277        }
2278        case FOURCC(0xa9, 'd', 'a', 'y'):
2279        {
2280            metadataKey = kKeyYear;
2281            break;
2282        }
2283        case FOURCC(0xa9, 'n', 'a', 'm'):
2284        {
2285            metadataKey = kKeyTitle;
2286            break;
2287        }
2288        case FOURCC(0xa9, 'w', 'r', 't'):
2289        {
2290            metadataKey = kKeyWriter;
2291            break;
2292        }
2293        case FOURCC('c', 'o', 'v', 'r'):
2294        {
2295            metadataKey = kKeyAlbumArt;
2296            break;
2297        }
2298        case FOURCC('g', 'n', 'r', 'e'):
2299        {
2300            metadataKey = kKeyGenre;
2301            break;
2302        }
2303        case FOURCC(0xa9, 'g', 'e', 'n'):
2304        {
2305            metadataKey = kKeyGenre;
2306            break;
2307        }
2308        case FOURCC('c', 'p', 'i', 'l'):
2309        {
2310            if (size == 9 && flags == 21) {
2311                char tmp[16];
2312                sprintf(tmp, "%d",
2313                        (int)buffer[size - 1]);
2314
2315                mFileMetaData->setCString(kKeyCompilation, tmp);
2316            }
2317            break;
2318        }
2319        case FOURCC('t', 'r', 'k', 'n'):
2320        {
2321            if (size == 16 && flags == 0) {
2322                char tmp[16];
2323                uint16_t* pTrack = (uint16_t*)&buffer[10];
2324                uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2325                sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2326
2327                mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2328            }
2329            break;
2330        }
2331        case FOURCC('d', 'i', 's', 'k'):
2332        {
2333            if ((size == 14 || size == 16) && flags == 0) {
2334                char tmp[16];
2335                uint16_t* pDisc = (uint16_t*)&buffer[10];
2336                uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2337                sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2338
2339                mFileMetaData->setCString(kKeyDiscNumber, tmp);
2340            }
2341            break;
2342        }
2343        case FOURCC('-', '-', '-', '-'):
2344        {
2345            buffer[size] = '\0';
2346            switch (mPath[5]) {
2347                case FOURCC('m', 'e', 'a', 'n'):
2348                    mLastCommentMean.setTo((const char *)buffer + 4);
2349                    break;
2350                case FOURCC('n', 'a', 'm', 'e'):
2351                    mLastCommentName.setTo((const char *)buffer + 4);
2352                    break;
2353                case FOURCC('d', 'a', 't', 'a'):
2354                    mLastCommentData.setTo((const char *)buffer + 8);
2355                    break;
2356            }
2357
2358            // Once we have a set of mean/name/data info, go ahead and process
2359            // it to see if its something we are interested in.  Whether or not
2360            // were are interested in the specific tag, make sure to clear out
2361            // the set so we can be ready to process another tuple should one
2362            // show up later in the file.
2363            if ((mLastCommentMean.length() != 0) &&
2364                (mLastCommentName.length() != 0) &&
2365                (mLastCommentData.length() != 0)) {
2366
2367                if (mLastCommentMean == "com.apple.iTunes"
2368                        && mLastCommentName == "iTunSMPB") {
2369                    int32_t delay, padding;
2370                    if (sscanf(mLastCommentData,
2371                               " %*x %x %x %*x", &delay, &padding) == 2) {
2372                        mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2373                        mLastTrack->meta->setInt32(kKeyEncoderPadding, padding);
2374                    }
2375                }
2376
2377                mLastCommentMean.clear();
2378                mLastCommentName.clear();
2379                mLastCommentData.clear();
2380            }
2381            break;
2382        }
2383
2384        default:
2385            break;
2386    }
2387
2388    if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) {
2389        if (metadataKey == kKeyAlbumArt) {
2390            mFileMetaData->setData(
2391                    kKeyAlbumArt, MetaData::TYPE_NONE,
2392                    buffer + 8, size - 8);
2393        } else if (metadataKey == kKeyGenre) {
2394            if (flags == 0) {
2395                // uint8_t genre code, iTunes genre codes are
2396                // the standard id3 codes, except they start
2397                // at 1 instead of 0 (e.g. Pop is 14, not 13)
2398                // We use standard id3 numbering, so subtract 1.
2399                int genrecode = (int)buffer[size - 1];
2400                genrecode--;
2401                if (genrecode < 0) {
2402                    genrecode = 255; // reserved for 'unknown genre'
2403                }
2404                char genre[10];
2405                sprintf(genre, "%d", genrecode);
2406
2407                mFileMetaData->setCString(metadataKey, genre);
2408            } else if (flags == 1) {
2409                // custom genre string
2410                buffer[size] = '\0';
2411
2412                mFileMetaData->setCString(
2413                        metadataKey, (const char *)buffer + 8);
2414            }
2415        } else {
2416            buffer[size] = '\0';
2417
2418            mFileMetaData->setCString(
2419                    metadataKey, (const char *)buffer + 8);
2420        }
2421    }
2422
2423    delete[] buffer;
2424    buffer = NULL;
2425
2426    return OK;
2427}
2428
2429status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
2430    if (size < 4 || size == SIZE_MAX) {
2431        return ERROR_MALFORMED;
2432    }
2433
2434    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2435    if (buffer == NULL) {
2436        return ERROR_MALFORMED;
2437    }
2438    if (mDataSource->readAt(
2439                offset, buffer, size) != (ssize_t)size) {
2440        delete[] buffer;
2441        buffer = NULL;
2442
2443        return ERROR_IO;
2444    }
2445
2446    uint32_t metadataKey = 0;
2447    switch (mPath[depth]) {
2448        case FOURCC('t', 'i', 't', 'l'):
2449        {
2450            metadataKey = kKeyTitle;
2451            break;
2452        }
2453        case FOURCC('p', 'e', 'r', 'f'):
2454        {
2455            metadataKey = kKeyArtist;
2456            break;
2457        }
2458        case FOURCC('a', 'u', 't', 'h'):
2459        {
2460            metadataKey = kKeyWriter;
2461            break;
2462        }
2463        case FOURCC('g', 'n', 'r', 'e'):
2464        {
2465            metadataKey = kKeyGenre;
2466            break;
2467        }
2468        case FOURCC('a', 'l', 'b', 'm'):
2469        {
2470            if (buffer[size - 1] != '\0') {
2471              char tmp[4];
2472              sprintf(tmp, "%u", buffer[size - 1]);
2473
2474              mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2475            }
2476
2477            metadataKey = kKeyAlbum;
2478            break;
2479        }
2480        case FOURCC('y', 'r', 'r', 'c'):
2481        {
2482            char tmp[5];
2483            uint16_t year = U16_AT(&buffer[4]);
2484
2485            if (year < 10000) {
2486                sprintf(tmp, "%u", year);
2487
2488                mFileMetaData->setCString(kKeyYear, tmp);
2489            }
2490            break;
2491        }
2492
2493        default:
2494            break;
2495    }
2496
2497    if (metadataKey > 0) {
2498        bool isUTF8 = true; // Common case
2499        char16_t *framedata = NULL;
2500        int len16 = 0; // Number of UTF-16 characters
2501
2502        // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
2503        if (size < 6) {
2504            return ERROR_MALFORMED;
2505        }
2506
2507        if (size - 6 >= 4) {
2508            len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
2509            framedata = (char16_t *)(buffer + 6);
2510            if (0xfffe == *framedata) {
2511                // endianness marker (BOM) doesn't match host endianness
2512                for (int i = 0; i < len16; i++) {
2513                    framedata[i] = bswap_16(framedata[i]);
2514                }
2515                // BOM is now swapped to 0xfeff, we will execute next block too
2516            }
2517
2518            if (0xfeff == *framedata) {
2519                // Remove the BOM
2520                framedata++;
2521                len16--;
2522                isUTF8 = false;
2523            }
2524            // else normal non-zero-length UTF-8 string
2525            // we can't handle UTF-16 without BOM as there is no other
2526            // indication of encoding.
2527        }
2528
2529        if (isUTF8) {
2530            buffer[size] = 0;
2531            mFileMetaData->setCString(metadataKey, (const char *)buffer + 6);
2532        } else {
2533            // Convert from UTF-16 string to UTF-8 string.
2534            String8 tmpUTF8str(framedata, len16);
2535            mFileMetaData->setCString(metadataKey, tmpUTF8str.string());
2536        }
2537    }
2538
2539    delete[] buffer;
2540    buffer = NULL;
2541
2542    return OK;
2543}
2544
2545void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
2546    ID3 id3(mDataSource, true /* ignorev1 */, offset);
2547
2548    if (id3.isValid()) {
2549        struct Map {
2550            int key;
2551            const char *tag1;
2552            const char *tag2;
2553        };
2554        static const Map kMap[] = {
2555            { kKeyAlbum, "TALB", "TAL" },
2556            { kKeyArtist, "TPE1", "TP1" },
2557            { kKeyAlbumArtist, "TPE2", "TP2" },
2558            { kKeyComposer, "TCOM", "TCM" },
2559            { kKeyGenre, "TCON", "TCO" },
2560            { kKeyTitle, "TIT2", "TT2" },
2561            { kKeyYear, "TYE", "TYER" },
2562            { kKeyAuthor, "TXT", "TEXT" },
2563            { kKeyCDTrackNumber, "TRK", "TRCK" },
2564            { kKeyDiscNumber, "TPA", "TPOS" },
2565            { kKeyCompilation, "TCP", "TCMP" },
2566        };
2567        static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
2568
2569        for (size_t i = 0; i < kNumMapEntries; ++i) {
2570            if (!mFileMetaData->hasData(kMap[i].key)) {
2571                ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
2572                if (it->done()) {
2573                    delete it;
2574                    it = new ID3::Iterator(id3, kMap[i].tag2);
2575                }
2576
2577                if (it->done()) {
2578                    delete it;
2579                    continue;
2580                }
2581
2582                String8 s;
2583                it->getString(&s);
2584                delete it;
2585
2586                mFileMetaData->setCString(kMap[i].key, s);
2587            }
2588        }
2589
2590        size_t dataSize;
2591        String8 mime;
2592        const void *data = id3.getAlbumArt(&dataSize, &mime);
2593
2594        if (data) {
2595            mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
2596            mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string());
2597        }
2598    }
2599}
2600
2601sp<MediaSource> MPEG4Extractor::getTrack(size_t index) {
2602    status_t err;
2603    if ((err = readMetaData()) != OK) {
2604        return NULL;
2605    }
2606
2607    Track *track = mFirstTrack;
2608    while (index > 0) {
2609        if (track == NULL) {
2610            return NULL;
2611        }
2612
2613        track = track->next;
2614        --index;
2615    }
2616
2617    if (track == NULL) {
2618        return NULL;
2619    }
2620
2621
2622    Trex *trex = NULL;
2623    int32_t trackId;
2624    if (track->meta->findInt32(kKeyTrackID, &trackId)) {
2625        for (size_t i = 0; i < mTrex.size(); i++) {
2626            Trex *t = &mTrex.editItemAt(index);
2627            if (t->track_ID == (uint32_t) trackId) {
2628                trex = t;
2629                break;
2630            }
2631        }
2632    }
2633
2634    ALOGV("getTrack called, pssh: %zu", mPssh.size());
2635
2636    return new MPEG4Source(this,
2637            track->meta, mDataSource, track->timescale, track->sampleTable,
2638            mSidxEntries, trex, mMoofOffset);
2639}
2640
2641// static
2642status_t MPEG4Extractor::verifyTrack(Track *track) {
2643    const char *mime;
2644    CHECK(track->meta->findCString(kKeyMIMEType, &mime));
2645
2646    uint32_t type;
2647    const void *data;
2648    size_t size;
2649    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
2650        if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
2651                || type != kTypeAVCC) {
2652            return ERROR_MALFORMED;
2653        }
2654    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
2655        if (!track->meta->findData(kKeyHVCC, &type, &data, &size)
2656                    || type != kTypeHVCC) {
2657            return ERROR_MALFORMED;
2658        }
2659    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
2660            || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
2661        if (!track->meta->findData(kKeyESDS, &type, &data, &size)
2662                || type != kTypeESDS) {
2663            return ERROR_MALFORMED;
2664        }
2665    }
2666
2667    if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
2668        // Make sure we have all the metadata we need.
2669        ALOGE("stbl atom missing/invalid.");
2670        return ERROR_MALFORMED;
2671    }
2672
2673    if (track->timescale == 0) {
2674        ALOGE("timescale invalid.");
2675        return ERROR_MALFORMED;
2676    }
2677
2678    return OK;
2679}
2680
// Audio object type (AOT) codes. Only the enumerators that are not commented
// out are defined; the commented-out entries are kept to document the values
// that are deliberately left without a name here.
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1,
    // AOT_NULL_OBJECT      = 0,
    // AOT_AAC_MAIN         = 1,   // Main profile
    AOT_AAC_LC              = 2,   // Low Complexity object
    // AOT_AAC_SSR          = 3,
    // AOT_AAC_LTP          = 4,
    AOT_SBR                 = 5,
    // AOT_AAC_SCAL         = 6,
    // AOT_TWIN_VQ          = 7,
    // AOT_CELP             = 8,
    // AOT_HVXC             = 9,
    // AOT_RSVD_10          = 10,  // (reserved)
    // AOT_RSVD_11          = 11,  // (reserved)
    // AOT_TTSI             = 12,  // TTSI Object
    // AOT_MAIN_SYNTH       = 13,  // Main Synthetic object
    // AOT_WAV_TAB_SYNTH    = 14,  // Wavetable Synthesis object
    // AOT_GEN_MIDI         = 15,  // General MIDI object
    // AOT_ALG_SYNTH_AUD_FX = 16,  // Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC           = 17,  // Error Resilient(ER) AAC Low Complexity
    // AOT_RSVD_18          = 18,  // (reserved)
    // AOT_ER_AAC_LTP       = 19,  // Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL         = 20,  // Error Resilient(ER) AAC Scalable object
    // AOT_ER_TWIN_VQ       = 21,  // Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC             = 22,  // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD           = 23,  // Error Resilient(ER) AAC LowDelay object
    // AOT_ER_CELP          = 24,  // Error Resilient(ER) CELP object
    // AOT_ER_HVXC          = 25,  // Error Resilient(ER) HVXC object
    // AOT_ER_HILN          = 26,  // Error Resilient(ER) HILN object
    // AOT_ER_PARA          = 27,  // Error Resilient(ER) Parametric object
    // AOT_RSVD_28          = 28,  // might become SSC
    AOT_PS                  = 29,  // PS, Parametric Stereo (includes SBR)
    // AOT_MPEGS            = 30,  // MPEG Surround

    AOT_ESCAPE              = 31,  // Signal AOT uses more than 5 bits

    // AOT_MP3ONMP4_L1      = 32,  // MPEG-Layer1 in mp4
    // AOT_MP3ONMP4_L2      = 33,  // MPEG-Layer2 in mp4
    // AOT_MP3ONMP4_L3      = 34,  // MPEG-Layer3 in mp4
    // AOT_RSVD_35          = 35,  // might become DST
    // AOT_RSVD_36          = 36,  // might become ALS
    // AOT_AAC_SLS          = 37,  // AAC + SLS
    // AOT_SLS              = 38,  // SLS
    // AOT_ER_AAC_ELD       = 39,  // AAC Enhanced Low Delay

    // AOT_USAC             = 42,  // USAC
    // AOT_SAOC             = 43,  // SAOC
    // AOT_LD_MPEGS         = 44,  // Low Delay MPEG Surround

    // AOT_RSVD50           = 50,  // Interim AOT for Rsvd50
};
2732
2733status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
2734        const void *esds_data, size_t esds_size) {
2735    ESDS esds(esds_data, esds_size);
2736
2737    uint8_t objectTypeIndication;
2738    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
2739        return ERROR_MALFORMED;
2740    }
2741
2742    if (objectTypeIndication == 0xe1) {
2743        // This isn't MPEG4 audio at all, it's QCELP 14k...
2744        mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
2745        return OK;
2746    }
2747
2748    if (objectTypeIndication  == 0x6b) {
2749        // The media subtype is MP3 audio
2750        // Our software MP3 audio decoder may not be able to handle
2751        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
2752        ALOGE("MP3 track in MP4/3GPP file is not supported");
2753        return ERROR_UNSUPPORTED;
2754    }
2755
2756    const uint8_t *csd;
2757    size_t csd_size;
2758    if (esds.getCodecSpecificInfo(
2759                (const void **)&csd, &csd_size) != OK) {
2760        return ERROR_MALFORMED;
2761    }
2762
2763    if (kUseHexDump) {
2764        printf("ESD of size %d\n", csd_size);
2765        hexdump(csd, csd_size);
2766    }
2767
2768    if (csd_size == 0) {
2769        // There's no further information, i.e. no codec specific data
2770        // Let's assume that the information provided in the mpeg4 headers
2771        // is accurate and hope for the best.
2772
2773        return OK;
2774    }
2775
2776    if (csd_size < 2) {
2777        return ERROR_MALFORMED;
2778    }
2779
2780    static uint32_t kSamplingRate[] = {
2781        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
2782        16000, 12000, 11025, 8000, 7350
2783    };
2784
2785    ABitReader br(csd, csd_size);
2786    uint32_t objectType = br.getBits(5);
2787
2788    if (objectType == 31) {  // AAC-ELD => additional 6 bits
2789        objectType = 32 + br.getBits(6);
2790    }
2791
2792    //keep AOT type
2793    mLastTrack->meta->setInt32(kKeyAACAOT, objectType);
2794
2795    uint32_t freqIndex = br.getBits(4);
2796
2797    int32_t sampleRate = 0;
2798    int32_t numChannels = 0;
2799    if (freqIndex == 15) {
2800        if (csd_size < 5) {
2801            return ERROR_MALFORMED;
2802        }
2803        sampleRate = br.getBits(24);
2804        numChannels = br.getBits(4);
2805    } else {
2806        numChannels = br.getBits(4);
2807
2808        if (freqIndex == 13 || freqIndex == 14) {
2809            return ERROR_MALFORMED;
2810        }
2811
2812        sampleRate = kSamplingRate[freqIndex];
2813    }
2814
2815    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
2816        uint32_t extFreqIndex = br.getBits(4);
2817        int32_t extSampleRate __unused;
2818        if (extFreqIndex == 15) {
2819            if (csd_size < 8) {
2820                return ERROR_MALFORMED;
2821            }
2822            extSampleRate = br.getBits(24);
2823        } else {
2824            if (extFreqIndex == 13 || extFreqIndex == 14) {
2825                return ERROR_MALFORMED;
2826            }
2827            extSampleRate = kSamplingRate[extFreqIndex];
2828        }
2829        //TODO: save the extension sampling rate value in meta data =>
2830        //      mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
2831    }
2832
2833    switch (numChannels) {
2834        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
2835        case 0:
2836        case 1:// FC
2837        case 2:// FL FR
2838        case 3:// FC, FL FR
2839        case 4:// FC, FL FR, RC
2840        case 5:// FC, FL FR, SL SR
2841        case 6:// FC, FL FR, SL SR, LFE
2842            //numChannels already contains the right value
2843            break;
2844        case 11:// FC, FL FR, SL SR, RC, LFE
2845            numChannels = 7;
2846            break;
2847        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
2848        case 12:// FC, FL  FR,  SL SR, RL RR, LFE
2849        case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
2850            numChannels = 8;
2851            break;
2852        default:
2853            return ERROR_UNSUPPORTED;
2854    }
2855
2856    {
2857        if (objectType == AOT_SBR || objectType == AOT_PS) {
2858            objectType = br.getBits(5);
2859
2860            if (objectType == AOT_ESCAPE) {
2861                objectType = 32 + br.getBits(6);
2862            }
2863        }
2864        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
2865                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
2866                objectType == AOT_ER_BSAC) {
2867            const int32_t frameLengthFlag __unused = br.getBits(1);
2868
2869            const int32_t dependsOnCoreCoder = br.getBits(1);
2870
2871            if (dependsOnCoreCoder ) {
2872                const int32_t coreCoderDelay __unused = br.getBits(14);
2873            }
2874
2875            int32_t extensionFlag = -1;
2876            if (br.numBitsLeft() > 0) {
2877                extensionFlag = br.getBits(1);
2878            } else {
2879                switch (objectType) {
2880                // 14496-3 4.5.1.1 extensionFlag
2881                case AOT_AAC_LC:
2882                    extensionFlag = 0;
2883                    break;
2884                case AOT_ER_AAC_LC:
2885                case AOT_ER_AAC_SCAL:
2886                case AOT_ER_BSAC:
2887                case AOT_ER_AAC_LD:
2888                    extensionFlag = 1;
2889                    break;
2890                default:
2891                    TRESPASS();
2892                    break;
2893                }
2894                ALOGW("csd missing extension flag; assuming %d for object type %u.",
2895                        extensionFlag, objectType);
2896            }
2897
2898            if (numChannels == 0) {
2899                int32_t channelsEffectiveNum = 0;
2900                int32_t channelsNum = 0;
2901                const int32_t ElementInstanceTag __unused = br.getBits(4);
2902                const int32_t Profile __unused = br.getBits(2);
2903                const int32_t SamplingFrequencyIndex __unused = br.getBits(4);
2904                const int32_t NumFrontChannelElements = br.getBits(4);
2905                const int32_t NumSideChannelElements = br.getBits(4);
2906                const int32_t NumBackChannelElements = br.getBits(4);
2907                const int32_t NumLfeChannelElements = br.getBits(2);
2908                const int32_t NumAssocDataElements __unused = br.getBits(3);
2909                const int32_t NumValidCcElements __unused = br.getBits(4);
2910
2911                const int32_t MonoMixdownPresent = br.getBits(1);
2912                if (MonoMixdownPresent != 0) {
2913                    const int32_t MonoMixdownElementNumber __unused = br.getBits(4);
2914                }
2915
2916                const int32_t StereoMixdownPresent = br.getBits(1);
2917                if (StereoMixdownPresent != 0) {
2918                    const int32_t StereoMixdownElementNumber __unused = br.getBits(4);
2919                }
2920
2921                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
2922                if (MatrixMixdownIndexPresent != 0) {
2923                    const int32_t MatrixMixdownIndex __unused = br.getBits(2);
2924                    const int32_t PseudoSurroundEnable __unused = br.getBits(1);
2925                }
2926
2927                int i;
2928                for (i=0; i < NumFrontChannelElements; i++) {
2929                    const int32_t FrontElementIsCpe = br.getBits(1);
2930                    const int32_t FrontElementTagSelect __unused = br.getBits(4);
2931                    channelsNum += FrontElementIsCpe ? 2 : 1;
2932                }
2933
2934                for (i=0; i < NumSideChannelElements; i++) {
2935                    const int32_t SideElementIsCpe = br.getBits(1);
2936                    const int32_t SideElementTagSelect __unused = br.getBits(4);
2937                    channelsNum += SideElementIsCpe ? 2 : 1;
2938                }
2939
2940                for (i=0; i < NumBackChannelElements; i++) {
2941                    const int32_t BackElementIsCpe = br.getBits(1);
2942                    const int32_t BackElementTagSelect __unused = br.getBits(4);
2943                    channelsNum += BackElementIsCpe ? 2 : 1;
2944                }
2945                channelsEffectiveNum = channelsNum;
2946
2947                for (i=0; i < NumLfeChannelElements; i++) {
2948                    const int32_t LfeElementTagSelect __unused = br.getBits(4);
2949                    channelsNum += 1;
2950                }
2951                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
2952                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
2953                numChannels = channelsNum;
2954            }
2955        }
2956    }
2957
2958    if (numChannels == 0) {
2959        return ERROR_UNSUPPORTED;
2960    }
2961
2962    int32_t prevSampleRate;
2963    CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));
2964
2965    if (prevSampleRate != sampleRate) {
2966        ALOGV("mpeg4 audio sample rate different from previous setting. "
2967             "was: %d, now: %d", prevSampleRate, sampleRate);
2968    }
2969
2970    mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);
2971
2972    int32_t prevChannelCount;
2973    CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));
2974
2975    if (prevChannelCount != numChannels) {
2976        ALOGV("mpeg4 audio channel count different from previous setting. "
2977             "was: %d, now: %d", prevChannelCount, numChannels);
2978    }
2979
2980    mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);
2981
2982    return OK;
2983}
2984
2985////////////////////////////////////////////////////////////////////////////////
2986
2987MPEG4Source::MPEG4Source(
2988        const sp<MPEG4Extractor> &owner,
2989        const sp<MetaData> &format,
2990        const sp<DataSource> &dataSource,
2991        int32_t timeScale,
2992        const sp<SampleTable> &sampleTable,
2993        Vector<SidxEntry> &sidx,
2994        const Trex *trex,
2995        off64_t firstMoofOffset)
2996    : mOwner(owner),
2997      mFormat(format),
2998      mDataSource(dataSource),
2999      mTimescale(timeScale),
3000      mSampleTable(sampleTable),
3001      mCurrentSampleIndex(0),
3002      mCurrentFragmentIndex(0),
3003      mSegments(sidx),
3004      mTrex(trex),
3005      mFirstMoofOffset(firstMoofOffset),
3006      mCurrentMoofOffset(firstMoofOffset),
3007      mCurrentTime(0),
3008      mCurrentSampleInfoAllocSize(0),
3009      mCurrentSampleInfoSizes(NULL),
3010      mCurrentSampleInfoOffsetsAllocSize(0),
3011      mCurrentSampleInfoOffsets(NULL),
3012      mIsAVC(false),
3013      mIsHEVC(false),
3014      mNALLengthSize(0),
3015      mStarted(false),
3016      mGroup(NULL),
3017      mBuffer(NULL),
3018      mWantsNALFragments(false),
3019      mSrcBuffer(NULL) {
3020
3021    memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
3022
3023    mFormat->findInt32(kKeyCryptoMode, &mCryptoMode);
3024    mDefaultIVSize = 0;
3025    mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
3026    uint32_t keytype;
3027    const void *key;
3028    size_t keysize;
3029    if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
3030        CHECK(keysize <= 16);
3031        memset(mCryptoKey, 0, 16);
3032        memcpy(mCryptoKey, key, keysize);
3033    }
3034
3035    const char *mime;
3036    bool success = mFormat->findCString(kKeyMIMEType, &mime);
3037    CHECK(success);
3038
3039    mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
3040    mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC);
3041
3042    if (mIsAVC) {
3043        uint32_t type;
3044        const void *data;
3045        size_t size;
3046        CHECK(format->findData(kKeyAVCC, &type, &data, &size));
3047
3048        const uint8_t *ptr = (const uint8_t *)data;
3049
3050        CHECK(size >= 7);
3051        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
3052
3053        // The number of bytes used to encode the length of a NAL unit.
3054        mNALLengthSize = 1 + (ptr[4] & 3);
3055    } else if (mIsHEVC) {
3056        uint32_t type;
3057        const void *data;
3058        size_t size;
3059        CHECK(format->findData(kKeyHVCC, &type, &data, &size));
3060
3061        const uint8_t *ptr = (const uint8_t *)data;
3062
3063        CHECK(size >= 7);
3064        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
3065
3066        mNALLengthSize = 1 + (ptr[14 + 7] & 3);
3067    }
3068
3069    CHECK(format->findInt32(kKeyTrackID, &mTrackId));
3070
3071    if (mFirstMoofOffset != 0) {
3072        off64_t offset = mFirstMoofOffset;
3073        parseChunk(&offset);
3074    }
3075}
3076
3077MPEG4Source::~MPEG4Source() {
3078    if (mStarted) {
3079        stop();
3080    }
3081    free(mCurrentSampleInfoSizes);
3082    free(mCurrentSampleInfoOffsets);
3083}
3084
3085status_t MPEG4Source::start(MetaData *params) {
3086    Mutex::Autolock autoLock(mLock);
3087
3088    CHECK(!mStarted);
3089
3090    int32_t val;
3091    if (params && params->findInt32(kKeyWantsNALFragments, &val)
3092        && val != 0) {
3093        mWantsNALFragments = true;
3094    } else {
3095        mWantsNALFragments = false;
3096    }
3097
3098    mGroup = new MediaBufferGroup;
3099
3100    int32_t max_size;
3101    CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size));
3102
3103    mGroup->add_buffer(new MediaBuffer(max_size));
3104
3105    mSrcBuffer = new (std::nothrow) uint8_t[max_size];
3106    if (mSrcBuffer == NULL) {
3107        // file probably specified a bad max size
3108        return ERROR_MALFORMED;
3109    }
3110
3111    mStarted = true;
3112
3113    return OK;
3114}
3115
3116status_t MPEG4Source::stop() {
3117    Mutex::Autolock autoLock(mLock);
3118
3119    CHECK(mStarted);
3120
3121    if (mBuffer != NULL) {
3122        mBuffer->release();
3123        mBuffer = NULL;
3124    }
3125
3126    delete[] mSrcBuffer;
3127    mSrcBuffer = NULL;
3128
3129    delete mGroup;
3130    mGroup = NULL;
3131
3132    mStarted = false;
3133    mCurrentSampleIndex = 0;
3134
3135    return OK;
3136}
3137
3138status_t MPEG4Source::parseChunk(off64_t *offset) {
3139    uint32_t hdr[2];
3140    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3141        return ERROR_IO;
3142    }
3143    uint64_t chunk_size = ntohl(hdr[0]);
3144    uint32_t chunk_type = ntohl(hdr[1]);
3145    off64_t data_offset = *offset + 8;
3146
3147    if (chunk_size == 1) {
3148        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
3149            return ERROR_IO;
3150        }
3151        chunk_size = ntoh64(chunk_size);
3152        data_offset += 8;
3153
3154        if (chunk_size < 16) {
3155            // The smallest valid chunk is 16 bytes long in this case.
3156            return ERROR_MALFORMED;
3157        }
3158    } else if (chunk_size < 8) {
3159        // The smallest valid chunk is 8 bytes long.
3160        return ERROR_MALFORMED;
3161    }
3162
3163    char chunk[5];
3164    MakeFourCCString(chunk_type, chunk);
3165    ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset);
3166
3167    off64_t chunk_data_size = *offset + chunk_size - data_offset;
3168
3169    switch(chunk_type) {
3170
3171        case FOURCC('t', 'r', 'a', 'f'):
3172        case FOURCC('m', 'o', 'o', 'f'): {
3173            off64_t stop_offset = *offset + chunk_size;
3174            *offset = data_offset;
3175            while (*offset < stop_offset) {
3176                status_t err = parseChunk(offset);
3177                if (err != OK) {
3178                    return err;
3179                }
3180            }
3181            if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3182                // *offset points to the box following this moof. Find the next moof from there.
3183
3184                while (true) {
3185                    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3186                        return ERROR_END_OF_STREAM;
3187                    }
3188                    chunk_size = ntohl(hdr[0]);
3189                    chunk_type = ntohl(hdr[1]);
3190                    if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3191                        mNextMoofOffset = *offset;
3192                        break;
3193                    }
3194                    *offset += chunk_size;
3195                }
3196            }
3197            break;
3198        }
3199
3200        case FOURCC('t', 'f', 'h', 'd'): {
3201                status_t err;
3202                if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
3203                    return err;
3204                }
3205                *offset += chunk_size;
3206                break;
3207        }
3208
3209        case FOURCC('t', 'r', 'u', 'n'): {
3210                status_t err;
3211                if (mLastParsedTrackId == mTrackId) {
3212                    if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
3213                        return err;
3214                    }
3215                }
3216
3217                *offset += chunk_size;
3218                break;
3219        }
3220
3221        case FOURCC('s', 'a', 'i', 'z'): {
3222            status_t err;
3223            if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
3224                return err;
3225            }
3226            *offset += chunk_size;
3227            break;
3228        }
3229        case FOURCC('s', 'a', 'i', 'o'): {
3230            status_t err;
3231            if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
3232                return err;
3233            }
3234            *offset += chunk_size;
3235            break;
3236        }
3237
3238        case FOURCC('m', 'd', 'a', 't'): {
3239            // parse DRM info if present
3240            ALOGV("MPEG4Source::parseChunk mdat");
3241            // if saiz/saoi was previously observed, do something with the sampleinfos
3242            *offset += chunk_size;
3243            break;
3244        }
3245
3246        default: {
3247            *offset += chunk_size;
3248            break;
3249        }
3250    }
3251    return OK;
3252}
3253
3254status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
3255        off64_t offset, off64_t /* size */) {
3256    ALOGV("parseSampleAuxiliaryInformationSizes");
3257    // 14496-12 8.7.12
3258    uint8_t version;
3259    if (mDataSource->readAt(
3260            offset, &version, sizeof(version))
3261            < (ssize_t)sizeof(version)) {
3262        return ERROR_IO;
3263    }
3264
3265    if (version != 0) {
3266        return ERROR_UNSUPPORTED;
3267    }
3268    offset++;
3269
3270    uint32_t flags;
3271    if (!mDataSource->getUInt24(offset, &flags)) {
3272        return ERROR_IO;
3273    }
3274    offset += 3;
3275
3276    if (flags & 1) {
3277        uint32_t tmp;
3278        if (!mDataSource->getUInt32(offset, &tmp)) {
3279            return ERROR_MALFORMED;
3280        }
3281        mCurrentAuxInfoType = tmp;
3282        offset += 4;
3283        if (!mDataSource->getUInt32(offset, &tmp)) {
3284            return ERROR_MALFORMED;
3285        }
3286        mCurrentAuxInfoTypeParameter = tmp;
3287        offset += 4;
3288    }
3289
3290    uint8_t defsize;
3291    if (mDataSource->readAt(offset, &defsize, 1) != 1) {
3292        return ERROR_MALFORMED;
3293    }
3294    mCurrentDefaultSampleInfoSize = defsize;
3295    offset++;
3296
3297    uint32_t smplcnt;
3298    if (!mDataSource->getUInt32(offset, &smplcnt)) {
3299        return ERROR_MALFORMED;
3300    }
3301    mCurrentSampleInfoCount = smplcnt;
3302    offset += 4;
3303
3304    if (mCurrentDefaultSampleInfoSize != 0) {
3305        ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
3306        return OK;
3307    }
3308    if (smplcnt > mCurrentSampleInfoAllocSize) {
3309        mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
3310        mCurrentSampleInfoAllocSize = smplcnt;
3311    }
3312
3313    mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
3314    return OK;
3315}
3316
3317status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
3318        off64_t offset, off64_t /* size */) {
3319    ALOGV("parseSampleAuxiliaryInformationOffsets");
3320    // 14496-12 8.7.13
3321    uint8_t version;
3322    if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
3323        return ERROR_IO;
3324    }
3325    offset++;
3326
3327    uint32_t flags;
3328    if (!mDataSource->getUInt24(offset, &flags)) {
3329        return ERROR_IO;
3330    }
3331    offset += 3;
3332
3333    uint32_t entrycount;
3334    if (!mDataSource->getUInt32(offset, &entrycount)) {
3335        return ERROR_IO;
3336    }
3337    offset += 4;
3338
3339    if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
3340        mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8);
3341        mCurrentSampleInfoOffsetsAllocSize = entrycount;
3342    }
3343    mCurrentSampleInfoOffsetCount = entrycount;
3344
3345    for (size_t i = 0; i < entrycount; i++) {
3346        if (version == 0) {
3347            uint32_t tmp;
3348            if (!mDataSource->getUInt32(offset, &tmp)) {
3349                return ERROR_IO;
3350            }
3351            mCurrentSampleInfoOffsets[i] = tmp;
3352            offset += 4;
3353        } else {
3354            uint64_t tmp;
3355            if (!mDataSource->getUInt64(offset, &tmp)) {
3356                return ERROR_IO;
3357            }
3358            mCurrentSampleInfoOffsets[i] = tmp;
3359            offset += 8;
3360        }
3361    }
3362
3363    // parse clear/encrypted data
3364
3365    off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
3366
3367    drmoffset += mCurrentMoofOffset;
3368    int ivlength;
3369    CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength));
3370
3371    // only 0, 8 and 16 byte initialization vectors are supported
3372    if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
3373        ALOGW("unsupported IV length: %d", ivlength);
3374        return ERROR_MALFORMED;
3375    }
3376    // read CencSampleAuxiliaryDataFormats
3377    for (size_t i = 0; i < mCurrentSampleInfoCount; i++) {
3378        if (i >= mCurrentSamples.size()) {
3379            ALOGW("too few samples");
3380            break;
3381        }
3382        Sample *smpl = &mCurrentSamples.editItemAt(i);
3383
3384        memset(smpl->iv, 0, 16);
3385        if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) {
3386            return ERROR_IO;
3387        }
3388
3389        drmoffset += ivlength;
3390
3391        int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
3392        if (smplinfosize == 0) {
3393            smplinfosize = mCurrentSampleInfoSizes[i];
3394        }
3395        if (smplinfosize > ivlength) {
3396            uint16_t numsubsamples;
3397            if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) {
3398                return ERROR_IO;
3399            }
3400            drmoffset += 2;
3401            for (size_t j = 0; j < numsubsamples; j++) {
3402                uint16_t numclear;
3403                uint32_t numencrypted;
3404                if (!mDataSource->getUInt16(drmoffset, &numclear)) {
3405                    return ERROR_IO;
3406                }
3407                drmoffset += 2;
3408                if (!mDataSource->getUInt32(drmoffset, &numencrypted)) {
3409                    return ERROR_IO;
3410                }
3411                drmoffset += 4;
3412                smpl->clearsizes.add(numclear);
3413                smpl->encryptedsizes.add(numencrypted);
3414            }
3415        } else {
3416            smpl->clearsizes.add(0);
3417            smpl->encryptedsizes.add(smpl->size);
3418        }
3419    }
3420
3421
3422    return OK;
3423}
3424
3425status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
3426
3427    if (size < 8) {
3428        return -EINVAL;
3429    }
3430
3431    uint32_t flags;
3432    if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
3433        return ERROR_MALFORMED;
3434    }
3435
3436    if (flags & 0xff000000) {
3437        return -EINVAL;
3438    }
3439
3440    if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
3441        return ERROR_MALFORMED;
3442    }
3443
3444    if (mLastParsedTrackId != mTrackId) {
3445        // this is not the right track, skip it
3446        return OK;
3447    }
3448
3449    mTrackFragmentHeaderInfo.mFlags = flags;
3450    mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
3451    offset += 8;
3452    size -= 8;
3453
3454    ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
3455
3456    if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
3457        if (size < 8) {
3458            return -EINVAL;
3459        }
3460
3461        if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
3462            return ERROR_MALFORMED;
3463        }
3464        offset += 8;
3465        size -= 8;
3466    }
3467
3468    if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
3469        if (size < 4) {
3470            return -EINVAL;
3471        }
3472
3473        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
3474            return ERROR_MALFORMED;
3475        }
3476        offset += 4;
3477        size -= 4;
3478    }
3479
3480    if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3481        if (size < 4) {
3482            return -EINVAL;
3483        }
3484
3485        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
3486            return ERROR_MALFORMED;
3487        }
3488        offset += 4;
3489        size -= 4;
3490    }
3491
3492    if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3493        if (size < 4) {
3494            return -EINVAL;
3495        }
3496
3497        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
3498            return ERROR_MALFORMED;
3499        }
3500        offset += 4;
3501        size -= 4;
3502    }
3503
3504    if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3505        if (size < 4) {
3506            return -EINVAL;
3507        }
3508
3509        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
3510            return ERROR_MALFORMED;
3511        }
3512        offset += 4;
3513        size -= 4;
3514    }
3515
3516    if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
3517        mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
3518    }
3519
3520    mTrackFragmentHeaderInfo.mDataOffset = 0;
3521    return OK;
3522}
3523
3524status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
3525
3526    ALOGV("MPEG4Extractor::parseTrackFragmentRun");
3527    if (size < 8) {
3528        return -EINVAL;
3529    }
3530
3531    enum {
3532        kDataOffsetPresent                  = 0x01,
3533        kFirstSampleFlagsPresent            = 0x04,
3534        kSampleDurationPresent              = 0x100,
3535        kSampleSizePresent                  = 0x200,
3536        kSampleFlagsPresent                 = 0x400,
3537        kSampleCompositionTimeOffsetPresent = 0x800,
3538    };
3539
3540    uint32_t flags;
3541    if (!mDataSource->getUInt32(offset, &flags)) {
3542        return ERROR_MALFORMED;
3543    }
3544    ALOGV("fragment run flags: %08x", flags);
3545
3546    if (flags & 0xff000000) {
3547        return -EINVAL;
3548    }
3549
3550    if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
3551        // These two shall not be used together.
3552        return -EINVAL;
3553    }
3554
3555    uint32_t sampleCount;
3556    if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
3557        return ERROR_MALFORMED;
3558    }
3559    offset += 8;
3560    size -= 8;
3561
3562    uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
3563
3564    uint32_t firstSampleFlags = 0;
3565
3566    if (flags & kDataOffsetPresent) {
3567        if (size < 4) {
3568            return -EINVAL;
3569        }
3570
3571        int32_t dataOffsetDelta;
3572        if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
3573            return ERROR_MALFORMED;
3574        }
3575
3576        dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
3577
3578        offset += 4;
3579        size -= 4;
3580    }
3581
3582    if (flags & kFirstSampleFlagsPresent) {
3583        if (size < 4) {
3584            return -EINVAL;
3585        }
3586
3587        if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
3588            return ERROR_MALFORMED;
3589        }
3590        offset += 4;
3591        size -= 4;
3592    }
3593
3594    uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
3595             sampleCtsOffset = 0;
3596
3597    size_t bytesPerSample = 0;
3598    if (flags & kSampleDurationPresent) {
3599        bytesPerSample += 4;
3600    } else if (mTrackFragmentHeaderInfo.mFlags
3601            & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3602        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
3603    } else if (mTrex) {
3604        sampleDuration = mTrex->default_sample_duration;
3605    }
3606
3607    if (flags & kSampleSizePresent) {
3608        bytesPerSample += 4;
3609    } else if (mTrackFragmentHeaderInfo.mFlags
3610            & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3611        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
3612    } else {
3613        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
3614    }
3615
3616    if (flags & kSampleFlagsPresent) {
3617        bytesPerSample += 4;
3618    } else if (mTrackFragmentHeaderInfo.mFlags
3619            & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3620        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
3621    } else {
3622        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
3623    }
3624
3625    if (flags & kSampleCompositionTimeOffsetPresent) {
3626        bytesPerSample += 4;
3627    } else {
3628        sampleCtsOffset = 0;
3629    }
3630
3631    if (size < (off64_t)sampleCount * bytesPerSample) {
3632        return -EINVAL;
3633    }
3634
3635    Sample tmp;
3636    for (uint32_t i = 0; i < sampleCount; ++i) {
3637        if (flags & kSampleDurationPresent) {
3638            if (!mDataSource->getUInt32(offset, &sampleDuration)) {
3639                return ERROR_MALFORMED;
3640            }
3641            offset += 4;
3642        }
3643
3644        if (flags & kSampleSizePresent) {
3645            if (!mDataSource->getUInt32(offset, &sampleSize)) {
3646                return ERROR_MALFORMED;
3647            }
3648            offset += 4;
3649        }
3650
3651        if (flags & kSampleFlagsPresent) {
3652            if (!mDataSource->getUInt32(offset, &sampleFlags)) {
3653                return ERROR_MALFORMED;
3654            }
3655            offset += 4;
3656        }
3657
3658        if (flags & kSampleCompositionTimeOffsetPresent) {
3659            if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
3660                return ERROR_MALFORMED;
3661            }
3662            offset += 4;
3663        }
3664
3665        ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
3666              " flags 0x%08x", i + 1,
3667                dataOffset, sampleSize, sampleDuration,
3668                (flags & kFirstSampleFlagsPresent) && i == 0
3669                    ? firstSampleFlags : sampleFlags);
3670        tmp.offset = dataOffset;
3671        tmp.size = sampleSize;
3672        tmp.duration = sampleDuration;
3673        tmp.compositionOffset = sampleCtsOffset;
3674        mCurrentSamples.add(tmp);
3675
3676        dataOffset += sampleSize;
3677    }
3678
3679    mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
3680
3681    return OK;
3682}
3683
3684sp<MetaData> MPEG4Source::getFormat() {
3685    Mutex::Autolock autoLock(mLock);
3686
3687    return mFormat;
3688}
3689
3690size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
3691    switch (mNALLengthSize) {
3692        case 1:
3693            return *data;
3694        case 2:
3695            return U16_AT(data);
3696        case 3:
3697            return ((size_t)data[0] << 16) | U16_AT(&data[1]);
3698        case 4:
3699            return U32_AT(data);
3700    }
3701
3702    // This cannot happen, mNALLengthSize springs to life by adding 1 to
3703    // a 2-bit integer.
3704    CHECK(!"Should not be here.");
3705
3706    return 0;
3707}
3708
3709status_t MPEG4Source::read(
3710        MediaBuffer **out, const ReadOptions *options) {
3711    Mutex::Autolock autoLock(mLock);
3712
3713    CHECK(mStarted);
3714
3715    if (mFirstMoofOffset > 0) {
3716        return fragmentedRead(out, options);
3717    }
3718
3719    *out = NULL;
3720
3721    int64_t targetSampleTimeUs = -1;
3722
3723    int64_t seekTimeUs;
3724    ReadOptions::SeekMode mode;
3725    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3726        uint32_t findFlags = 0;
3727        switch (mode) {
3728            case ReadOptions::SEEK_PREVIOUS_SYNC:
3729                findFlags = SampleTable::kFlagBefore;
3730                break;
3731            case ReadOptions::SEEK_NEXT_SYNC:
3732                findFlags = SampleTable::kFlagAfter;
3733                break;
3734            case ReadOptions::SEEK_CLOSEST_SYNC:
3735            case ReadOptions::SEEK_CLOSEST:
3736                findFlags = SampleTable::kFlagClosest;
3737                break;
3738            default:
3739                CHECK(!"Should not be here.");
3740                break;
3741        }
3742
3743        uint32_t sampleIndex;
3744        status_t err = mSampleTable->findSampleAtTime(
3745                seekTimeUs, 1000000, mTimescale,
3746                &sampleIndex, findFlags);
3747
3748        if (mode == ReadOptions::SEEK_CLOSEST) {
3749            // We found the closest sample already, now we want the sync
3750            // sample preceding it (or the sample itself of course), even
3751            // if the subsequent sync sample is closer.
3752            findFlags = SampleTable::kFlagBefore;
3753        }
3754
3755        uint32_t syncSampleIndex;
3756        if (err == OK) {
3757            err = mSampleTable->findSyncSampleNear(
3758                    sampleIndex, &syncSampleIndex, findFlags);
3759        }
3760
3761        uint32_t sampleTime;
3762        if (err == OK) {
3763            err = mSampleTable->getMetaDataForSample(
3764                    sampleIndex, NULL, NULL, &sampleTime);
3765        }
3766
3767        if (err != OK) {
3768            if (err == ERROR_OUT_OF_RANGE) {
3769                // An attempt to seek past the end of the stream would
3770                // normally cause this ERROR_OUT_OF_RANGE error. Propagating
3771                // this all the way to the MediaPlayer would cause abnormal
3772                // termination. Legacy behaviour appears to be to behave as if
3773                // we had seeked to the end of stream, ending normally.
3774                err = ERROR_END_OF_STREAM;
3775            }
3776            ALOGV("end of stream");
3777            return err;
3778        }
3779
3780        if (mode == ReadOptions::SEEK_CLOSEST) {
3781            targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
3782        }
3783
3784#if 0
3785        uint32_t syncSampleTime;
3786        CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
3787                    syncSampleIndex, NULL, NULL, &syncSampleTime));
3788
3789        ALOGI("seek to time %lld us => sample at time %lld us, "
3790             "sync sample at time %lld us",
3791             seekTimeUs,
3792             sampleTime * 1000000ll / mTimescale,
3793             syncSampleTime * 1000000ll / mTimescale);
3794#endif
3795
3796        mCurrentSampleIndex = syncSampleIndex;
3797        if (mBuffer != NULL) {
3798            mBuffer->release();
3799            mBuffer = NULL;
3800        }
3801
3802        // fall through
3803    }
3804
3805    off64_t offset;
3806    size_t size;
3807    uint32_t cts, stts;
3808    bool isSyncSample;
3809    bool newBuffer = false;
3810    if (mBuffer == NULL) {
3811        newBuffer = true;
3812
3813        status_t err =
3814            mSampleTable->getMetaDataForSample(
3815                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
3816
3817        if (err != OK) {
3818            return err;
3819        }
3820
3821        err = mGroup->acquire_buffer(&mBuffer);
3822
3823        if (err != OK) {
3824            CHECK(mBuffer == NULL);
3825            return err;
3826        }
3827    }
3828
3829    if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) {
3830        if (newBuffer) {
3831            ssize_t num_bytes_read =
3832                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3833
3834            if (num_bytes_read < (ssize_t)size) {
3835                mBuffer->release();
3836                mBuffer = NULL;
3837
3838                return ERROR_IO;
3839            }
3840
3841            CHECK(mBuffer != NULL);
3842            mBuffer->set_range(0, size);
3843            mBuffer->meta_data()->clear();
3844            mBuffer->meta_data()->setInt64(
3845                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3846            mBuffer->meta_data()->setInt64(
3847                    kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
3848
3849            if (targetSampleTimeUs >= 0) {
3850                mBuffer->meta_data()->setInt64(
3851                        kKeyTargetTime, targetSampleTimeUs);
3852            }
3853
3854            if (isSyncSample) {
3855                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3856            }
3857
3858            ++mCurrentSampleIndex;
3859        }
3860
3861        if (!mIsAVC && !mIsHEVC) {
3862            *out = mBuffer;
3863            mBuffer = NULL;
3864
3865            return OK;
3866        }
3867
3868        // Each NAL unit is split up into its constituent fragments and
3869        // each one of them returned in its own buffer.
3870
3871        CHECK(mBuffer->range_length() >= mNALLengthSize);
3872
3873        const uint8_t *src =
3874            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3875
3876        size_t nal_size = parseNALSize(src);
3877        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3878            ALOGE("incomplete NAL unit.");
3879
3880            mBuffer->release();
3881            mBuffer = NULL;
3882
3883            return ERROR_MALFORMED;
3884        }
3885
3886        MediaBuffer *clone = mBuffer->clone();
3887        CHECK(clone != NULL);
3888        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3889
3890        CHECK(mBuffer != NULL);
3891        mBuffer->set_range(
3892                mBuffer->range_offset() + mNALLengthSize + nal_size,
3893                mBuffer->range_length() - mNALLengthSize - nal_size);
3894
3895        if (mBuffer->range_length() == 0) {
3896            mBuffer->release();
3897            mBuffer = NULL;
3898        }
3899
3900        *out = clone;
3901
3902        return OK;
3903    } else {
3904        // Whole NAL units are returned but each fragment is prefixed by
3905        // the start code (0x00 00 00 01).
3906        ssize_t num_bytes_read = 0;
3907        int32_t drm = 0;
3908        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3909        if (usesDRM) {
3910            num_bytes_read =
3911                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3912        } else {
3913            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3914        }
3915
3916        if (num_bytes_read < (ssize_t)size) {
3917            mBuffer->release();
3918            mBuffer = NULL;
3919
3920            return ERROR_IO;
3921        }
3922
3923        if (usesDRM) {
3924            CHECK(mBuffer != NULL);
3925            mBuffer->set_range(0, size);
3926
3927        } else {
3928            uint8_t *dstData = (uint8_t *)mBuffer->data();
3929            size_t srcOffset = 0;
3930            size_t dstOffset = 0;
3931
3932            while (srcOffset < size) {
3933                bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
3934                size_t nalLength = 0;
3935                if (!isMalFormed) {
3936                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
3937                    srcOffset += mNALLengthSize;
3938                    isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
3939                }
3940
3941                if (isMalFormed) {
3942                    ALOGE("Video is malformed");
3943                    mBuffer->release();
3944                    mBuffer = NULL;
3945                    return ERROR_MALFORMED;
3946                }
3947
3948                if (nalLength == 0) {
3949                    continue;
3950                }
3951
3952                CHECK(dstOffset + 4 <= mBuffer->size());
3953
3954                dstData[dstOffset++] = 0;
3955                dstData[dstOffset++] = 0;
3956                dstData[dstOffset++] = 0;
3957                dstData[dstOffset++] = 1;
3958                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
3959                srcOffset += nalLength;
3960                dstOffset += nalLength;
3961            }
3962            CHECK_EQ(srcOffset, size);
3963            CHECK(mBuffer != NULL);
3964            mBuffer->set_range(0, dstOffset);
3965        }
3966
3967        mBuffer->meta_data()->clear();
3968        mBuffer->meta_data()->setInt64(
3969                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3970        mBuffer->meta_data()->setInt64(
3971                kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
3972
3973        if (targetSampleTimeUs >= 0) {
3974            mBuffer->meta_data()->setInt64(
3975                    kKeyTargetTime, targetSampleTimeUs);
3976        }
3977
3978        if (isSyncSample) {
3979            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3980        }
3981
3982        ++mCurrentSampleIndex;
3983
3984        *out = mBuffer;
3985        mBuffer = NULL;
3986
3987        return OK;
3988    }
3989}
3990
3991status_t MPEG4Source::fragmentedRead(
3992        MediaBuffer **out, const ReadOptions *options) {
3993
3994    ALOGV("MPEG4Source::fragmentedRead");
3995
3996    CHECK(mStarted);
3997
3998    *out = NULL;
3999
4000    int64_t targetSampleTimeUs = -1;
4001
4002    int64_t seekTimeUs;
4003    ReadOptions::SeekMode mode;
4004    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
4005
4006        int numSidxEntries = mSegments.size();
4007        if (numSidxEntries != 0) {
4008            int64_t totalTime = 0;
4009            off64_t totalOffset = mFirstMoofOffset;
4010            for (int i = 0; i < numSidxEntries; i++) {
4011                const SidxEntry *se = &mSegments[i];
4012                if (totalTime + se->mDurationUs > seekTimeUs) {
4013                    // The requested time is somewhere in this segment
4014                    if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
4015                        (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
4016                        (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
4017                        // requested next sync, or closest sync and it was closer to the end of
4018                        // this segment
4019                        totalTime += se->mDurationUs;
4020                        totalOffset += se->mSize;
4021                    }
4022                    break;
4023                }
4024                totalTime += se->mDurationUs;
4025                totalOffset += se->mSize;
4026            }
4027            mCurrentMoofOffset = totalOffset;
4028            mCurrentSamples.clear();
4029            mCurrentSampleIndex = 0;
4030            parseChunk(&totalOffset);
4031            mCurrentTime = totalTime * mTimescale / 1000000ll;
4032        } else {
4033            // without sidx boxes, we can only seek to 0
4034            mCurrentMoofOffset = mFirstMoofOffset;
4035            mCurrentSamples.clear();
4036            mCurrentSampleIndex = 0;
4037            off64_t tmp = mCurrentMoofOffset;
4038            parseChunk(&tmp);
4039            mCurrentTime = 0;
4040        }
4041
4042        if (mBuffer != NULL) {
4043            mBuffer->release();
4044            mBuffer = NULL;
4045        }
4046
4047        // fall through
4048    }
4049
4050    off64_t offset = 0;
4051    size_t size = 0;
4052    uint32_t cts = 0;
4053    bool isSyncSample = false;
4054    bool newBuffer = false;
4055    if (mBuffer == NULL) {
4056        newBuffer = true;
4057
4058        if (mCurrentSampleIndex >= mCurrentSamples.size()) {
4059            // move to next fragment if there is one
4060            if (mNextMoofOffset <= mCurrentMoofOffset) {
4061                return ERROR_END_OF_STREAM;
4062            }
4063            off64_t nextMoof = mNextMoofOffset;
4064            mCurrentMoofOffset = nextMoof;
4065            mCurrentSamples.clear();
4066            mCurrentSampleIndex = 0;
4067            parseChunk(&nextMoof);
4068            if (mCurrentSampleIndex >= mCurrentSamples.size()) {
4069                return ERROR_END_OF_STREAM;
4070            }
4071        }
4072
4073        const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
4074        offset = smpl->offset;
4075        size = smpl->size;
4076        cts = mCurrentTime + smpl->compositionOffset;
4077        mCurrentTime += smpl->duration;
4078        isSyncSample = (mCurrentSampleIndex == 0); // XXX
4079
4080        status_t err = mGroup->acquire_buffer(&mBuffer);
4081
4082        if (err != OK) {
4083            CHECK(mBuffer == NULL);
4084            ALOGV("acquire_buffer returned %d", err);
4085            return err;
4086        }
4087    }
4088
4089    const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
4090    const sp<MetaData> bufmeta = mBuffer->meta_data();
4091    bufmeta->clear();
4092    if (smpl->encryptedsizes.size()) {
4093        // store clear/encrypted lengths in metadata
4094        bufmeta->setData(kKeyPlainSizes, 0,
4095                smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
4096        bufmeta->setData(kKeyEncryptedSizes, 0,
4097                smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
4098        bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size?
4099        bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
4100        bufmeta->setInt32(kKeyCryptoMode, mCryptoMode);
4101        bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16);
4102    }
4103
4104    if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) {
4105        if (newBuffer) {
4106            ssize_t num_bytes_read =
4107                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
4108
4109            if (num_bytes_read < (ssize_t)size) {
4110                mBuffer->release();
4111                mBuffer = NULL;
4112
4113                ALOGV("i/o error");
4114                return ERROR_IO;
4115            }
4116
4117            CHECK(mBuffer != NULL);
4118            mBuffer->set_range(0, size);
4119            mBuffer->meta_data()->setInt64(
4120                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4121            mBuffer->meta_data()->setInt64(
4122                    kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
4123
4124            if (targetSampleTimeUs >= 0) {
4125                mBuffer->meta_data()->setInt64(
4126                        kKeyTargetTime, targetSampleTimeUs);
4127            }
4128
4129            if (isSyncSample) {
4130                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4131            }
4132
4133            ++mCurrentSampleIndex;
4134        }
4135
4136        if (!mIsAVC && !mIsHEVC) {
4137            *out = mBuffer;
4138            mBuffer = NULL;
4139
4140            return OK;
4141        }
4142
4143        // Each NAL unit is split up into its constituent fragments and
4144        // each one of them returned in its own buffer.
4145
4146        CHECK(mBuffer->range_length() >= mNALLengthSize);
4147
4148        const uint8_t *src =
4149            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
4150
4151        size_t nal_size = parseNALSize(src);
4152        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
4153            ALOGE("incomplete NAL unit.");
4154
4155            mBuffer->release();
4156            mBuffer = NULL;
4157
4158            return ERROR_MALFORMED;
4159        }
4160
4161        MediaBuffer *clone = mBuffer->clone();
4162        CHECK(clone != NULL);
4163        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
4164
4165        CHECK(mBuffer != NULL);
4166        mBuffer->set_range(
4167                mBuffer->range_offset() + mNALLengthSize + nal_size,
4168                mBuffer->range_length() - mNALLengthSize - nal_size);
4169
4170        if (mBuffer->range_length() == 0) {
4171            mBuffer->release();
4172            mBuffer = NULL;
4173        }
4174
4175        *out = clone;
4176
4177        return OK;
4178    } else {
4179        ALOGV("whole NAL");
4180        // Whole NAL units are returned but each fragment is prefixed by
4181        // the start code (0x00 00 00 01).
4182        ssize_t num_bytes_read = 0;
4183        int32_t drm = 0;
4184        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
4185        if (usesDRM) {
4186            num_bytes_read =
4187                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
4188        } else {
4189            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
4190        }
4191
4192        if (num_bytes_read < (ssize_t)size) {
4193            mBuffer->release();
4194            mBuffer = NULL;
4195
4196            ALOGV("i/o error");
4197            return ERROR_IO;
4198        }
4199
4200        if (usesDRM) {
4201            CHECK(mBuffer != NULL);
4202            mBuffer->set_range(0, size);
4203
4204        } else {
4205            uint8_t *dstData = (uint8_t *)mBuffer->data();
4206            size_t srcOffset = 0;
4207            size_t dstOffset = 0;
4208
4209            while (srcOffset < size) {
4210                bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
4211                size_t nalLength = 0;
4212                if (!isMalFormed) {
4213                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
4214                    srcOffset += mNALLengthSize;
4215                    isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
4216                }
4217
4218                if (isMalFormed) {
4219                    ALOGE("Video is malformed");
4220                    mBuffer->release();
4221                    mBuffer = NULL;
4222                    return ERROR_MALFORMED;
4223                }
4224
4225                if (nalLength == 0) {
4226                    continue;
4227                }
4228
4229                CHECK(dstOffset + 4 <= mBuffer->size());
4230
4231                dstData[dstOffset++] = 0;
4232                dstData[dstOffset++] = 0;
4233                dstData[dstOffset++] = 0;
4234                dstData[dstOffset++] = 1;
4235                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
4236                srcOffset += nalLength;
4237                dstOffset += nalLength;
4238            }
4239            CHECK_EQ(srcOffset, size);
4240            CHECK(mBuffer != NULL);
4241            mBuffer->set_range(0, dstOffset);
4242        }
4243
4244        mBuffer->meta_data()->setInt64(
4245                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4246        mBuffer->meta_data()->setInt64(
4247                kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
4248
4249        if (targetSampleTimeUs >= 0) {
4250            mBuffer->meta_data()->setInt64(
4251                    kKeyTargetTime, targetSampleTimeUs);
4252        }
4253
4254        if (isSyncSample) {
4255            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4256        }
4257
4258        ++mCurrentSampleIndex;
4259
4260        *out = mBuffer;
4261        mBuffer = NULL;
4262
4263        return OK;
4264    }
4265}
4266
4267MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
4268        const char *mimePrefix) {
4269    for (Track *track = mFirstTrack; track != NULL; track = track->next) {
4270        const char *mime;
4271        if (track->meta != NULL
4272                && track->meta->findCString(kKeyMIMEType, &mime)
4273                && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
4274            return track;
4275        }
4276    }
4277
4278    return NULL;
4279}
4280
4281static bool LegacySniffMPEG4(
4282        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
4283    uint8_t header[8];
4284
4285    ssize_t n = source->readAt(4, header, sizeof(header));
4286    if (n < (ssize_t)sizeof(header)) {
4287        return false;
4288    }
4289
4290    if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
4291        || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
4292        || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
4293        || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
4294        || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
4295        || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) {
4296        *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4297        *confidence = 0.4;
4298
4299        return true;
4300    }
4301
4302    return false;
4303}
4304
4305static bool isCompatibleBrand(uint32_t fourcc) {
4306    static const uint32_t kCompatibleBrands[] = {
4307        FOURCC('i', 's', 'o', 'm'),
4308        FOURCC('i', 's', 'o', '2'),
4309        FOURCC('a', 'v', 'c', '1'),
4310        FOURCC('h', 'v', 'c', '1'),
4311        FOURCC('h', 'e', 'v', '1'),
4312        FOURCC('3', 'g', 'p', '4'),
4313        FOURCC('m', 'p', '4', '1'),
4314        FOURCC('m', 'p', '4', '2'),
4315
4316        // Won't promise that the following file types can be played.
4317        // Just give these file types a chance.
4318        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
4319        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
4320
4321        FOURCC('3', 'g', '2', 'a'),  // 3GPP2
4322        FOURCC('3', 'g', '2', 'b'),
4323    };
4324
4325    for (size_t i = 0;
4326         i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
4327         ++i) {
4328        if (kCompatibleBrands[i] == fourcc) {
4329            return true;
4330        }
4331    }
4332
4333    return false;
4334}
4335
4336// Attempt to actually parse the 'ftyp' atom and determine if a suitable
4337// compatible brand is present.
4338// Also try to identify where this file's metadata ends
4339// (end of the 'moov' atom) and report it to the caller as part of
4340// the metadata.
4341static bool BetterSniffMPEG4(
4342        const sp<DataSource> &source, String8 *mimeType, float *confidence,
4343        sp<AMessage> *meta) {
4344    // We scan up to 128 bytes to identify this file as an MP4.
4345    static const off64_t kMaxScanOffset = 128ll;
4346
4347    off64_t offset = 0ll;
4348    bool foundGoodFileType = false;
4349    off64_t moovAtomEndOffset = -1ll;
4350    bool done = false;
4351
4352    while (!done && offset < kMaxScanOffset) {
4353        uint32_t hdr[2];
4354        if (source->readAt(offset, hdr, 8) < 8) {
4355            return false;
4356        }
4357
4358        uint64_t chunkSize = ntohl(hdr[0]);
4359        uint32_t chunkType = ntohl(hdr[1]);
4360        off64_t chunkDataOffset = offset + 8;
4361
4362        if (chunkSize == 1) {
4363            if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
4364                return false;
4365            }
4366
4367            chunkSize = ntoh64(chunkSize);
4368            chunkDataOffset += 8;
4369
4370            if (chunkSize < 16) {
4371                // The smallest valid chunk is 16 bytes long in this case.
4372                return false;
4373            }
4374        } else if (chunkSize < 8) {
4375            // The smallest valid chunk is 8 bytes long.
4376            return false;
4377        }
4378
4379        off64_t chunkDataSize = offset + chunkSize - chunkDataOffset;
4380
4381        char chunkstring[5];
4382        MakeFourCCString(chunkType, chunkstring);
4383        ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, offset);
4384        switch (chunkType) {
4385            case FOURCC('f', 't', 'y', 'p'):
4386            {
4387                if (chunkDataSize < 8) {
4388                    return false;
4389                }
4390
4391                uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
4392                for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
4393                    if (i == 1) {
4394                        // Skip this index, it refers to the minorVersion,
4395                        // not a brand.
4396                        continue;
4397                    }
4398
4399                    uint32_t brand;
4400                    if (source->readAt(
4401                                chunkDataOffset + 4 * i, &brand, 4) < 4) {
4402                        return false;
4403                    }
4404
4405                    brand = ntohl(brand);
4406
4407                    if (isCompatibleBrand(brand)) {
4408                        foundGoodFileType = true;
4409                        break;
4410                    }
4411                }
4412
4413                if (!foundGoodFileType) {
4414                    return false;
4415                }
4416
4417                break;
4418            }
4419
4420            case FOURCC('m', 'o', 'o', 'v'):
4421            {
4422                moovAtomEndOffset = offset + chunkSize;
4423
4424                done = true;
4425                break;
4426            }
4427
4428            default:
4429                break;
4430        }
4431
4432        offset += chunkSize;
4433    }
4434
4435    if (!foundGoodFileType) {
4436        return false;
4437    }
4438
4439    *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4440    *confidence = 0.4f;
4441
4442    if (moovAtomEndOffset >= 0) {
4443        *meta = new AMessage;
4444        (*meta)->setInt64("meta-data-size", moovAtomEndOffset);
4445
4446        ALOGV("found metadata size: %lld", moovAtomEndOffset);
4447    }
4448
4449    return true;
4450}
4451
4452bool SniffMPEG4(
4453        const sp<DataSource> &source, String8 *mimeType, float *confidence,
4454        sp<AMessage> *meta) {
4455    if (BetterSniffMPEG4(source, mimeType, confidence, meta)) {
4456        return true;
4457    }
4458
4459    if (LegacySniffMPEG4(source, mimeType, confidence)) {
4460        ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
4461        return true;
4462    }
4463
4464    return false;
4465}
4466
4467}  // namespace android
4468