MPEG4Extractor.cpp revision e5f0966c76bd0a7e81e4205c8d8b55e6b34c833e
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MPEG4Extractor"
19#include <utils/Log.h>
20
21#include "include/MPEG4Extractor.h"
22#include "include/SampleTable.h"
23#include "include/ESDS.h"
24
25#include <ctype.h>
26#include <stdint.h>
27#include <stdlib.h>
28#include <string.h>
29
30#include <media/stagefright/foundation/ABitReader.h>
31#include <media/stagefright/foundation/ABuffer.h>
32#include <media/stagefright/foundation/ADebug.h>
33#include <media/stagefright/foundation/AMessage.h>
34#include <media/stagefright/foundation/AUtils.h>
35#include <media/stagefright/MediaBuffer.h>
36#include <media/stagefright/MediaBufferGroup.h>
37#include <media/stagefright/MediaDefs.h>
38#include <media/stagefright/MediaSource.h>
39#include <media/stagefright/MetaData.h>
40#include <utils/String8.h>
41
42namespace android {
43
// MediaSource subclass declared here for use by the MPEG-4 extractor. Only the
// declaration is visible in this part of the file; the member functions are
// defined elsewhere.
44class MPEG4Source : public MediaSource {
45public:
46    // Caller retains ownership of both "dataSource" and "sampleTable".
47    MPEG4Source(const sp<MetaData> &format,
48                const sp<DataSource> &dataSource,
49                int32_t timeScale,
50                const sp<SampleTable> &sampleTable,
51                Vector<SidxEntry> &sidx,
52                off64_t firstMoofOffset);
53
54    virtual status_t start(MetaData *params = NULL);
55    virtual status_t stop();
56
57    virtual sp<MetaData> getFormat();
58
    // Two read entry points with the same signature.
    // NOTE(review): fragmentedRead() is presumably the path used for
    // fragmented (moof-based) files -- confirm against its definition.
59    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
60    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);
61
62protected:
    // Protected destructor: instances cannot be deleted directly by outside code.
63    virtual ~MPEG4Source();
64
65private:
66    Mutex mLock;
67
68    sp<MetaData> mFormat;
69    sp<DataSource> mDataSource;
70    int32_t mTimescale;
71    sp<SampleTable> mSampleTable;
72    uint32_t mCurrentSampleIndex;
73    uint32_t mCurrentFragmentIndex;
    // Reference member: the referenced Vector is owned outside this object.
    // NOTE(review): presumably bound to the constructor's "sidx" argument --
    // confirm in the constructor definition.
74    Vector<SidxEntry> &mSegments;
75    off64_t mFirstMoofOffset;
76    off64_t mCurrentMoofOffset;
77    off64_t mNextMoofOffset;
78    uint32_t mCurrentTime;
79    int32_t mLastParsedTrackId;
80    int32_t mTrackId;
81
82    int32_t mCryptoMode;    // passed in from extractor
83    int32_t mDefaultIVSize; // passed in from extractor
84    uint8_t mCryptoKey[16]; // passed in from extractor
85    uint32_t mCurrentAuxInfoType;
86    uint32_t mCurrentAuxInfoTypeParameter;
87    int32_t mCurrentDefaultSampleInfoSize;
88    uint32_t mCurrentSampleInfoCount;
89    uint32_t mCurrentSampleInfoAllocSize;
90    uint8_t* mCurrentSampleInfoSizes;
91    uint32_t mCurrentSampleInfoOffsetCount;
92    uint32_t mCurrentSampleInfoOffsetsAllocSize;
93    uint64_t* mCurrentSampleInfoOffsets;
94
95    bool mIsAVC;
96    size_t mNALLengthSize;
97
98    bool mStarted;
99
100    MediaBufferGroup *mGroup;
101
102    MediaBuffer *mBuffer;
103
104    bool mWantsNALFragments;
105
106    uint8_t *mSrcBuffer;
107
    // Private parsing helpers; each returns a status_t.
108    size_t parseNALSize(const uint8_t *data) const;
109    status_t parseChunk(off64_t *offset);
110    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
111    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
112    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
113    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
114
    // Values collected from a track fragment header.
    // NOTE(review): the Flags constants look like the tf_flags bits of the
    // 'tfhd' box in ISO/IEC 14496-12 and are presumably tested against
    // mFlags -- confirm against parseTrackFragmentHeader().
115    struct TrackFragmentHeaderInfo {
116        enum Flags {
117            kBaseDataOffsetPresent         = 0x01,
118            kSampleDescriptionIndexPresent = 0x02,
119            kDefaultSampleDurationPresent  = 0x08,
120            kDefaultSampleSizePresent      = 0x10,
121            kDefaultSampleFlagsPresent     = 0x20,
122            kDurationIsEmpty               = 0x10000,
123        };
124
125        uint32_t mTrackID;
126        uint32_t mFlags;
127        uint64_t mBaseDataOffset;
128        uint32_t mSampleDescriptionIndex;
129        uint32_t mDefaultSampleDuration;
130        uint32_t mDefaultSampleSize;
131        uint32_t mDefaultSampleFlags;
132
133        uint64_t mDataOffset;
134    };
135    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;
136
    // One entry of mCurrentSamples: location, size and duration of a sample
    // plus a 16-byte IV and two size lists.
    // NOTE(review): clearsizes/encryptedsizes are presumably per-subsample
    // byte counts for encrypted content -- confirm where they are filled in.
137    struct Sample {
138        off64_t offset;
139        size_t size;
140        uint32_t duration;
141        uint8_t iv[16];
142        Vector<size_t> clearsizes;
143        Vector<size_t> encryptedsizes;
144    };
145    Vector<Sample> mCurrentSamples;
146
    // Copy constructor and copy assignment are declared private, so the class
    // cannot be copied by outside code.
147    MPEG4Source(const MPEG4Source &);
148    MPEG4Source &operator=(const MPEG4Source &);
149};
150
151// This custom data source wraps an existing one and satisfies requests
152// falling entirely within a cached range from the cache while forwarding
153// all remaining requests to the wrapped datasource.
154// This is used to cache the full sampletable metadata for a single track,
155// possibly wrapping multiple times to cover all tracks, i.e.
156// Each MPEG4DataSource caches the sampletable metadata for a single track.
157
// DataSource wrapper holding at most one cached byte range of the wrapped
// source (see the member definitions that follow this declaration).
158struct MPEG4DataSource : public DataSource {
159    MPEG4DataSource(const sp<DataSource> &source);
160
    // initCheck(), getSize() and flags() forward directly to the wrapped source.
161    virtual status_t initCheck() const;
    // Served from the cache when the request lies entirely inside the cached
    // range; otherwise forwarded to the wrapped source.
162    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
163    virtual status_t getSize(off64_t *size);
164    virtual uint32_t flags();
165
    // Discards any existing cache, then reads "size" bytes at "offset" from
    // the wrapped source into a new buffer. Returns OK, -ENOMEM if the buffer
    // cannot be allocated, or ERROR_IO if fewer than "size" bytes were read
    // (in which case the cache is left empty).
166    status_t setCachedRange(off64_t offset, size_t size);
167
168protected:
169    virtual ~MPEG4DataSource();
170
171private:
    // Taken by readAt() and setCachedRange().
172    Mutex mLock;
173
174    sp<DataSource> mSource;
    // Cached range [mCachedOffset, mCachedOffset + mCachedSize); mCache is the
    // malloc'ed buffer holding it, or NULL when nothing is cached.
175    off64_t mCachedOffset;
176    size_t mCachedSize;
177    uint8_t *mCache;
178
    // Frees mCache and resets the cached range to empty.
179    void clearCache();
180
    // Copy constructor and copy assignment are declared private, so the
    // struct cannot be copied by outside code.
181    MPEG4DataSource(const MPEG4DataSource &);
182    MPEG4DataSource &operator=(const MPEG4DataSource &);
183};
184
185MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
186    : mSource(source),
187      mCachedOffset(0),
188      mCachedSize(0),
189      mCache(NULL) {
190}
191
192MPEG4DataSource::~MPEG4DataSource() {
193    clearCache();
194}
195
196void MPEG4DataSource::clearCache() {
197    if (mCache) {
198        free(mCache);
199        mCache = NULL;
200    }
201
202    mCachedOffset = 0;
203    mCachedSize = 0;
204}
205
206status_t MPEG4DataSource::initCheck() const {
207    return mSource->initCheck();
208}
209
210ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
211    Mutex::Autolock autoLock(mLock);
212
213    if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
214        memcpy(data, &mCache[offset - mCachedOffset], size);
215        return size;
216    }
217
218    return mSource->readAt(offset, data, size);
219}
220
221status_t MPEG4DataSource::getSize(off64_t *size) {
222    return mSource->getSize(size);
223}
224
225uint32_t MPEG4DataSource::flags() {
226    return mSource->flags();
227}
228
229status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
230    Mutex::Autolock autoLock(mLock);
231
232    clearCache();
233
234    mCache = (uint8_t *)malloc(size);
235
236    if (mCache == NULL) {
237        return -ENOMEM;
238    }
239
240    mCachedOffset = offset;
241    mCachedSize = size;
242
243    ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
244
245    if (err < (ssize_t)size) {
246        clearCache();
247
248        return ERROR_IO;
249    }
250
251    return OK;
252}
253
254////////////////////////////////////////////////////////////////////////////////
255
// Debug helper: prints "size" bytes starting at "_data" to stdout, 16 bytes
// per line. Each line shows the offset of its first byte, the bytes in hex
// (with an extra space after the eighth), and the same bytes as characters,
// with non-printable ones shown as '.'.
static void hexdump(const void *_data, size_t size) {
    const uint8_t *data = (const uint8_t *)_data;

    for (size_t offset = 0; offset < size; offset += 16) {
        // "offset" is a size_t, so it needs the 'z' length modifier; plain
        // "%04x" expects an unsigned int and is wrong where size_t is wider.
        printf("0x%04zx  ", offset);

        // Number of valid bytes on this line (the last line may be short).
        size_t n = size - offset;
        if (n > 16) {
            n = 16;
        }

        for (size_t i = 0; i < 16; ++i) {
            if (i == 8) {
                printf(" ");
            }

            if (i < n) {
                printf("%02x ", data[offset + i]);
            } else {
                // Pad short lines so the character column stays aligned.
                printf("   ");
            }
        }

        printf(" ");

        for (size_t i = 0; i < n; ++i) {
            if (isprint(data[offset + i])) {
                printf("%c", data[offset + i]);
            } else {
                printf(".");
            }
        }

        printf("\n");
    }
}
294
295static const char *FourCC2MIME(uint32_t fourcc) {
296    switch (fourcc) {
297        case FOURCC('m', 'p', '4', 'a'):
298            return MEDIA_MIMETYPE_AUDIO_AAC;
299
300        case FOURCC('s', 'a', 'm', 'r'):
301            return MEDIA_MIMETYPE_AUDIO_AMR_NB;
302
303        case FOURCC('s', 'a', 'w', 'b'):
304            return MEDIA_MIMETYPE_AUDIO_AMR_WB;
305
306        case FOURCC('m', 'p', '4', 'v'):
307            return MEDIA_MIMETYPE_VIDEO_MPEG4;
308
309        case FOURCC('s', '2', '6', '3'):
310        case FOURCC('h', '2', '6', '3'):
311        case FOURCC('H', '2', '6', '3'):
312            return MEDIA_MIMETYPE_VIDEO_H263;
313
314        case FOURCC('a', 'v', 'c', '1'):
315            return MEDIA_MIMETYPE_VIDEO_AVC;
316
317        default:
318            CHECK(!"should not be here.");
319            return NULL;
320    }
321}
322
323static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
324    if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
325        // AMR NB audio is always mono, 8kHz
326        *channels = 1;
327        *rate = 8000;
328        return true;
329    } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
330        // AMR WB audio is always mono, 16kHz
331        *channels = 1;
332        *rate = 16000;
333        return true;
334    }
335    return false;
336}
337
338MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
339    : mSidxDuration(0),
340      mMoofOffset(0),
341      mDataSource(source),
342      mInitCheck(NO_INIT),
343      mHasVideo(false),
344      mHeaderTimescale(0),
345      mFirstTrack(NULL),
346      mLastTrack(NULL),
347      mFileMetaData(new MetaData),
348      mFirstSINF(NULL),
349      mIsDrm(false) {
350}
351
352MPEG4Extractor::~MPEG4Extractor() {
353    Track *track = mFirstTrack;
354    while (track) {
355        Track *next = track->next;
356
357        delete track;
358        track = next;
359    }
360    mFirstTrack = mLastTrack = NULL;
361
362    SINF *sinf = mFirstSINF;
363    while (sinf) {
364        SINF *next = sinf->next;
365        delete sinf->IPMPData;
366        delete sinf;
367        sinf = next;
368    }
369    mFirstSINF = NULL;
370
371    for (size_t i = 0; i < mPssh.size(); i++) {
372        delete [] mPssh[i].data;
373    }
374}
375
376uint32_t MPEG4Extractor::flags() const {
377    return CAN_PAUSE |
378            ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
379                    (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
380}
381
382sp<MetaData> MPEG4Extractor::getMetaData() {
383    status_t err;
384    if ((err = readMetaData()) != OK) {
385        return new MetaData;
386    }
387
388    return mFileMetaData;
389}
390
391size_t MPEG4Extractor::countTracks() {
392    status_t err;
393    if ((err = readMetaData()) != OK) {
394        ALOGV("MPEG4Extractor::countTracks: no tracks");
395        return 0;
396    }
397
398    size_t n = 0;
399    Track *track = mFirstTrack;
400    while (track) {
401        ++n;
402        track = track->next;
403    }
404
405    ALOGV("MPEG4Extractor::countTracks: %d tracks", n);
406    return n;
407}
408
409sp<MetaData> MPEG4Extractor::getTrackMetaData(
410        size_t index, uint32_t flags) {
411    status_t err;
412    if ((err = readMetaData()) != OK) {
413        return NULL;
414    }
415
416    Track *track = mFirstTrack;
417    while (index > 0) {
418        if (track == NULL) {
419            return NULL;
420        }
421
422        track = track->next;
423        --index;
424    }
425
426    if (track == NULL) {
427        return NULL;
428    }
429
430    if ((flags & kIncludeExtensiveMetaData)
431            && !track->includes_expensive_metadata) {
432        track->includes_expensive_metadata = true;
433
434        const char *mime;
435        CHECK(track->meta->findCString(kKeyMIMEType, &mime));
436        if (!strncasecmp("video/", mime, 6)) {
437            if (mMoofOffset > 0) {
438                int64_t duration;
439                if (track->meta->findInt64(kKeyDuration, &duration)) {
440                    // nothing fancy, just pick a frame near 1/4th of the duration
441                    track->meta->setInt64(
442                            kKeyThumbnailTime, duration / 4);
443                }
444            } else {
445                uint32_t sampleIndex;
446                uint32_t sampleTime;
447                if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK
448                        && track->sampleTable->getMetaDataForSample(
449                            sampleIndex, NULL /* offset */, NULL /* size */,
450                            &sampleTime) == OK) {
451                    track->meta->setInt64(
452                            kKeyThumbnailTime,
453                            ((int64_t)sampleTime * 1000000) / track->timescale);
454                }
455            }
456        }
457    }
458
459    return track->meta;
460}
461
// Writes the four bytes of "x", most significant first, into "s" followed by
// a terminating NUL. "s" must have room for at least 5 characters.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int i = 0; i < 4; ++i) {
        const int shift = 24 - 8 * i;
        s[i] = (x >> shift) & 0xff;
    }
    s[4] = '\0';
}
469
470status_t MPEG4Extractor::readMetaData() {
471    if (mInitCheck != NO_INIT) {
472        return mInitCheck;
473    }
474
475    off64_t offset = 0;
476    status_t err;
477    while (true) {
478        err = parseChunk(&offset, 0);
479        if (err == OK) {
480            continue;
481        }
482
483        uint32_t hdr[2];
484        if (mDataSource->readAt(offset, hdr, 8) < 8) {
485            break;
486        }
487        uint32_t chunk_type = ntohl(hdr[1]);
488        if (chunk_type == FOURCC('s', 'i', 'd', 'x')) {
489            // parse the sidx box too
490            continue;
491        } else if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
492            // store the offset of the first segment
493            mMoofOffset = offset;
494        }
495        break;
496    }
497
498    if (mInitCheck == OK) {
499        if (mHasVideo) {
500            mFileMetaData->setCString(
501                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
502        } else {
503            mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
504        }
505
506        mInitCheck = OK;
507    } else {
508        mInitCheck = err;
509    }
510
511    CHECK_NE(err, (status_t)NO_INIT);
512
513    // copy pssh data into file metadata
514    int psshsize = 0;
515    for (size_t i = 0; i < mPssh.size(); i++) {
516        psshsize += 20 + mPssh[i].datalen;
517    }
518    if (psshsize) {
519        char *buf = (char*)malloc(psshsize);
520        char *ptr = buf;
521        for (size_t i = 0; i < mPssh.size(); i++) {
522            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
523            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
524            ptr += (20 + mPssh[i].datalen);
525        }
526        mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize);
527        free(buf);
528    }
529    return mInitCheck;
530}
531
532char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
533    if (mFirstSINF == NULL) {
534        return NULL;
535    }
536
537    SINF *sinf = mFirstSINF;
538    while (sinf && (trackID != sinf->trackID)) {
539        sinf = sinf->next;
540    }
541
542    if (sinf == NULL) {
543        return NULL;
544    }
545
546    *len = sinf->len;
547    return sinf->IPMPData;
548}
549
550// Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
551static int32_t readSize(off64_t offset,
552        const sp<DataSource> DataSource, uint8_t *numOfBytes) {
553    uint32_t size = 0;
554    uint8_t data;
555    bool moreData = true;
556    *numOfBytes = 0;
557
558    while (moreData) {
559        if (DataSource->readAt(offset, &data, 1) < 1) {
560            return -1;
561        }
562        offset ++;
563        moreData = (data >= 128) ? true : false;
564        size = (size << 7) | (data & 0x7f); // Take last 7 bits
565        (*numOfBytes) ++;
566    }
567
568    return size;
569}
570
571status_t MPEG4Extractor::parseDrmSINF(off64_t *offset, off64_t data_offset) {
572    uint8_t updateIdTag;
573    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
574        return ERROR_IO;
575    }
576    data_offset ++;
577
578    if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
579        return ERROR_MALFORMED;
580    }
581
582    uint8_t numOfBytes;
583    int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
584    if (size < 0) {
585        return ERROR_IO;
586    }
587    int32_t classSize = size;
588    data_offset += numOfBytes;
589
590    while(size >= 11 ) {
591        uint8_t descriptorTag;
592        if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
593            return ERROR_IO;
594        }
595        data_offset ++;
596
597        if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
598            return ERROR_MALFORMED;
599        }
600
601        uint8_t buffer[8];
602        //ObjectDescriptorID and ObjectDescriptor url flag
603        if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
604            return ERROR_IO;
605        }
606        data_offset += 2;
607
608        if ((buffer[1] >> 5) & 0x0001) { //url flag is set
609            return ERROR_MALFORMED;
610        }
611
612        if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
613            return ERROR_IO;
614        }
615        data_offset += 8;
616
617        if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
618                || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
619            return ERROR_MALFORMED;
620        }
621
622        SINF *sinf = new SINF;
623        sinf->trackID = U16_AT(&buffer[3]);
624        sinf->IPMPDescriptorID = buffer[7];
625        sinf->next = mFirstSINF;
626        mFirstSINF = sinf;
627
628        size -= (8 + 2 + 1);
629    }
630
631    if (size != 0) {
632        return ERROR_MALFORMED;
633    }
634
635    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
636        return ERROR_IO;
637    }
638    data_offset ++;
639
640    if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
641        return ERROR_MALFORMED;
642    }
643
644    size = readSize(data_offset, mDataSource, &numOfBytes);
645    if (size < 0) {
646        return ERROR_IO;
647    }
648    classSize = size;
649    data_offset += numOfBytes;
650
651    while (size > 0) {
652        uint8_t tag;
653        int32_t dataLen;
654        if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
655            return ERROR_IO;
656        }
657        data_offset ++;
658
659        if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
660            uint8_t id;
661            dataLen = readSize(data_offset, mDataSource, &numOfBytes);
662            if (dataLen < 0) {
663                return ERROR_IO;
664            } else if (dataLen < 4) {
665                return ERROR_MALFORMED;
666            }
667            data_offset += numOfBytes;
668
669            if (mDataSource->readAt(data_offset, &id, 1) < 1) {
670                return ERROR_IO;
671            }
672            data_offset ++;
673
674            SINF *sinf = mFirstSINF;
675            while (sinf && (sinf->IPMPDescriptorID != id)) {
676                sinf = sinf->next;
677            }
678            if (sinf == NULL) {
679                return ERROR_MALFORMED;
680            }
681            sinf->len = dataLen - 3;
682            sinf->IPMPData = new char[sinf->len];
683
684            if (mDataSource->readAt(data_offset + 2, sinf->IPMPData, sinf->len) < sinf->len) {
685                return ERROR_IO;
686            }
687            data_offset += sinf->len;
688
689            size -= (dataLen + numOfBytes + 1);
690        }
691    }
692
693    if (size != 0) {
694        return ERROR_MALFORMED;
695    }
696
697    return UNKNOWN_ERROR;  // Return a dummy error.
698}
699
700struct PathAdder {
701    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
702        : mPath(path) {
703        mPath->push(chunkType);
704    }
705
706    ~PathAdder() {
707        mPath->pop();
708    }
709
710private:
711    Vector<uint32_t> *mPath;
712
713    PathAdder(const PathAdder &);
714    PathAdder &operator=(const PathAdder &);
715};
716
717static bool underMetaDataPath(const Vector<uint32_t> &path) {
718    return path.size() >= 5
719        && path[0] == FOURCC('m', 'o', 'o', 'v')
720        && path[1] == FOURCC('u', 'd', 't', 'a')
721        && path[2] == FOURCC('m', 'e', 't', 'a')
722        && path[3] == FOURCC('i', 'l', 's', 't');
723}
724
725// Given a time in seconds since Jan 1 1904, produce a human-readable string.
726static void convertTimeToDate(int64_t time_1904, String8 *s) {
727    time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600);
728
729    char tmp[32];
730    strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970));
731
732    s->setTo(tmp);
733}
734
735status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
736    ALOGV("entering parseChunk %lld/%d", *offset, depth);
737    uint32_t hdr[2];
738    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
739        return ERROR_IO;
740    }
741    uint64_t chunk_size = ntohl(hdr[0]);
742    uint32_t chunk_type = ntohl(hdr[1]);
743    off64_t data_offset = *offset + 8;
744
745    if (chunk_size == 1) {
746        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
747            return ERROR_IO;
748        }
749        chunk_size = ntoh64(chunk_size);
750        data_offset += 8;
751
752        if (chunk_size < 16) {
753            // The smallest valid chunk is 16 bytes long in this case.
754            return ERROR_MALFORMED;
755        }
756    } else if (chunk_size < 8) {
757        // The smallest valid chunk is 8 bytes long.
758        return ERROR_MALFORMED;
759    }
760
761    char chunk[5];
762    MakeFourCCString(chunk_type, chunk);
763    ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth);
764
765#if 0
766    static const char kWhitespace[] = "                                        ";
767    const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
768    printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size);
769
770    char buffer[256];
771    size_t n = chunk_size;
772    if (n > sizeof(buffer)) {
773        n = sizeof(buffer);
774    }
775    if (mDataSource->readAt(*offset, buffer, n)
776            < (ssize_t)n) {
777        return ERROR_IO;
778    }
779
780    hexdump(buffer, n);
781#endif
782
783    PathAdder autoAdder(&mPath, chunk_type);
784
785    off64_t chunk_data_size = *offset + chunk_size - data_offset;
786
787    if (chunk_type != FOURCC('c', 'p', 'r', 't')
788            && chunk_type != FOURCC('c', 'o', 'v', 'r')
789            && mPath.size() == 5 && underMetaDataPath(mPath)) {
790        off64_t stop_offset = *offset + chunk_size;
791        *offset = data_offset;
792        while (*offset < stop_offset) {
793            status_t err = parseChunk(offset, depth + 1);
794            if (err != OK) {
795                return err;
796            }
797        }
798
799        if (*offset != stop_offset) {
800            return ERROR_MALFORMED;
801        }
802
803        return OK;
804    }
805
806    switch(chunk_type) {
807        case FOURCC('m', 'o', 'o', 'v'):
808        case FOURCC('t', 'r', 'a', 'k'):
809        case FOURCC('m', 'd', 'i', 'a'):
810        case FOURCC('m', 'i', 'n', 'f'):
811        case FOURCC('d', 'i', 'n', 'f'):
812        case FOURCC('s', 't', 'b', 'l'):
813        case FOURCC('m', 'v', 'e', 'x'):
814        case FOURCC('m', 'o', 'o', 'f'):
815        case FOURCC('t', 'r', 'a', 'f'):
816        case FOURCC('m', 'f', 'r', 'a'):
817        case FOURCC('u', 'd', 't', 'a'):
818        case FOURCC('i', 'l', 's', 't'):
819        case FOURCC('s', 'i', 'n', 'f'):
820        case FOURCC('s', 'c', 'h', 'i'):
821        case FOURCC('e', 'd', 't', 's'):
822        {
823            if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
824                ALOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size);
825
826                if (mDataSource->flags()
827                        & (DataSource::kWantsPrefetching
828                            | DataSource::kIsCachingDataSource)) {
829                    sp<MPEG4DataSource> cachedSource =
830                        new MPEG4DataSource(mDataSource);
831
832                    if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
833                        mDataSource = cachedSource;
834                    }
835                }
836
837                mLastTrack->sampleTable = new SampleTable(mDataSource);
838            }
839
840            bool isTrack = false;
841            if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
842                isTrack = true;
843
844                Track *track = new Track;
845                track->next = NULL;
846                if (mLastTrack) {
847                    mLastTrack->next = track;
848                } else {
849                    mFirstTrack = track;
850                }
851                mLastTrack = track;
852
853                track->meta = new MetaData;
854                track->includes_expensive_metadata = false;
855                track->skipTrack = false;
856                track->timescale = 0;
857                track->meta->setCString(kKeyMIMEType, "application/octet-stream");
858            }
859
860            off64_t stop_offset = *offset + chunk_size;
861            *offset = data_offset;
862            while (*offset < stop_offset) {
863                status_t err = parseChunk(offset, depth + 1);
864                if (err != OK) {
865                    return err;
866                }
867            }
868
869            if (*offset != stop_offset) {
870                return ERROR_MALFORMED;
871            }
872
873            if (isTrack) {
874                if (mLastTrack->skipTrack) {
875                    Track *cur = mFirstTrack;
876
877                    if (cur == mLastTrack) {
878                        delete cur;
879                        mFirstTrack = mLastTrack = NULL;
880                    } else {
881                        while (cur && cur->next != mLastTrack) {
882                            cur = cur->next;
883                        }
884                        cur->next = NULL;
885                        delete mLastTrack;
886                        mLastTrack = cur;
887                    }
888
889                    return OK;
890                }
891
892                status_t err = verifyTrack(mLastTrack);
893
894                if (err != OK) {
895                    return err;
896                }
897            } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
898                mInitCheck = OK;
899
900                if (!mIsDrm) {
901                    return UNKNOWN_ERROR;  // Return a dummy error.
902                } else {
903                    return OK;
904                }
905            }
906            break;
907        }
908
909        case FOURCC('e', 'l', 's', 't'):
910        {
911            // See 14496-12 8.6.6
912            uint8_t version;
913            if (mDataSource->readAt(data_offset, &version, 1) < 1) {
914                return ERROR_IO;
915            }
916
917            uint32_t entry_count;
918            if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
919                return ERROR_IO;
920            }
921
922            if (entry_count != 1) {
923                // we only support a single entry at the moment, for gapless playback
924                ALOGW("ignoring edit list with %d entries", entry_count);
925            } else if (mHeaderTimescale == 0) {
926                ALOGW("ignoring edit list because timescale is 0");
927            } else {
928                off64_t entriesoffset = data_offset + 8;
929                uint64_t segment_duration;
930                int64_t media_time;
931
932                if (version == 1) {
933                    if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
934                            !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
935                        return ERROR_IO;
936                    }
937                } else if (version == 0) {
938                    uint32_t sd;
939                    int32_t mt;
940                    if (!mDataSource->getUInt32(entriesoffset, &sd) ||
941                            !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
942                        return ERROR_IO;
943                    }
944                    segment_duration = sd;
945                    media_time = mt;
946                } else {
947                    return ERROR_IO;
948                }
949
950                uint64_t halfscale = mHeaderTimescale / 2;
951                segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale;
952                media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale;
953
954                int64_t duration;
955                int32_t samplerate;
956                if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
957                        mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {
958
959                    int64_t delay = (media_time  * samplerate + 500000) / 1000000;
960                    mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
961
962                    int64_t paddingus = duration - (segment_duration + media_time);
963                    if (paddingus < 0) {
964                        // track duration from media header (which is what kKeyDuration is) might
965                        // be slightly shorter than the segment duration, which would make the
966                        // padding negative. Clamp to zero.
967                        paddingus = 0;
968                    }
969                    int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
970                    mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
971                }
972            }
973            *offset += chunk_size;
974            break;
975        }
976
977        case FOURCC('f', 'r', 'm', 'a'):
978        {
979            uint32_t original_fourcc;
980            if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
981                return ERROR_IO;
982            }
983            original_fourcc = ntohl(original_fourcc);
984            ALOGV("read original format: %d", original_fourcc);
985            mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
986            uint32_t num_channels = 0;
987            uint32_t sample_rate = 0;
988            if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
989                mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
990                mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
991            }
992            *offset += chunk_size;
993            break;
994        }
995
996        case FOURCC('t', 'e', 'n', 'c'):
997        {
998            if (chunk_size < 32) {
999                return ERROR_MALFORMED;
1000            }
1001
1002            // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1003            // default IV size, 16 bytes default KeyID
1004            // (ISO 23001-7)
1005            char buf[4];
1006            memset(buf, 0, 4);
1007            if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1008                return ERROR_IO;
1009            }
1010            uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1011            if (defaultAlgorithmId > 1) {
1012                // only 0 (clear) and 1 (AES-128) are valid
1013                return ERROR_MALFORMED;
1014            }
1015
1016            memset(buf, 0, 4);
1017            if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1018                return ERROR_IO;
1019            }
1020            uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1021
1022            if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
1023                    (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
1024                // IV size must be 0 if and only if the data is unencrypted
1025                return ERROR_MALFORMED;
1026            } else if (defaultIVSize != 0 &&
1027                    defaultIVSize != 8 &&
1028                    defaultIVSize != 16) {
1029                // only supported sizes are 0, 8 and 16
1030                return ERROR_MALFORMED;
1031            }
1032
1033            uint8_t defaultKeyId[16];
1034
1035            if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1036                return ERROR_IO;
1037            }
1038
1039            mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
1040            mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1041            mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1042            *offset += chunk_size;
1043            break;
1044        }
1045
1046        case FOURCC('t', 'k', 'h', 'd'):
1047        {
1048            status_t err;
1049            if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1050                return err;
1051            }
1052
1053            *offset += chunk_size;
1054            break;
1055        }
1056
1057        case FOURCC('p', 's', 's', 'h'):
1058        {
1059            PsshInfo pssh;
1060
1061            if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1062                return ERROR_IO;
1063            }
1064
1065            uint32_t psshdatalen = 0;
1066            if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1067                return ERROR_IO;
1068            }
1069            pssh.datalen = ntohl(psshdatalen);
1070            ALOGV("pssh data size: %d", pssh.datalen);
1071            if (pssh.datalen + 20 > chunk_size) {
1072                // pssh data length exceeds size of containing box
1073                return ERROR_MALFORMED;
1074            }
1075
1076            pssh.data = new uint8_t[pssh.datalen];
1077            ALOGV("allocated pssh @ %p", pssh.data);
1078            ssize_t requested = (ssize_t) pssh.datalen;
1079            if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1080                return ERROR_IO;
1081            }
1082            mPssh.push_back(pssh);
1083
1084            *offset += chunk_size;
1085            break;
1086        }
1087
1088        case FOURCC('m', 'd', 'h', 'd'):
1089        {
1090            if (chunk_data_size < 4) {
1091                return ERROR_MALFORMED;
1092            }
1093
1094            uint8_t version;
1095            if (mDataSource->readAt(
1096                        data_offset, &version, sizeof(version))
1097                    < (ssize_t)sizeof(version)) {
1098                return ERROR_IO;
1099            }
1100
1101            off64_t timescale_offset;
1102
1103            if (version == 1) {
1104                timescale_offset = data_offset + 4 + 16;
1105            } else if (version == 0) {
1106                timescale_offset = data_offset + 4 + 8;
1107            } else {
1108                return ERROR_IO;
1109            }
1110
1111            uint32_t timescale;
1112            if (mDataSource->readAt(
1113                        timescale_offset, &timescale, sizeof(timescale))
1114                    < (ssize_t)sizeof(timescale)) {
1115                return ERROR_IO;
1116            }
1117
1118            mLastTrack->timescale = ntohl(timescale);
1119
1120            int64_t duration = 0;
1121            if (version == 1) {
1122                if (mDataSource->readAt(
1123                            timescale_offset + 4, &duration, sizeof(duration))
1124                        < (ssize_t)sizeof(duration)) {
1125                    return ERROR_IO;
1126                }
1127                duration = ntoh64(duration);
1128            } else {
1129                uint32_t duration32;
1130                if (mDataSource->readAt(
1131                            timescale_offset + 4, &duration32, sizeof(duration32))
1132                        < (ssize_t)sizeof(duration32)) {
1133                    return ERROR_IO;
1134                }
1135                // ffmpeg sets duration to -1, which is incorrect.
1136                if (duration32 != 0xffffffff) {
1137                    duration = ntohl(duration32);
1138                }
1139            }
1140            mLastTrack->meta->setInt64(
1141                    kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1142
1143            uint8_t lang[2];
1144            off64_t lang_offset;
1145            if (version == 1) {
1146                lang_offset = timescale_offset + 4 + 8;
1147            } else if (version == 0) {
1148                lang_offset = timescale_offset + 4 + 4;
1149            } else {
1150                return ERROR_IO;
1151            }
1152
1153            if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1154                    < (ssize_t)sizeof(lang)) {
1155                return ERROR_IO;
1156            }
1157
1158            // To get the ISO-639-2/T three character language code
1159            // 1 bit pad followed by 3 5-bits characters. Each character
1160            // is packed as the difference between its ASCII value and 0x60.
1161            char lang_code[4];
1162            lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1163            lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1164            lang_code[2] = (lang[1] & 0x1f) + 0x60;
1165            lang_code[3] = '\0';
1166
1167            mLastTrack->meta->setCString(
1168                    kKeyMediaLanguage, lang_code);
1169
1170            *offset += chunk_size;
1171            break;
1172        }
1173
1174        case FOURCC('s', 't', 's', 'd'):
1175        {
1176            if (chunk_data_size < 8) {
1177                return ERROR_MALFORMED;
1178            }
1179
1180            uint8_t buffer[8];
1181            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1182                return ERROR_MALFORMED;
1183            }
1184
1185            if (mDataSource->readAt(
1186                        data_offset, buffer, 8) < 8) {
1187                return ERROR_IO;
1188            }
1189
1190            if (U32_AT(buffer) != 0) {
1191                // Should be version 0, flags 0.
1192                return ERROR_MALFORMED;
1193            }
1194
1195            uint32_t entry_count = U32_AT(&buffer[4]);
1196
1197            if (entry_count > 1) {
1198                // For 3GPP timed text, there could be multiple tx3g boxes containing
1199                // multiple text display formats. These formats will be used to
1200                // display the timed text.
1201                // For encrypted files, there may also be more than one entry.
1202                const char *mime;
1203                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1204                if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1205                        strcasecmp(mime, "application/octet-stream")) {
1206                    // For now we only support a single type of media per track.
1207                    mLastTrack->skipTrack = true;
1208                    *offset += chunk_size;
1209                    break;
1210                }
1211            }
1212            off64_t stop_offset = *offset + chunk_size;
1213            *offset = data_offset + 8;
1214            for (uint32_t i = 0; i < entry_count; ++i) {
1215                status_t err = parseChunk(offset, depth + 1);
1216                if (err != OK) {
1217                    return err;
1218                }
1219            }
1220
1221            if (*offset != stop_offset) {
1222                return ERROR_MALFORMED;
1223            }
1224            break;
1225        }
1226
1227        case FOURCC('m', 'p', '4', 'a'):
1228        case FOURCC('e', 'n', 'c', 'a'):
1229        case FOURCC('s', 'a', 'm', 'r'):
1230        case FOURCC('s', 'a', 'w', 'b'):
1231        {
1232            uint8_t buffer[8 + 20];
1233            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1234                // Basic AudioSampleEntry size.
1235                return ERROR_MALFORMED;
1236            }
1237
1238            if (mDataSource->readAt(
1239                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1240                return ERROR_IO;
1241            }
1242
1243            uint16_t data_ref_index = U16_AT(&buffer[6]);
1244            uint32_t num_channels = U16_AT(&buffer[16]);
1245
1246            uint16_t sample_size = U16_AT(&buffer[18]);
1247            uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1248
1249            if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1250                // if the chunk type is enca, we'll get the type from the sinf/frma box later
1251                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1252                AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1253            }
1254            ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1255                   chunk, num_channels, sample_size, sample_rate);
1256            mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1257            mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1258
1259            off64_t stop_offset = *offset + chunk_size;
1260            *offset = data_offset + sizeof(buffer);
1261            while (*offset < stop_offset) {
1262                status_t err = parseChunk(offset, depth + 1);
1263                if (err != OK) {
1264                    return err;
1265                }
1266            }
1267
1268            if (*offset != stop_offset) {
1269                return ERROR_MALFORMED;
1270            }
1271            break;
1272        }
1273
1274        case FOURCC('m', 'p', '4', 'v'):
1275        case FOURCC('e', 'n', 'c', 'v'):
1276        case FOURCC('s', '2', '6', '3'):
1277        case FOURCC('H', '2', '6', '3'):
1278        case FOURCC('h', '2', '6', '3'):
1279        case FOURCC('a', 'v', 'c', '1'):
1280        {
1281            mHasVideo = true;
1282
1283            uint8_t buffer[78];
1284            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1285                // Basic VideoSampleEntry size.
1286                return ERROR_MALFORMED;
1287            }
1288
1289            if (mDataSource->readAt(
1290                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1291                return ERROR_IO;
1292            }
1293
1294            uint16_t data_ref_index = U16_AT(&buffer[6]);
1295            uint16_t width = U16_AT(&buffer[6 + 18]);
1296            uint16_t height = U16_AT(&buffer[6 + 20]);
1297
1298            // The video sample is not standard-compliant if it has invalid dimension.
1299            // Use some default width and height value, and
1300            // let the decoder figure out the actual width and height (and thus
1301            // be prepared for INFO_FORMAT_CHANGED event).
1302            if (width == 0)  width  = 352;
1303            if (height == 0) height = 288;
1304
1305            // printf("*** coding='%s' width=%d height=%d\n",
1306            //        chunk, width, height);
1307
1308            if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1309                // if the chunk type is encv, we'll get the type from the sinf/frma box later
1310                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1311            }
1312            mLastTrack->meta->setInt32(kKeyWidth, width);
1313            mLastTrack->meta->setInt32(kKeyHeight, height);
1314
1315            off64_t stop_offset = *offset + chunk_size;
1316            *offset = data_offset + sizeof(buffer);
1317            while (*offset < stop_offset) {
1318                status_t err = parseChunk(offset, depth + 1);
1319                if (err != OK) {
1320                    return err;
1321                }
1322            }
1323
1324            if (*offset != stop_offset) {
1325                return ERROR_MALFORMED;
1326            }
1327            break;
1328        }
1329
1330        case FOURCC('s', 't', 'c', 'o'):
1331        case FOURCC('c', 'o', '6', '4'):
1332        {
1333            status_t err =
1334                mLastTrack->sampleTable->setChunkOffsetParams(
1335                        chunk_type, data_offset, chunk_data_size);
1336
1337            if (err != OK) {
1338                return err;
1339            }
1340
1341            *offset += chunk_size;
1342            break;
1343        }
1344
1345        case FOURCC('s', 't', 's', 'c'):
1346        {
1347            status_t err =
1348                mLastTrack->sampleTable->setSampleToChunkParams(
1349                        data_offset, chunk_data_size);
1350
1351            if (err != OK) {
1352                return err;
1353            }
1354
1355            *offset += chunk_size;
1356            break;
1357        }
1358
1359        case FOURCC('s', 't', 's', 'z'):
1360        case FOURCC('s', 't', 'z', '2'):
1361        {
1362            status_t err =
1363                mLastTrack->sampleTable->setSampleSizeParams(
1364                        chunk_type, data_offset, chunk_data_size);
1365
1366            if (err != OK) {
1367                return err;
1368            }
1369
1370            size_t max_size;
1371            err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1372
1373            if (err != OK) {
1374                return err;
1375            }
1376
1377            if (max_size != 0) {
1378                // Assume that a given buffer only contains at most 10 chunks,
1379                // each chunk originally prefixed with a 2 byte length will
1380                // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1381                // and thus will grow by 2 bytes per chunk.
1382                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1383            } else {
1384                // No size was specified. Pick a conservatively large size.
1385                int32_t width, height;
1386                if (!mLastTrack->meta->findInt32(kKeyWidth, &width) ||
1387                    !mLastTrack->meta->findInt32(kKeyHeight, &height)) {
1388                    ALOGE("No width or height, assuming worst case 1080p");
1389                    width = 1920;
1390                    height = 1080;
1391                }
1392
1393                const char *mime;
1394                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1395                if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
1396                    // AVC requires compression ratio of at least 2, and uses
1397                    // macroblocks
1398                    max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1399                } else {
1400                    // For all other formats there is no minimum compression
1401                    // ratio. Use compression ratio of 1.
1402                    max_size = width * height * 3 / 2;
1403                }
1404                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size);
1405            }
1406            *offset += chunk_size;
1407
1408            // NOTE: setting another piece of metadata invalidates any pointers (such as the
1409            // mimetype) previously obtained, so don't cache them.
1410            const char *mime;
1411            CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1412            // Calculate average frame rate.
1413            if (!strncasecmp("video/", mime, 6)) {
1414                size_t nSamples = mLastTrack->sampleTable->countSamples();
1415                int64_t durationUs;
1416                if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1417                    if (durationUs > 0) {
1418                        int32_t frameRate = (nSamples * 1000000LL +
1419                                    (durationUs >> 1)) / durationUs;
1420                        mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1421                    }
1422                }
1423            }
1424
1425            break;
1426        }
1427
1428        case FOURCC('s', 't', 't', 's'):
1429        {
1430            status_t err =
1431                mLastTrack->sampleTable->setTimeToSampleParams(
1432                        data_offset, chunk_data_size);
1433
1434            if (err != OK) {
1435                return err;
1436            }
1437
1438            *offset += chunk_size;
1439            break;
1440        }
1441
1442        case FOURCC('c', 't', 't', 's'):
1443        {
1444            status_t err =
1445                mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1446                        data_offset, chunk_data_size);
1447
1448            if (err != OK) {
1449                return err;
1450            }
1451
1452            *offset += chunk_size;
1453            break;
1454        }
1455
1456        case FOURCC('s', 't', 's', 's'):
1457        {
1458            status_t err =
1459                mLastTrack->sampleTable->setSyncSampleParams(
1460                        data_offset, chunk_data_size);
1461
1462            if (err != OK) {
1463                return err;
1464            }
1465
1466            *offset += chunk_size;
1467            break;
1468        }
1469
1470        // @xyz
1471        case FOURCC('\xA9', 'x', 'y', 'z'):
1472        {
1473            // Best case the total data length inside "@xyz" box
1474            // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/",
1475            // where "\x00\x04" is the text string length with value = 4,
1476            // "\x15\xc7" is the language code = en, and "0+0" is a
1477            // location (string) value with longitude = 0 and latitude = 0.
1478            if (chunk_data_size < 8) {
1479                return ERROR_MALFORMED;
1480            }
1481
1482            // Worst case the location string needs 18 bytes: 17 characters,
1483            // for instance +90.0000-180.0000, plus the terminating NUL. This
1484            // excludes the trailing "/" and the string length + language code.
1485            char buffer[18];
1486
1487            // Subtract 5 from the data size because the text string length +
1488            // language code take 4 bytes, and the trailing slash "/" takes 1 byte.
1489            off64_t location_length = chunk_data_size - 5;
1490            if (location_length >= (off64_t) sizeof(buffer)) {
1491                return ERROR_MALFORMED;
1492            }
1493
1494            if (mDataSource->readAt(
1495                        data_offset + 4, buffer, location_length) < location_length) {
1496                return ERROR_IO;
1497            }
1498
1499            buffer[location_length] = '\0';
1500            mFileMetaData->setCString(kKeyLocation, buffer);
1501            *offset += chunk_size;
1502            break;
1503        }
1504
1505        case FOURCC('e', 's', 'd', 's'):
1506        {
1507            if (chunk_data_size < 4) {
1508                return ERROR_MALFORMED;
1509            }
1510
1511            uint8_t buffer[256];
1512            if (chunk_data_size > (off64_t)sizeof(buffer)) {
1513                return ERROR_BUFFER_TOO_SMALL;
1514            }
1515
1516            if (mDataSource->readAt(
1517                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
1518                return ERROR_IO;
1519            }
1520
1521            if (U32_AT(buffer) != 0) {
1522                // Should be version 0, flags 0.
1523                return ERROR_MALFORMED;
1524            }
1525
1526            mLastTrack->meta->setData(
1527                    kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1528
1529            if (mPath.size() >= 2
1530                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1531                // Information from the ESDS must be relied on for proper
1532                // setup of sample rate and channel count for MPEG4 Audio.
1533                // The generic header appears to only contain generic
1534                // information...
1535
1536                status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1537                        &buffer[4], chunk_data_size - 4);
1538
1539                if (err != OK) {
1540                    return err;
1541                }
1542            }
1543
1544            *offset += chunk_size;
1545            break;
1546        }
1547
1548        case FOURCC('a', 'v', 'c', 'C'):
1549        {
1550            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1551
1552            if (mDataSource->readAt(
1553                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1554                return ERROR_IO;
1555            }
1556
1557            mLastTrack->meta->setData(
1558                    kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
1559
1560            *offset += chunk_size;
1561            break;
1562        }
1563
1564        case FOURCC('d', '2', '6', '3'):
1565        {
1566            /*
1567             * d263 contains a fixed 7 bytes part:
1568             *   vendor - 4 bytes
1569             *   version - 1 byte
1570             *   level - 1 byte
1571             *   profile - 1 byte
1572             * optionally, "d263" box itself may contain a 16-byte
1573             * bit rate box (bitr)
1574             *   average bit rate - 4 bytes
1575             *   max bit rate - 4 bytes
1576             */
1577            char buffer[23];
1578            if (chunk_data_size != 7 &&
1579                chunk_data_size != 23) {
1580                ALOGE("Incorrect D263 box size %lld", chunk_data_size);
1581                return ERROR_MALFORMED;
1582            }
1583
1584            if (mDataSource->readAt(
1585                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1586                return ERROR_IO;
1587            }
1588
1589            mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1590
1591            *offset += chunk_size;
1592            break;
1593        }
1594
1595        case FOURCC('m', 'e', 't', 'a'):
1596        {
1597            uint8_t buffer[4];
1598            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1599                return ERROR_MALFORMED;
1600            }
1601
1602            if (mDataSource->readAt(
1603                        data_offset, buffer, 4) < 4) {
1604                return ERROR_IO;
1605            }
1606
1607            if (U32_AT(buffer) != 0) {
1608                // Should be version 0, flags 0.
1609
1610                // If it's not, let's assume this is one of those
1611                // apparently malformed chunks that don't have flags
1612                // and completely different semantics than what's
1613                // in the MPEG4 specs and skip it.
1614                *offset += chunk_size;
1615                return OK;
1616            }
1617
1618            off64_t stop_offset = *offset + chunk_size;
1619            *offset = data_offset + sizeof(buffer);
1620            while (*offset < stop_offset) {
1621                status_t err = parseChunk(offset, depth + 1);
1622                if (err != OK) {
1623                    return err;
1624                }
1625            }
1626
1627            if (*offset != stop_offset) {
1628                return ERROR_MALFORMED;
1629            }
1630            break;
1631        }
1632
1633        case FOURCC('m', 'e', 'a', 'n'):
1634        case FOURCC('n', 'a', 'm', 'e'):
1635        case FOURCC('d', 'a', 't', 'a'):
1636        {
1637            if (mPath.size() == 6 && underMetaDataPath(mPath)) {
1638                status_t err = parseMetaData(data_offset, chunk_data_size);
1639
1640                if (err != OK) {
1641                    return err;
1642                }
1643            }
1644
1645            *offset += chunk_size;
1646            break;
1647        }
1648
1649        case FOURCC('m', 'v', 'h', 'd'):
1650        {
1651            if (chunk_data_size < 24) {
1652                return ERROR_MALFORMED;
1653            }
1654
1655            uint8_t header[24];
1656            if (mDataSource->readAt(
1657                        data_offset, header, sizeof(header))
1658                    < (ssize_t)sizeof(header)) {
1659                return ERROR_IO;
1660            }
1661
1662            uint64_t creationTime;
1663            if (header[0] == 1) {
1664                creationTime = U64_AT(&header[4]);
1665                mHeaderTimescale = U32_AT(&header[20]);
1666            } else if (header[0] != 0) {
1667                return ERROR_MALFORMED;
1668            } else {
1669                creationTime = U32_AT(&header[4]);
1670                mHeaderTimescale = U32_AT(&header[12]);
1671            }
1672
1673            String8 s;
1674            convertTimeToDate(creationTime, &s);
1675
1676            mFileMetaData->setCString(kKeyDate, s.string());
1677
1678            *offset += chunk_size;
1679            break;
1680        }
1681
1682        case FOURCC('m', 'd', 'a', 't'):
1683        {
1684            ALOGV("mdat chunk, drm: %d", mIsDrm);
1685            if (!mIsDrm) {
1686                *offset += chunk_size;
1687                break;
1688            }
1689
1690            if (chunk_size < 8) {
1691                return ERROR_MALFORMED;
1692            }
1693
1694            return parseDrmSINF(offset, data_offset);
1695        }
1696
1697        case FOURCC('h', 'd', 'l', 'r'):
1698        {
1699            uint32_t buffer;
1700            if (mDataSource->readAt(
1701                        data_offset + 8, &buffer, 4) < 4) {
1702                return ERROR_IO;
1703            }
1704
1705            uint32_t type = ntohl(buffer);
1706            // For the 3GPP file format, the handler-type within the 'hdlr' box
1707            // shall be 'text'. We also want to support 'sbtl' handler type
1708            // for a practical reason as various MPEG4 containers use it.
1709            if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
1710                mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
1711            }
1712
1713            *offset += chunk_size;
1714            break;
1715        }
1716
1717        case FOURCC('t', 'x', '3', 'g'):
1718        {
1719            uint32_t type;
1720            const void *data;
1721            size_t size = 0;
1722            if (!mLastTrack->meta->findData(
1723                    kKeyTextFormatData, &type, &data, &size)) {
1724                size = 0;
1725            }
1726
1727            if (SIZE_MAX - chunk_size <= size) {
1728                return ERROR_MALFORMED;
1729            }
1730
1731            uint8_t *buffer = new uint8_t[size + chunk_size];
1732            if (buffer == NULL) {
1733                return ERROR_MALFORMED;
1734            }
1735
1736            if (size > 0) {
1737                memcpy(buffer, data, size);
1738            }
1739
1740            if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
1741                    < chunk_size) {
1742                delete[] buffer;
1743                buffer = NULL;
1744
1745                return ERROR_IO;
1746            }
1747
1748            mLastTrack->meta->setData(
1749                    kKeyTextFormatData, 0, buffer, size + chunk_size);
1750
1751            delete[] buffer;
1752
1753            *offset += chunk_size;
1754            break;
1755        }
1756
1757        case FOURCC('c', 'o', 'v', 'r'):
1758        {
1759            if (mFileMetaData != NULL) {
1760                ALOGV("chunk_data_size = %lld and data_offset = %lld",
1761                        chunk_data_size, data_offset);
1762                sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1);
1763                if (mDataSource->readAt(
1764                    data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) {
1765                    return ERROR_IO;
1766                }
1767                const int kSkipBytesOfDataBox = 16;
1768                mFileMetaData->setData(
1769                    kKeyAlbumArt, MetaData::TYPE_NONE,
1770                    buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
1771            }
1772
1773            *offset += chunk_size;
1774            break;
1775        }
1776
1777        case FOURCC('-', '-', '-', '-'):
1778        {
1779            mLastCommentMean.clear();
1780            mLastCommentName.clear();
1781            mLastCommentData.clear();
1782            *offset += chunk_size;
1783            break;
1784        }
1785
1786        case FOURCC('s', 'i', 'd', 'x'):
1787        {
1788            parseSegmentIndex(data_offset, chunk_data_size);
1789            *offset += chunk_size;
1790            return UNKNOWN_ERROR; // stop parsing after sidx
1791        }
1792
1793        default:
1794        {
1795            *offset += chunk_size;
1796            break;
1797        }
1798    }
1799
1800    return OK;
1801}
1802
1803status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
1804  ALOGV("MPEG4Extractor::parseSegmentIndex");
1805
1806    if (size < 12) {
1807      return -EINVAL;
1808    }
1809
1810    uint32_t flags;
1811    if (!mDataSource->getUInt32(offset, &flags)) {
1812        return ERROR_MALFORMED;
1813    }
1814
1815    uint32_t version = flags >> 24;
1816    flags &= 0xffffff;
1817
1818    ALOGV("sidx version %d", version);
1819
1820    uint32_t referenceId;
1821    if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
1822        return ERROR_MALFORMED;
1823    }
1824
1825    uint32_t timeScale;
1826    if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
1827        return ERROR_MALFORMED;
1828    }
1829    ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
1830
1831    uint64_t earliestPresentationTime;
1832    uint64_t firstOffset;
1833
1834    offset += 12;
1835    size -= 12;
1836
1837    if (version == 0) {
1838        if (size < 8) {
1839            return -EINVAL;
1840        }
1841        uint32_t tmp;
1842        if (!mDataSource->getUInt32(offset, &tmp)) {
1843            return ERROR_MALFORMED;
1844        }
1845        earliestPresentationTime = tmp;
1846        if (!mDataSource->getUInt32(offset + 4, &tmp)) {
1847            return ERROR_MALFORMED;
1848        }
1849        firstOffset = tmp;
1850        offset += 8;
1851        size -= 8;
1852    } else {
1853        if (size < 16) {
1854            return -EINVAL;
1855        }
1856        if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
1857            return ERROR_MALFORMED;
1858        }
1859        if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
1860            return ERROR_MALFORMED;
1861        }
1862        offset += 16;
1863        size -= 16;
1864    }
1865    ALOGV("sidx pres/off: %Ld/%Ld", earliestPresentationTime, firstOffset);
1866
1867    if (size < 4) {
1868        return -EINVAL;
1869    }
1870
1871    uint16_t referenceCount;
1872    if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
1873        return ERROR_MALFORMED;
1874    }
1875    offset += 4;
1876    size -= 4;
1877    ALOGV("refcount: %d", referenceCount);
1878
1879    if (size < referenceCount * 12) {
1880        return -EINVAL;
1881    }
1882
1883    uint64_t total_duration = 0;
1884    for (unsigned int i = 0; i < referenceCount; i++) {
1885        uint32_t d1, d2, d3;
1886
1887        if (!mDataSource->getUInt32(offset, &d1) ||     // size
1888            !mDataSource->getUInt32(offset + 4, &d2) || // duration
1889            !mDataSource->getUInt32(offset + 8, &d3)) { // flags
1890            return ERROR_MALFORMED;
1891        }
1892
1893        if (d1 & 0x80000000) {
1894            ALOGW("sub-sidx boxes not supported yet");
1895        }
1896        bool sap = d3 & 0x80000000;
1897        bool saptype = d3 >> 28;
1898        if (!sap || saptype > 2) {
1899            ALOGW("not a stream access point, or unsupported type");
1900        }
1901        total_duration += d2;
1902        offset += 12;
1903        ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
1904        SidxEntry se;
1905        se.mSize = d1 & 0x7fffffff;
1906        se.mDurationUs = 1000000LL * d2 / timeScale;
1907        mSidxEntries.add(se);
1908    }
1909
1910    mSidxDuration = total_duration * 1000000 / timeScale;
1911    ALOGV("duration: %lld", mSidxDuration);
1912
1913    int64_t metaDuration;
1914    if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
1915        mLastTrack->meta->setInt64(kKeyDuration, mSidxDuration);
1916    }
1917    return OK;
1918}
1919
1920
1921
1922status_t MPEG4Extractor::parseTrackHeader(
1923        off64_t data_offset, off64_t data_size) {
1924    if (data_size < 4) {
1925        return ERROR_MALFORMED;
1926    }
1927
1928    uint8_t version;
1929    if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1930        return ERROR_IO;
1931    }
1932
1933    size_t dynSize = (version == 1) ? 36 : 24;
1934
1935    uint8_t buffer[36 + 60];
1936
1937    if (data_size != (off64_t)dynSize + 60) {
1938        return ERROR_MALFORMED;
1939    }
1940
1941    if (mDataSource->readAt(
1942                data_offset, buffer, data_size) < (ssize_t)data_size) {
1943        return ERROR_IO;
1944    }
1945
1946    uint64_t ctime, mtime, duration;
1947    int32_t id;
1948
1949    if (version == 1) {
1950        ctime = U64_AT(&buffer[4]);
1951        mtime = U64_AT(&buffer[12]);
1952        id = U32_AT(&buffer[20]);
1953        duration = U64_AT(&buffer[28]);
1954    } else if (version == 0) {
1955        ctime = U32_AT(&buffer[4]);
1956        mtime = U32_AT(&buffer[8]);
1957        id = U32_AT(&buffer[12]);
1958        duration = U32_AT(&buffer[20]);
1959    } else {
1960        return ERROR_UNSUPPORTED;
1961    }
1962
1963    mLastTrack->meta->setInt32(kKeyTrackID, id);
1964
1965    size_t matrixOffset = dynSize + 16;
1966    int32_t a00 = U32_AT(&buffer[matrixOffset]);
1967    int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
1968    int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
1969    int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
1970    int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
1971    int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
1972
1973#if 0
1974    ALOGI("x' = %.2f * x + %.2f * y + %.2f",
1975         a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
1976    ALOGI("y' = %.2f * x + %.2f * y + %.2f",
1977         a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
1978#endif
1979
1980    uint32_t rotationDegrees;
1981
1982    static const int32_t kFixedOne = 0x10000;
1983    if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
1984        // Identity, no rotation
1985        rotationDegrees = 0;
1986    } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
1987        rotationDegrees = 90;
1988    } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
1989        rotationDegrees = 270;
1990    } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
1991        rotationDegrees = 180;
1992    } else {
1993        ALOGW("We only support 0,90,180,270 degree rotation matrices");
1994        rotationDegrees = 0;
1995    }
1996
1997    if (rotationDegrees != 0) {
1998        mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
1999    }
2000
2001    // Handle presentation display size, which could be different
2002    // from the image size indicated by kKeyWidth and kKeyHeight.
2003    uint32_t width = U32_AT(&buffer[dynSize + 52]);
2004    uint32_t height = U32_AT(&buffer[dynSize + 56]);
2005    mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
2006    mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
2007
2008    return OK;
2009}
2010
2011status_t MPEG4Extractor::parseMetaData(off64_t offset, size_t size) {
2012    if (size < 4) {
2013        return ERROR_MALFORMED;
2014    }
2015
2016    uint8_t *buffer = new uint8_t[size + 1];
2017    if (mDataSource->readAt(
2018                offset, buffer, size) != (ssize_t)size) {
2019        delete[] buffer;
2020        buffer = NULL;
2021
2022        return ERROR_IO;
2023    }
2024
2025    uint32_t flags = U32_AT(buffer);
2026
2027    uint32_t metadataKey = 0;
2028    char chunk[5];
2029    MakeFourCCString(mPath[4], chunk);
2030    ALOGV("meta: %s @ %lld", chunk, offset);
2031    switch (mPath[4]) {
2032        case FOURCC(0xa9, 'a', 'l', 'b'):
2033        {
2034            metadataKey = kKeyAlbum;
2035            break;
2036        }
2037        case FOURCC(0xa9, 'A', 'R', 'T'):
2038        {
2039            metadataKey = kKeyArtist;
2040            break;
2041        }
2042        case FOURCC('a', 'A', 'R', 'T'):
2043        {
2044            metadataKey = kKeyAlbumArtist;
2045            break;
2046        }
2047        case FOURCC(0xa9, 'd', 'a', 'y'):
2048        {
2049            metadataKey = kKeyYear;
2050            break;
2051        }
2052        case FOURCC(0xa9, 'n', 'a', 'm'):
2053        {
2054            metadataKey = kKeyTitle;
2055            break;
2056        }
2057        case FOURCC(0xa9, 'w', 'r', 't'):
2058        {
2059            metadataKey = kKeyWriter;
2060            break;
2061        }
2062        case FOURCC('c', 'o', 'v', 'r'):
2063        {
2064            metadataKey = kKeyAlbumArt;
2065            break;
2066        }
2067        case FOURCC('g', 'n', 'r', 'e'):
2068        {
2069            metadataKey = kKeyGenre;
2070            break;
2071        }
2072        case FOURCC(0xa9, 'g', 'e', 'n'):
2073        {
2074            metadataKey = kKeyGenre;
2075            break;
2076        }
2077        case FOURCC('c', 'p', 'i', 'l'):
2078        {
2079            if (size == 9 && flags == 21) {
2080                char tmp[16];
2081                sprintf(tmp, "%d",
2082                        (int)buffer[size - 1]);
2083
2084                mFileMetaData->setCString(kKeyCompilation, tmp);
2085            }
2086            break;
2087        }
2088        case FOURCC('t', 'r', 'k', 'n'):
2089        {
2090            if (size == 16 && flags == 0) {
2091                char tmp[16];
2092                uint16_t* pTrack = (uint16_t*)&buffer[10];
2093                uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2094                sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2095
2096                mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2097            }
2098            break;
2099        }
2100        case FOURCC('d', 'i', 's', 'k'):
2101        {
2102            if ((size == 14 || size == 16) && flags == 0) {
2103                char tmp[16];
2104                uint16_t* pDisc = (uint16_t*)&buffer[10];
2105                uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2106                sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2107
2108                mFileMetaData->setCString(kKeyDiscNumber, tmp);
2109            }
2110            break;
2111        }
2112        case FOURCC('-', '-', '-', '-'):
2113        {
2114            buffer[size] = '\0';
2115            switch (mPath[5]) {
2116                case FOURCC('m', 'e', 'a', 'n'):
2117                    mLastCommentMean.setTo((const char *)buffer + 4);
2118                    break;
2119                case FOURCC('n', 'a', 'm', 'e'):
2120                    mLastCommentName.setTo((const char *)buffer + 4);
2121                    break;
2122                case FOURCC('d', 'a', 't', 'a'):
2123                    mLastCommentData.setTo((const char *)buffer + 8);
2124                    break;
2125            }
2126
2127            // Once we have a set of mean/name/data info, go ahead and process
2128            // it to see if its something we are interested in.  Whether or not
2129            // were are interested in the specific tag, make sure to clear out
2130            // the set so we can be ready to process another tuple should one
2131            // show up later in the file.
2132            if ((mLastCommentMean.length() != 0) &&
2133                (mLastCommentName.length() != 0) &&
2134                (mLastCommentData.length() != 0)) {
2135
2136                if (mLastCommentMean == "com.apple.iTunes"
2137                        && mLastCommentName == "iTunSMPB") {
2138                    int32_t delay, padding;
2139                    if (sscanf(mLastCommentData,
2140                               " %*x %x %x %*x", &delay, &padding) == 2) {
2141                        mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2142                        mLastTrack->meta->setInt32(kKeyEncoderPadding, padding);
2143                    }
2144                }
2145
2146                mLastCommentMean.clear();
2147                mLastCommentName.clear();
2148                mLastCommentData.clear();
2149            }
2150            break;
2151        }
2152
2153        default:
2154            break;
2155    }
2156
2157    if (size >= 8 && metadataKey) {
2158        if (metadataKey == kKeyAlbumArt) {
2159            mFileMetaData->setData(
2160                    kKeyAlbumArt, MetaData::TYPE_NONE,
2161                    buffer + 8, size - 8);
2162        } else if (metadataKey == kKeyGenre) {
2163            if (flags == 0) {
2164                // uint8_t genre code, iTunes genre codes are
2165                // the standard id3 codes, except they start
2166                // at 1 instead of 0 (e.g. Pop is 14, not 13)
2167                // We use standard id3 numbering, so subtract 1.
2168                int genrecode = (int)buffer[size - 1];
2169                genrecode--;
2170                if (genrecode < 0) {
2171                    genrecode = 255; // reserved for 'unknown genre'
2172                }
2173                char genre[10];
2174                sprintf(genre, "%d", genrecode);
2175
2176                mFileMetaData->setCString(metadataKey, genre);
2177            } else if (flags == 1) {
2178                // custom genre string
2179                buffer[size] = '\0';
2180
2181                mFileMetaData->setCString(
2182                        metadataKey, (const char *)buffer + 8);
2183            }
2184        } else {
2185            buffer[size] = '\0';
2186
2187            mFileMetaData->setCString(
2188                    metadataKey, (const char *)buffer + 8);
2189        }
2190    }
2191
2192    delete[] buffer;
2193    buffer = NULL;
2194
2195    return OK;
2196}
2197
2198sp<MediaSource> MPEG4Extractor::getTrack(size_t index) {
2199    status_t err;
2200    if ((err = readMetaData()) != OK) {
2201        return NULL;
2202    }
2203
2204    Track *track = mFirstTrack;
2205    while (index > 0) {
2206        if (track == NULL) {
2207            return NULL;
2208        }
2209
2210        track = track->next;
2211        --index;
2212    }
2213
2214    if (track == NULL) {
2215        return NULL;
2216    }
2217
2218    ALOGV("getTrack called, pssh: %d", mPssh.size());
2219
2220    return new MPEG4Source(
2221            track->meta, mDataSource, track->timescale, track->sampleTable,
2222            mSidxEntries, mMoofOffset);
2223}
2224
2225// static
2226status_t MPEG4Extractor::verifyTrack(Track *track) {
2227    const char *mime;
2228    CHECK(track->meta->findCString(kKeyMIMEType, &mime));
2229
2230    uint32_t type;
2231    const void *data;
2232    size_t size;
2233    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
2234        if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
2235                || type != kTypeAVCC) {
2236            return ERROR_MALFORMED;
2237        }
2238    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
2239            || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
2240        if (!track->meta->findData(kKeyESDS, &type, &data, &size)
2241                || type != kTypeESDS) {
2242            return ERROR_MALFORMED;
2243        }
2244    }
2245
2246    if (!track->sampleTable->isValid()) {
2247        // Make sure we have all the metadata we need.
2248        return ERROR_MALFORMED;
2249    }
2250
2251    return OK;
2252}
2253
2254status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
2255        const void *esds_data, size_t esds_size) {
2256    ESDS esds(esds_data, esds_size);
2257
2258    uint8_t objectTypeIndication;
2259    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
2260        return ERROR_MALFORMED;
2261    }
2262
2263    if (objectTypeIndication == 0xe1) {
2264        // This isn't MPEG4 audio at all, it's QCELP 14k...
2265        mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
2266        return OK;
2267    }
2268
2269    if (objectTypeIndication  == 0x6b) {
2270        // The media subtype is MP3 audio
2271        // Our software MP3 audio decoder may not be able to handle
2272        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
2273        ALOGE("MP3 track in MP4/3GPP file is not supported");
2274        return ERROR_UNSUPPORTED;
2275    }
2276
2277    const uint8_t *csd;
2278    size_t csd_size;
2279    if (esds.getCodecSpecificInfo(
2280                (const void **)&csd, &csd_size) != OK) {
2281        return ERROR_MALFORMED;
2282    }
2283
2284#if 0
2285    printf("ESD of size %d\n", csd_size);
2286    hexdump(csd, csd_size);
2287#endif
2288
2289    if (csd_size == 0) {
2290        // There's no further information, i.e. no codec specific data
2291        // Let's assume that the information provided in the mpeg4 headers
2292        // is accurate and hope for the best.
2293
2294        return OK;
2295    }
2296
2297    if (csd_size < 2) {
2298        return ERROR_MALFORMED;
2299    }
2300
2301    static uint32_t kSamplingRate[] = {
2302        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
2303        16000, 12000, 11025, 8000, 7350
2304    };
2305
2306    ABitReader br(csd, csd_size);
2307    uint32_t objectType = br.getBits(5);
2308
2309    if (objectType == 31) {  // AAC-ELD => additional 6 bits
2310        objectType = 32 + br.getBits(6);
2311    }
2312
2313    //keep AOT type
2314    mLastTrack->meta->setInt32(kKeyAACAOT, objectType);
2315
2316    uint32_t freqIndex = br.getBits(4);
2317
2318    int32_t sampleRate = 0;
2319    int32_t numChannels = 0;
2320    if (freqIndex == 15) {
2321        if (csd_size < 5) {
2322            return ERROR_MALFORMED;
2323        }
2324        sampleRate = br.getBits(24);
2325        numChannels = br.getBits(4);
2326    } else {
2327        numChannels = br.getBits(4);
2328
2329        if (freqIndex == 13 || freqIndex == 14) {
2330            return ERROR_MALFORMED;
2331        }
2332
2333        sampleRate = kSamplingRate[freqIndex];
2334    }
2335
2336    if (objectType == 5 || objectType == 29) { // SBR specific config per 14496-3 table 1.13
2337        uint32_t extFreqIndex = br.getBits(4);
2338        int32_t extSampleRate;
2339        if (extFreqIndex == 15) {
2340            if (csd_size < 8) {
2341                return ERROR_MALFORMED;
2342            }
2343            extSampleRate = br.getBits(24);
2344        } else {
2345            if (extFreqIndex == 13 || extFreqIndex == 14) {
2346                return ERROR_MALFORMED;
2347            }
2348            extSampleRate = kSamplingRate[extFreqIndex];
2349        }
2350        //TODO: save the extension sampling rate value in meta data =>
2351        //      mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
2352    }
2353
2354    if (numChannels == 0) {
2355        return ERROR_UNSUPPORTED;
2356    }
2357
2358    int32_t prevSampleRate;
2359    CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));
2360
2361    if (prevSampleRate != sampleRate) {
2362        ALOGV("mpeg4 audio sample rate different from previous setting. "
2363             "was: %d, now: %d", prevSampleRate, sampleRate);
2364    }
2365
2366    mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);
2367
2368    int32_t prevChannelCount;
2369    CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));
2370
2371    if (prevChannelCount != numChannels) {
2372        ALOGV("mpeg4 audio channel count different from previous setting. "
2373             "was: %d, now: %d", prevChannelCount, numChannels);
2374    }
2375
2376    mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);
2377
2378    return OK;
2379}
2380
2381////////////////////////////////////////////////////////////////////////////////
2382
2383MPEG4Source::MPEG4Source(
2384        const sp<MetaData> &format,
2385        const sp<DataSource> &dataSource,
2386        int32_t timeScale,
2387        const sp<SampleTable> &sampleTable,
2388        Vector<SidxEntry> &sidx,
2389        off64_t firstMoofOffset)
2390    : mFormat(format),
2391      mDataSource(dataSource),
2392      mTimescale(timeScale),
2393      mSampleTable(sampleTable),
2394      mCurrentSampleIndex(0),
2395      mCurrentFragmentIndex(0),
2396      mSegments(sidx),
2397      mFirstMoofOffset(firstMoofOffset),
2398      mCurrentMoofOffset(firstMoofOffset),
2399      mCurrentTime(0),
2400      mCurrentSampleInfoAllocSize(0),
2401      mCurrentSampleInfoSizes(NULL),
2402      mCurrentSampleInfoOffsetsAllocSize(0),
2403      mCurrentSampleInfoOffsets(NULL),
2404      mIsAVC(false),
2405      mNALLengthSize(0),
2406      mStarted(false),
2407      mGroup(NULL),
2408      mBuffer(NULL),
2409      mWantsNALFragments(false),
2410      mSrcBuffer(NULL) {
2411
2412    mFormat->findInt32(kKeyCryptoMode, &mCryptoMode);
2413    mDefaultIVSize = 0;
2414    mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
2415    uint32_t keytype;
2416    const void *key;
2417    size_t keysize;
2418    if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
2419        CHECK(keysize <= 16);
2420        memset(mCryptoKey, 0, 16);
2421        memcpy(mCryptoKey, key, keysize);
2422    }
2423
2424    const char *mime;
2425    bool success = mFormat->findCString(kKeyMIMEType, &mime);
2426    CHECK(success);
2427
2428    mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
2429
2430    if (mIsAVC) {
2431        uint32_t type;
2432        const void *data;
2433        size_t size;
2434        CHECK(format->findData(kKeyAVCC, &type, &data, &size));
2435
2436        const uint8_t *ptr = (const uint8_t *)data;
2437
2438        CHECK(size >= 7);
2439        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
2440
2441        // The number of bytes used to encode the length of a NAL unit.
2442        mNALLengthSize = 1 + (ptr[4] & 3);
2443    }
2444
2445    CHECK(format->findInt32(kKeyTrackID, &mTrackId));
2446
2447    if (mFirstMoofOffset != 0) {
2448        off64_t offset = mFirstMoofOffset;
2449        parseChunk(&offset);
2450    }
2451}
2452
2453MPEG4Source::~MPEG4Source() {
2454    if (mStarted) {
2455        stop();
2456    }
2457    free(mCurrentSampleInfoSizes);
2458    free(mCurrentSampleInfoOffsets);
2459}
2460
2461status_t MPEG4Source::start(MetaData *params) {
2462    Mutex::Autolock autoLock(mLock);
2463
2464    CHECK(!mStarted);
2465
2466    int32_t val;
2467    if (params && params->findInt32(kKeyWantsNALFragments, &val)
2468        && val != 0) {
2469        mWantsNALFragments = true;
2470    } else {
2471        mWantsNALFragments = false;
2472    }
2473
2474    mGroup = new MediaBufferGroup;
2475
2476    int32_t max_size;
2477    CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size));
2478
2479    mGroup->add_buffer(new MediaBuffer(max_size));
2480
2481    mSrcBuffer = new uint8_t[max_size];
2482
2483    mStarted = true;
2484
2485    return OK;
2486}
2487
2488status_t MPEG4Source::stop() {
2489    Mutex::Autolock autoLock(mLock);
2490
2491    CHECK(mStarted);
2492
2493    if (mBuffer != NULL) {
2494        mBuffer->release();
2495        mBuffer = NULL;
2496    }
2497
2498    delete[] mSrcBuffer;
2499    mSrcBuffer = NULL;
2500
2501    delete mGroup;
2502    mGroup = NULL;
2503
2504    mStarted = false;
2505    mCurrentSampleIndex = 0;
2506
2507    return OK;
2508}
2509
2510status_t MPEG4Source::parseChunk(off64_t *offset) {
2511    uint32_t hdr[2];
2512    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
2513        return ERROR_IO;
2514    }
2515    uint64_t chunk_size = ntohl(hdr[0]);
2516    uint32_t chunk_type = ntohl(hdr[1]);
2517    off64_t data_offset = *offset + 8;
2518
2519    if (chunk_size == 1) {
2520        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
2521            return ERROR_IO;
2522        }
2523        chunk_size = ntoh64(chunk_size);
2524        data_offset += 8;
2525
2526        if (chunk_size < 16) {
2527            // The smallest valid chunk is 16 bytes long in this case.
2528            return ERROR_MALFORMED;
2529        }
2530    } else if (chunk_size < 8) {
2531        // The smallest valid chunk is 8 bytes long.
2532        return ERROR_MALFORMED;
2533    }
2534
2535    char chunk[5];
2536    MakeFourCCString(chunk_type, chunk);
2537    ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset);
2538
2539    off64_t chunk_data_size = *offset + chunk_size - data_offset;
2540
2541    switch(chunk_type) {
2542
2543        case FOURCC('t', 'r', 'a', 'f'):
2544        case FOURCC('m', 'o', 'o', 'f'): {
2545            off64_t stop_offset = *offset + chunk_size;
2546            *offset = data_offset;
2547            while (*offset < stop_offset) {
2548                status_t err = parseChunk(offset);
2549                if (err != OK) {
2550                    return err;
2551                }
2552            }
2553            if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
2554                // *offset points to the mdat box following this moof
2555                parseChunk(offset); // doesn't actually parse it, just updates offset
2556                mNextMoofOffset = *offset;
2557            }
2558            break;
2559        }
2560
2561        case FOURCC('t', 'f', 'h', 'd'): {
2562                status_t err;
2563                if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
2564                    return err;
2565                }
2566                *offset += chunk_size;
2567                break;
2568        }
2569
2570        case FOURCC('t', 'r', 'u', 'n'): {
2571                status_t err;
2572                if (mLastParsedTrackId == mTrackId) {
2573                    if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
2574                        return err;
2575                    }
2576                }
2577
2578                *offset += chunk_size;
2579                break;
2580        }
2581
2582        case FOURCC('s', 'a', 'i', 'z'): {
2583            status_t err;
2584            if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
2585                return err;
2586            }
2587            *offset += chunk_size;
2588            break;
2589        }
2590        case FOURCC('s', 'a', 'i', 'o'): {
2591            status_t err;
2592            if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
2593                return err;
2594            }
2595            *offset += chunk_size;
2596            break;
2597        }
2598
2599        case FOURCC('m', 'd', 'a', 't'): {
2600            // parse DRM info if present
2601            ALOGV("MPEG4Source::parseChunk mdat");
2602            // if saiz/saoi was previously observed, do something with the sampleinfos
2603            *offset += chunk_size;
2604            break;
2605        }
2606
2607        default: {
2608            *offset += chunk_size;
2609            break;
2610        }
2611    }
2612    return OK;
2613}
2614
2615status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size) {
2616    ALOGV("parseSampleAuxiliaryInformationSizes");
2617    // 14496-12 8.7.12
2618    uint8_t version;
2619    if (mDataSource->readAt(
2620            offset, &version, sizeof(version))
2621            < (ssize_t)sizeof(version)) {
2622        return ERROR_IO;
2623    }
2624
2625    if (version != 0) {
2626        return ERROR_UNSUPPORTED;
2627    }
2628    offset++;
2629
2630    uint32_t flags;
2631    if (!mDataSource->getUInt24(offset, &flags)) {
2632        return ERROR_IO;
2633    }
2634    offset += 3;
2635
2636    if (flags & 1) {
2637        uint32_t tmp;
2638        if (!mDataSource->getUInt32(offset, &tmp)) {
2639            return ERROR_MALFORMED;
2640        }
2641        mCurrentAuxInfoType = tmp;
2642        offset += 4;
2643        if (!mDataSource->getUInt32(offset, &tmp)) {
2644            return ERROR_MALFORMED;
2645        }
2646        mCurrentAuxInfoTypeParameter = tmp;
2647        offset += 4;
2648    }
2649
2650    uint8_t defsize;
2651    if (mDataSource->readAt(offset, &defsize, 1) != 1) {
2652        return ERROR_MALFORMED;
2653    }
2654    mCurrentDefaultSampleInfoSize = defsize;
2655    offset++;
2656
2657    uint32_t smplcnt;
2658    if (!mDataSource->getUInt32(offset, &smplcnt)) {
2659        return ERROR_MALFORMED;
2660    }
2661    mCurrentSampleInfoCount = smplcnt;
2662    offset += 4;
2663
2664    if (mCurrentDefaultSampleInfoSize != 0) {
2665        ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
2666        return OK;
2667    }
2668    if (smplcnt > mCurrentSampleInfoAllocSize) {
2669        mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
2670        mCurrentSampleInfoAllocSize = smplcnt;
2671    }
2672
2673    mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
2674    return OK;
2675}
2676
2677status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size) {
2678    ALOGV("parseSampleAuxiliaryInformationOffsets");
2679    // 14496-12 8.7.13
2680    uint8_t version;
2681    if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
2682        return ERROR_IO;
2683    }
2684    offset++;
2685
2686    uint32_t flags;
2687    if (!mDataSource->getUInt24(offset, &flags)) {
2688        return ERROR_IO;
2689    }
2690    offset += 3;
2691
2692    uint32_t entrycount;
2693    if (!mDataSource->getUInt32(offset, &entrycount)) {
2694        return ERROR_IO;
2695    }
2696    offset += 4;
2697
2698    if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
2699        mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8);
2700        mCurrentSampleInfoOffsetsAllocSize = entrycount;
2701    }
2702    mCurrentSampleInfoOffsetCount = entrycount;
2703
2704    for (size_t i = 0; i < entrycount; i++) {
2705        if (version == 0) {
2706            uint32_t tmp;
2707            if (!mDataSource->getUInt32(offset, &tmp)) {
2708                return ERROR_IO;
2709            }
2710            mCurrentSampleInfoOffsets[i] = tmp;
2711            offset += 4;
2712        } else {
2713            uint64_t tmp;
2714            if (!mDataSource->getUInt64(offset, &tmp)) {
2715                return ERROR_IO;
2716            }
2717            mCurrentSampleInfoOffsets[i] = tmp;
2718            offset += 8;
2719        }
2720    }
2721
2722    // parse clear/encrypted data
2723
2724    off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
2725
2726    drmoffset += mCurrentMoofOffset;
2727    int ivlength;
2728    CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength));
2729
2730    // read CencSampleAuxiliaryDataFormats
2731    for (size_t i = 0; i < mCurrentSampleInfoCount; i++) {
2732        Sample *smpl = &mCurrentSamples.editItemAt(i);
2733
2734        memset(smpl->iv, 0, 16);
2735        if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) {
2736            return ERROR_IO;
2737        }
2738
2739        drmoffset += ivlength;
2740
2741        int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
2742        if (smplinfosize == 0) {
2743            smplinfosize = mCurrentSampleInfoSizes[i];
2744        }
2745        if (smplinfosize > ivlength) {
2746            uint16_t numsubsamples;
2747            if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) {
2748                return ERROR_IO;
2749            }
2750            drmoffset += 2;
2751            for (size_t j = 0; j < numsubsamples; j++) {
2752                uint16_t numclear;
2753                uint32_t numencrypted;
2754                if (!mDataSource->getUInt16(drmoffset, &numclear)) {
2755                    return ERROR_IO;
2756                }
2757                drmoffset += 2;
2758                if (!mDataSource->getUInt32(drmoffset, &numencrypted)) {
2759                    return ERROR_IO;
2760                }
2761                drmoffset += 4;
2762                smpl->clearsizes.add(numclear);
2763                smpl->encryptedsizes.add(numencrypted);
2764            }
2765        } else {
2766            smpl->clearsizes.add(0);
2767            smpl->encryptedsizes.add(smpl->size);
2768        }
2769    }
2770
2771
2772    return OK;
2773}
2774
2775status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
2776
2777    if (size < 8) {
2778        return -EINVAL;
2779    }
2780
2781    uint32_t flags;
2782    if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
2783        return ERROR_MALFORMED;
2784    }
2785
2786    if (flags & 0xff000000) {
2787        return -EINVAL;
2788    }
2789
2790    if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
2791        return ERROR_MALFORMED;
2792    }
2793
2794    if (mLastParsedTrackId != mTrackId) {
2795        // this is not the right track, skip it
2796        return OK;
2797    }
2798
2799    mTrackFragmentHeaderInfo.mFlags = flags;
2800    mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
2801    offset += 8;
2802    size -= 8;
2803
2804    ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
2805
2806    if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
2807        if (size < 8) {
2808            return -EINVAL;
2809        }
2810
2811        if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
2812            return ERROR_MALFORMED;
2813        }
2814        offset += 8;
2815        size -= 8;
2816    }
2817
2818    if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
2819        if (size < 4) {
2820            return -EINVAL;
2821        }
2822
2823        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
2824            return ERROR_MALFORMED;
2825        }
2826        offset += 4;
2827        size -= 4;
2828    }
2829
2830    if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
2831        if (size < 4) {
2832            return -EINVAL;
2833        }
2834
2835        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
2836            return ERROR_MALFORMED;
2837        }
2838        offset += 4;
2839        size -= 4;
2840    }
2841
2842    if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
2843        if (size < 4) {
2844            return -EINVAL;
2845        }
2846
2847        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
2848            return ERROR_MALFORMED;
2849        }
2850        offset += 4;
2851        size -= 4;
2852    }
2853
2854    if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
2855        if (size < 4) {
2856            return -EINVAL;
2857        }
2858
2859        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
2860            return ERROR_MALFORMED;
2861        }
2862        offset += 4;
2863        size -= 4;
2864    }
2865
2866    if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
2867        mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
2868    }
2869
2870    mTrackFragmentHeaderInfo.mDataOffset = 0;
2871    return OK;
2872}
2873
2874status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
2875
2876    ALOGV("MPEG4Extractor::parseTrackFragmentRun");
2877    if (size < 8) {
2878        return -EINVAL;
2879    }
2880
2881    enum {
2882        kDataOffsetPresent                  = 0x01,
2883        kFirstSampleFlagsPresent            = 0x04,
2884        kSampleDurationPresent              = 0x100,
2885        kSampleSizePresent                  = 0x200,
2886        kSampleFlagsPresent                 = 0x400,
2887        kSampleCompositionTimeOffsetPresent = 0x800,
2888    };
2889
2890    uint32_t flags;
2891    if (!mDataSource->getUInt32(offset, &flags)) {
2892        return ERROR_MALFORMED;
2893    }
2894    ALOGV("fragment run flags: %08x", flags);
2895
2896    if (flags & 0xff000000) {
2897        return -EINVAL;
2898    }
2899
2900    if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
2901        // These two shall not be used together.
2902        return -EINVAL;
2903    }
2904
2905    uint32_t sampleCount;
2906    if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
2907        return ERROR_MALFORMED;
2908    }
2909    offset += 8;
2910    size -= 8;
2911
2912    uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
2913
2914    uint32_t firstSampleFlags = 0;
2915
2916    if (flags & kDataOffsetPresent) {
2917        if (size < 4) {
2918            return -EINVAL;
2919        }
2920
2921        int32_t dataOffsetDelta;
2922        if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
2923            return ERROR_MALFORMED;
2924        }
2925
2926        dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
2927
2928        offset += 4;
2929        size -= 4;
2930    }
2931
2932    if (flags & kFirstSampleFlagsPresent) {
2933        if (size < 4) {
2934            return -EINVAL;
2935        }
2936
2937        if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
2938            return ERROR_MALFORMED;
2939        }
2940        offset += 4;
2941        size -= 4;
2942    }
2943
2944    uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
2945             sampleCtsOffset = 0;
2946
2947    size_t bytesPerSample = 0;
2948    if (flags & kSampleDurationPresent) {
2949        bytesPerSample += 4;
2950    } else if (mTrackFragmentHeaderInfo.mFlags
2951            & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
2952        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
2953    } else {
2954        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
2955    }
2956
2957    if (flags & kSampleSizePresent) {
2958        bytesPerSample += 4;
2959    } else if (mTrackFragmentHeaderInfo.mFlags
2960            & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
2961        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
2962    } else {
2963        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
2964    }
2965
2966    if (flags & kSampleFlagsPresent) {
2967        bytesPerSample += 4;
2968    } else if (mTrackFragmentHeaderInfo.mFlags
2969            & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
2970        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
2971    } else {
2972        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
2973    }
2974
2975    if (flags & kSampleCompositionTimeOffsetPresent) {
2976        bytesPerSample += 4;
2977    } else {
2978        sampleCtsOffset = 0;
2979    }
2980
2981    if (size < sampleCount * bytesPerSample) {
2982        return -EINVAL;
2983    }
2984
2985    Sample tmp;
2986    for (uint32_t i = 0; i < sampleCount; ++i) {
2987        if (flags & kSampleDurationPresent) {
2988            if (!mDataSource->getUInt32(offset, &sampleDuration)) {
2989                return ERROR_MALFORMED;
2990            }
2991            offset += 4;
2992        }
2993
2994        if (flags & kSampleSizePresent) {
2995            if (!mDataSource->getUInt32(offset, &sampleSize)) {
2996                return ERROR_MALFORMED;
2997            }
2998            offset += 4;
2999        }
3000
3001        if (flags & kSampleFlagsPresent) {
3002            if (!mDataSource->getUInt32(offset, &sampleFlags)) {
3003                return ERROR_MALFORMED;
3004            }
3005            offset += 4;
3006        }
3007
3008        if (flags & kSampleCompositionTimeOffsetPresent) {
3009            if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
3010                return ERROR_MALFORMED;
3011            }
3012            offset += 4;
3013        }
3014
3015        ALOGV("adding sample %d at offset 0x%08llx, size %u, duration %u, "
3016              " flags 0x%08x", i + 1,
3017                dataOffset, sampleSize, sampleDuration,
3018                (flags & kFirstSampleFlagsPresent) && i == 0
3019                    ? firstSampleFlags : sampleFlags);
3020        tmp.offset = dataOffset;
3021        tmp.size = sampleSize;
3022        tmp.duration = sampleDuration;
3023        mCurrentSamples.add(tmp);
3024
3025        dataOffset += sampleSize;
3026    }
3027
3028    mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
3029
3030    return OK;
3031}
3032
3033sp<MetaData> MPEG4Source::getFormat() {
3034    Mutex::Autolock autoLock(mLock);
3035
3036    return mFormat;
3037}
3038
3039size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
3040    switch (mNALLengthSize) {
3041        case 1:
3042            return *data;
3043        case 2:
3044            return U16_AT(data);
3045        case 3:
3046            return ((size_t)data[0] << 16) | U16_AT(&data[1]);
3047        case 4:
3048            return U32_AT(data);
3049    }
3050
3051    // This cannot happen, mNALLengthSize springs to life by adding 1 to
3052    // a 2-bit integer.
3053    CHECK(!"Should not be here.");
3054
3055    return 0;
3056}
3057
3058status_t MPEG4Source::read(
3059        MediaBuffer **out, const ReadOptions *options) {
3060    Mutex::Autolock autoLock(mLock);
3061
3062    CHECK(mStarted);
3063
3064    if (mFirstMoofOffset > 0) {
3065        return fragmentedRead(out, options);
3066    }
3067
3068    *out = NULL;
3069
3070    int64_t targetSampleTimeUs = -1;
3071
3072    int64_t seekTimeUs;
3073    ReadOptions::SeekMode mode;
3074    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3075        uint32_t findFlags = 0;
3076        switch (mode) {
3077            case ReadOptions::SEEK_PREVIOUS_SYNC:
3078                findFlags = SampleTable::kFlagBefore;
3079                break;
3080            case ReadOptions::SEEK_NEXT_SYNC:
3081                findFlags = SampleTable::kFlagAfter;
3082                break;
3083            case ReadOptions::SEEK_CLOSEST_SYNC:
3084            case ReadOptions::SEEK_CLOSEST:
3085                findFlags = SampleTable::kFlagClosest;
3086                break;
3087            default:
3088                CHECK(!"Should not be here.");
3089                break;
3090        }
3091
3092        uint32_t sampleIndex;
3093        status_t err = mSampleTable->findSampleAtTime(
3094                seekTimeUs * mTimescale / 1000000,
3095                &sampleIndex, findFlags);
3096
3097        if (mode == ReadOptions::SEEK_CLOSEST) {
3098            // We found the closest sample already, now we want the sync
3099            // sample preceding it (or the sample itself of course), even
3100            // if the subsequent sync sample is closer.
3101            findFlags = SampleTable::kFlagBefore;
3102        }
3103
3104        uint32_t syncSampleIndex;
3105        if (err == OK) {
3106            err = mSampleTable->findSyncSampleNear(
3107                    sampleIndex, &syncSampleIndex, findFlags);
3108        }
3109
3110        uint32_t sampleTime;
3111        if (err == OK) {
3112            err = mSampleTable->getMetaDataForSample(
3113                    sampleIndex, NULL, NULL, &sampleTime);
3114        }
3115
3116        if (err != OK) {
3117            if (err == ERROR_OUT_OF_RANGE) {
3118                // An attempt to seek past the end of the stream would
3119                // normally cause this ERROR_OUT_OF_RANGE error. Propagating
3120                // this all the way to the MediaPlayer would cause abnormal
3121                // termination. Legacy behaviour appears to be to behave as if
3122                // we had seeked to the end of stream, ending normally.
3123                err = ERROR_END_OF_STREAM;
3124            }
3125            ALOGV("end of stream");
3126            return err;
3127        }
3128
3129        if (mode == ReadOptions::SEEK_CLOSEST) {
3130            targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
3131        }
3132
3133#if 0
3134        uint32_t syncSampleTime;
3135        CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
3136                    syncSampleIndex, NULL, NULL, &syncSampleTime));
3137
3138        ALOGI("seek to time %lld us => sample at time %lld us, "
3139             "sync sample at time %lld us",
3140             seekTimeUs,
3141             sampleTime * 1000000ll / mTimescale,
3142             syncSampleTime * 1000000ll / mTimescale);
3143#endif
3144
3145        mCurrentSampleIndex = syncSampleIndex;
3146        if (mBuffer != NULL) {
3147            mBuffer->release();
3148            mBuffer = NULL;
3149        }
3150
3151        // fall through
3152    }
3153
3154    off64_t offset;
3155    size_t size;
3156    uint32_t cts;
3157    bool isSyncSample;
3158    bool newBuffer = false;
3159    if (mBuffer == NULL) {
3160        newBuffer = true;
3161
3162        status_t err =
3163            mSampleTable->getMetaDataForSample(
3164                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample);
3165
3166        if (err != OK) {
3167            return err;
3168        }
3169
3170        err = mGroup->acquire_buffer(&mBuffer);
3171
3172        if (err != OK) {
3173            CHECK(mBuffer == NULL);
3174            return err;
3175        }
3176    }
3177
3178    if (!mIsAVC || mWantsNALFragments) {
3179        if (newBuffer) {
3180            ssize_t num_bytes_read =
3181                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3182
3183            if (num_bytes_read < (ssize_t)size) {
3184                mBuffer->release();
3185                mBuffer = NULL;
3186
3187                return ERROR_IO;
3188            }
3189
3190            CHECK(mBuffer != NULL);
3191            mBuffer->set_range(0, size);
3192            mBuffer->meta_data()->clear();
3193            mBuffer->meta_data()->setInt64(
3194                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3195
3196            if (targetSampleTimeUs >= 0) {
3197                mBuffer->meta_data()->setInt64(
3198                        kKeyTargetTime, targetSampleTimeUs);
3199            }
3200
3201            if (isSyncSample) {
3202                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3203            }
3204
3205            ++mCurrentSampleIndex;
3206        }
3207
3208        if (!mIsAVC) {
3209            *out = mBuffer;
3210            mBuffer = NULL;
3211
3212            return OK;
3213        }
3214
3215        // Each NAL unit is split up into its constituent fragments and
3216        // each one of them returned in its own buffer.
3217
3218        CHECK(mBuffer->range_length() >= mNALLengthSize);
3219
3220        const uint8_t *src =
3221            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3222
3223        size_t nal_size = parseNALSize(src);
3224        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3225            ALOGE("incomplete NAL unit.");
3226
3227            mBuffer->release();
3228            mBuffer = NULL;
3229
3230            return ERROR_MALFORMED;
3231        }
3232
3233        MediaBuffer *clone = mBuffer->clone();
3234        CHECK(clone != NULL);
3235        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3236
3237        CHECK(mBuffer != NULL);
3238        mBuffer->set_range(
3239                mBuffer->range_offset() + mNALLengthSize + nal_size,
3240                mBuffer->range_length() - mNALLengthSize - nal_size);
3241
3242        if (mBuffer->range_length() == 0) {
3243            mBuffer->release();
3244            mBuffer = NULL;
3245        }
3246
3247        *out = clone;
3248
3249        return OK;
3250    } else {
3251        // Whole NAL units are returned but each fragment is prefixed by
3252        // the start code (0x00 00 00 01).
3253        ssize_t num_bytes_read = 0;
3254        int32_t drm = 0;
3255        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3256        if (usesDRM) {
3257            num_bytes_read =
3258                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3259        } else {
3260            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3261        }
3262
3263        if (num_bytes_read < (ssize_t)size) {
3264            mBuffer->release();
3265            mBuffer = NULL;
3266
3267            return ERROR_IO;
3268        }
3269
3270        if (usesDRM) {
3271            CHECK(mBuffer != NULL);
3272            mBuffer->set_range(0, size);
3273
3274        } else {
3275            uint8_t *dstData = (uint8_t *)mBuffer->data();
3276            size_t srcOffset = 0;
3277            size_t dstOffset = 0;
3278
3279            while (srcOffset < size) {
3280                bool isMalFormed = !isInRange(0u, size, srcOffset, mNALLengthSize);
3281                size_t nalLength = 0;
3282                if (!isMalFormed) {
3283                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
3284                    srcOffset += mNALLengthSize;
3285                    isMalFormed = !isInRange(0u, size, srcOffset, nalLength);
3286                }
3287
3288                if (isMalFormed) {
3289                    ALOGE("Video is malformed");
3290                    mBuffer->release();
3291                    mBuffer = NULL;
3292                    return ERROR_MALFORMED;
3293                }
3294
3295                if (nalLength == 0) {
3296                    continue;
3297                }
3298
3299                CHECK(dstOffset + 4 <= mBuffer->size());
3300
3301                dstData[dstOffset++] = 0;
3302                dstData[dstOffset++] = 0;
3303                dstData[dstOffset++] = 0;
3304                dstData[dstOffset++] = 1;
3305                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
3306                srcOffset += nalLength;
3307                dstOffset += nalLength;
3308            }
3309            CHECK_EQ(srcOffset, size);
3310            CHECK(mBuffer != NULL);
3311            mBuffer->set_range(0, dstOffset);
3312        }
3313
3314        mBuffer->meta_data()->clear();
3315        mBuffer->meta_data()->setInt64(
3316                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3317
3318        if (targetSampleTimeUs >= 0) {
3319            mBuffer->meta_data()->setInt64(
3320                    kKeyTargetTime, targetSampleTimeUs);
3321        }
3322
3323        if (isSyncSample) {
3324            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3325        }
3326
3327        ++mCurrentSampleIndex;
3328
3329        *out = mBuffer;
3330        mBuffer = NULL;
3331
3332        return OK;
3333    }
3334}
3335
3336status_t MPEG4Source::fragmentedRead(
3337        MediaBuffer **out, const ReadOptions *options) {
3338
3339    ALOGV("MPEG4Source::fragmentedRead");
3340
3341    CHECK(mStarted);
3342
3343    *out = NULL;
3344
3345    int64_t targetSampleTimeUs = -1;
3346
3347    int64_t seekTimeUs;
3348    ReadOptions::SeekMode mode;
3349    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3350
3351        int numSidxEntries = mSegments.size();
3352        if (numSidxEntries != 0) {
3353            int64_t totalTime = 0;
3354            off64_t totalOffset = mFirstMoofOffset;
3355            for (int i = 0; i < numSidxEntries; i++) {
3356                const SidxEntry *se = &mSegments[i];
3357                if (totalTime + se->mDurationUs > seekTimeUs) {
3358                    // The requested time is somewhere in this segment
3359                    if ((mode == ReadOptions::SEEK_NEXT_SYNC) ||
3360                        (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
3361                        (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
3362                        // requested next sync, or closest sync and it was closer to the end of
3363                        // this segment
3364                        totalTime += se->mDurationUs;
3365                        totalOffset += se->mSize;
3366                    }
3367                    break;
3368                }
3369                totalTime += se->mDurationUs;
3370                totalOffset += se->mSize;
3371            }
3372        mCurrentMoofOffset = totalOffset;
3373        mCurrentSamples.clear();
3374        mCurrentSampleIndex = 0;
3375        parseChunk(&totalOffset);
3376        mCurrentTime = totalTime * mTimescale / 1000000ll;
3377        }
3378
3379        if (mBuffer != NULL) {
3380            mBuffer->release();
3381            mBuffer = NULL;
3382        }
3383
3384        // fall through
3385    }
3386
3387    off64_t offset = 0;
3388    size_t size;
3389    uint32_t cts = 0;
3390    bool isSyncSample = false;
3391    bool newBuffer = false;
3392    if (mBuffer == NULL) {
3393        newBuffer = true;
3394
3395        if (mCurrentSampleIndex >= mCurrentSamples.size()) {
3396            // move to next fragment
3397            Sample lastSample = mCurrentSamples[mCurrentSamples.size() - 1];
3398            off64_t nextMoof = mNextMoofOffset; // lastSample.offset + lastSample.size;
3399            mCurrentMoofOffset = nextMoof;
3400            mCurrentSamples.clear();
3401            mCurrentSampleIndex = 0;
3402            parseChunk(&nextMoof);
3403                if (mCurrentSampleIndex >= mCurrentSamples.size()) {
3404                    return ERROR_END_OF_STREAM;
3405                }
3406        }
3407
3408        const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
3409        offset = smpl->offset;
3410        size = smpl->size;
3411        cts = mCurrentTime;
3412        mCurrentTime += smpl->duration;
3413        isSyncSample = (mCurrentSampleIndex == 0); // XXX
3414
3415        status_t err = mGroup->acquire_buffer(&mBuffer);
3416
3417        if (err != OK) {
3418            CHECK(mBuffer == NULL);
3419            ALOGV("acquire_buffer returned %d", err);
3420            return err;
3421        }
3422    }
3423
3424    const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
3425    const sp<MetaData> bufmeta = mBuffer->meta_data();
3426    bufmeta->clear();
3427    if (smpl->encryptedsizes.size()) {
3428        // store clear/encrypted lengths in metadata
3429        bufmeta->setData(kKeyPlainSizes, 0,
3430                smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
3431        bufmeta->setData(kKeyEncryptedSizes, 0,
3432                smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
3433        bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size?
3434        bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
3435        bufmeta->setInt32(kKeyCryptoMode, mCryptoMode);
3436        bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16);
3437    }
3438
3439    if (!mIsAVC || mWantsNALFragments) {
3440        if (newBuffer) {
3441            ssize_t num_bytes_read =
3442                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3443
3444            if (num_bytes_read < (ssize_t)size) {
3445                mBuffer->release();
3446                mBuffer = NULL;
3447
3448                ALOGV("i/o error");
3449                return ERROR_IO;
3450            }
3451
3452            CHECK(mBuffer != NULL);
3453            mBuffer->set_range(0, size);
3454            mBuffer->meta_data()->setInt64(
3455                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3456
3457            if (targetSampleTimeUs >= 0) {
3458                mBuffer->meta_data()->setInt64(
3459                        kKeyTargetTime, targetSampleTimeUs);
3460            }
3461
3462            if (isSyncSample) {
3463                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3464            }
3465
3466            ++mCurrentSampleIndex;
3467        }
3468
3469        if (!mIsAVC) {
3470            *out = mBuffer;
3471            mBuffer = NULL;
3472
3473            return OK;
3474        }
3475
3476        // Each NAL unit is split up into its constituent fragments and
3477        // each one of them returned in its own buffer.
3478
3479        CHECK(mBuffer->range_length() >= mNALLengthSize);
3480
3481        const uint8_t *src =
3482            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3483
3484        size_t nal_size = parseNALSize(src);
3485        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3486            ALOGE("incomplete NAL unit.");
3487
3488            mBuffer->release();
3489            mBuffer = NULL;
3490
3491            return ERROR_MALFORMED;
3492        }
3493
3494        MediaBuffer *clone = mBuffer->clone();
3495        CHECK(clone != NULL);
3496        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3497
3498        CHECK(mBuffer != NULL);
3499        mBuffer->set_range(
3500                mBuffer->range_offset() + mNALLengthSize + nal_size,
3501                mBuffer->range_length() - mNALLengthSize - nal_size);
3502
3503        if (mBuffer->range_length() == 0) {
3504            mBuffer->release();
3505            mBuffer = NULL;
3506        }
3507
3508        *out = clone;
3509
3510        return OK;
3511    } else {
3512        ALOGV("whole NAL");
3513        // Whole NAL units are returned but each fragment is prefixed by
3514        // the start code (0x00 00 00 01).
3515        ssize_t num_bytes_read = 0;
3516        int32_t drm = 0;
3517        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3518        if (usesDRM) {
3519            num_bytes_read =
3520                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3521        } else {
3522            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3523        }
3524
3525        if (num_bytes_read < (ssize_t)size) {
3526            mBuffer->release();
3527            mBuffer = NULL;
3528
3529            ALOGV("i/o error");
3530            return ERROR_IO;
3531        }
3532
3533        if (usesDRM) {
3534            CHECK(mBuffer != NULL);
3535            mBuffer->set_range(0, size);
3536
3537        } else {
3538            uint8_t *dstData = (uint8_t *)mBuffer->data();
3539            size_t srcOffset = 0;
3540            size_t dstOffset = 0;
3541
3542            while (srcOffset < size) {
3543                bool isMalFormed = !isInRange(0u, size, srcOffset, mNALLengthSize);
3544                size_t nalLength = 0;
3545                if (!isMalFormed) {
3546                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
3547                    srcOffset += mNALLengthSize;
3548                    isMalFormed = !isInRange(0u, size, srcOffset, nalLength);
3549                }
3550
3551                if (isMalFormed) {
3552                    ALOGE("Video is malformed");
3553                    mBuffer->release();
3554                    mBuffer = NULL;
3555                    return ERROR_MALFORMED;
3556                }
3557
3558                if (nalLength == 0) {
3559                    continue;
3560                }
3561
3562                CHECK(dstOffset + 4 <= mBuffer->size());
3563
3564                dstData[dstOffset++] = 0;
3565                dstData[dstOffset++] = 0;
3566                dstData[dstOffset++] = 0;
3567                dstData[dstOffset++] = 1;
3568                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
3569                srcOffset += nalLength;
3570                dstOffset += nalLength;
3571            }
3572            CHECK_EQ(srcOffset, size);
3573            CHECK(mBuffer != NULL);
3574            mBuffer->set_range(0, dstOffset);
3575        }
3576
3577        mBuffer->meta_data()->setInt64(
3578                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3579
3580        if (targetSampleTimeUs >= 0) {
3581            mBuffer->meta_data()->setInt64(
3582                    kKeyTargetTime, targetSampleTimeUs);
3583        }
3584
3585        if (isSyncSample) {
3586            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3587        }
3588
3589        ++mCurrentSampleIndex;
3590
3591        *out = mBuffer;
3592        mBuffer = NULL;
3593
3594        return OK;
3595    }
3596}
3597
3598MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
3599        const char *mimePrefix) {
3600    for (Track *track = mFirstTrack; track != NULL; track = track->next) {
3601        const char *mime;
3602        if (track->meta != NULL
3603                && track->meta->findCString(kKeyMIMEType, &mime)
3604                && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
3605            return track;
3606        }
3607    }
3608
3609    return NULL;
3610}
3611
3612static bool LegacySniffMPEG4(
3613        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
3614    uint8_t header[8];
3615
3616    ssize_t n = source->readAt(4, header, sizeof(header));
3617    if (n < (ssize_t)sizeof(header)) {
3618        return false;
3619    }
3620
3621    if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
3622        || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
3623        || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
3624        || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
3625        || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
3626        || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) {
3627        *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
3628        *confidence = 0.4;
3629
3630        return true;
3631    }
3632
3633    return false;
3634}
3635
3636static bool isCompatibleBrand(uint32_t fourcc) {
3637    static const uint32_t kCompatibleBrands[] = {
3638        FOURCC('i', 's', 'o', 'm'),
3639        FOURCC('i', 's', 'o', '2'),
3640        FOURCC('a', 'v', 'c', '1'),
3641        FOURCC('3', 'g', 'p', '4'),
3642        FOURCC('m', 'p', '4', '1'),
3643        FOURCC('m', 'p', '4', '2'),
3644
3645        // Won't promise that the following file types can be played.
3646        // Just give these file types a chance.
3647        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
3648        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
3649
3650        FOURCC('3', 'g', '2', 'a'),  // 3GPP2
3651        FOURCC('3', 'g', '2', 'b'),
3652    };
3653
3654    for (size_t i = 0;
3655         i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
3656         ++i) {
3657        if (kCompatibleBrands[i] == fourcc) {
3658            return true;
3659        }
3660    }
3661
3662    return false;
3663}
3664
3665// Attempt to actually parse the 'ftyp' atom and determine if a suitable
3666// compatible brand is present.
3667// Also try to identify where this file's metadata ends
3668// (end of the 'moov' atom) and report it to the caller as part of
3669// the metadata.
3670static bool BetterSniffMPEG4(
3671        const sp<DataSource> &source, String8 *mimeType, float *confidence,
3672        sp<AMessage> *meta) {
3673    // We scan up to 128 bytes to identify this file as an MP4.
3674    static const off64_t kMaxScanOffset = 128ll;
3675
3676    off64_t offset = 0ll;
3677    bool foundGoodFileType = false;
3678    off64_t moovAtomEndOffset = -1ll;
3679    bool done = false;
3680
3681    while (!done && offset < kMaxScanOffset) {
3682        uint32_t hdr[2];
3683        if (source->readAt(offset, hdr, 8) < 8) {
3684            return false;
3685        }
3686
3687        uint64_t chunkSize = ntohl(hdr[0]);
3688        uint32_t chunkType = ntohl(hdr[1]);
3689        off64_t chunkDataOffset = offset + 8;
3690
3691        if (chunkSize == 1) {
3692            if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
3693                return false;
3694            }
3695
3696            chunkSize = ntoh64(chunkSize);
3697            chunkDataOffset += 8;
3698
3699            if (chunkSize < 16) {
3700                // The smallest valid chunk is 16 bytes long in this case.
3701                return false;
3702            }
3703        } else if (chunkSize < 8) {
3704            // The smallest valid chunk is 8 bytes long.
3705            return false;
3706        }
3707
3708        off64_t chunkDataSize = offset + chunkSize - chunkDataOffset;
3709
3710        char chunkstring[5];
3711        MakeFourCCString(chunkType, chunkstring);
3712        ALOGV("saw chunk type %s, size %lld @ %lld", chunkstring, chunkSize, offset);
3713        switch (chunkType) {
3714            case FOURCC('f', 't', 'y', 'p'):
3715            {
3716                if (chunkDataSize < 8) {
3717                    return false;
3718                }
3719
3720                uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
3721                for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
3722                    if (i == 1) {
3723                        // Skip this index, it refers to the minorVersion,
3724                        // not a brand.
3725                        continue;
3726                    }
3727
3728                    uint32_t brand;
3729                    if (source->readAt(
3730                                chunkDataOffset + 4 * i, &brand, 4) < 4) {
3731                        return false;
3732                    }
3733
3734                    brand = ntohl(brand);
3735
3736                    if (isCompatibleBrand(brand)) {
3737                        foundGoodFileType = true;
3738                        break;
3739                    }
3740                }
3741
3742                if (!foundGoodFileType) {
3743                    return false;
3744                }
3745
3746                break;
3747            }
3748
3749            case FOURCC('m', 'o', 'o', 'v'):
3750            {
3751                moovAtomEndOffset = offset + chunkSize;
3752
3753                done = true;
3754                break;
3755            }
3756
3757            default:
3758                break;
3759        }
3760
3761        offset += chunkSize;
3762    }
3763
3764    if (!foundGoodFileType) {
3765        return false;
3766    }
3767
3768    *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
3769    *confidence = 0.4f;
3770
3771    if (moovAtomEndOffset >= 0) {
3772        *meta = new AMessage;
3773        (*meta)->setInt64("meta-data-size", moovAtomEndOffset);
3774
3775        ALOGV("found metadata size: %lld", moovAtomEndOffset);
3776    }
3777
3778    return true;
3779}
3780
3781bool SniffMPEG4(
3782        const sp<DataSource> &source, String8 *mimeType, float *confidence,
3783        sp<AMessage> *meta) {
3784    if (BetterSniffMPEG4(source, mimeType, confidence, meta)) {
3785        return true;
3786    }
3787
3788    if (LegacySniffMPEG4(source, mimeType, confidence)) {
3789        ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
3790        return true;
3791    }
3792
3793    return false;
3794}
3795
3796}  // namespace android
3797