1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MPEG4Extractor"
19#include <utils/Log.h>
20
21#include "include/MPEG4Extractor.h"
22#include "include/SampleTable.h"
23#include "include/ESDS.h"
24
25#include <ctype.h>
26#include <stdint.h>
27#include <stdlib.h>
28#include <string.h>
29
30#include <media/stagefright/foundation/ABitReader.h>
31#include <media/stagefright/foundation/ABuffer.h>
32#include <media/stagefright/foundation/ADebug.h>
33#include <media/stagefright/foundation/AMessage.h>
34#include <media/stagefright/MediaBuffer.h>
35#include <media/stagefright/MediaBufferGroup.h>
36#include <media/stagefright/MediaDefs.h>
37#include <media/stagefright/MediaSource.h>
38#include <media/stagefright/MetaData.h>
39#include <utils/String8.h>
40
41namespace android {
42
43class MPEG4Source : public MediaSource {
44public:
45    // Caller retains ownership of both "dataSource" and "sampleTable".
46    MPEG4Source(const sp<MetaData> &format,
47                const sp<DataSource> &dataSource,
48                int32_t timeScale,
49                const sp<SampleTable> &sampleTable,
50                Vector<SidxEntry> &sidx,
51                off64_t firstMoofOffset);
52
53    virtual status_t start(MetaData *params = NULL);
54    virtual status_t stop();
55
56    virtual sp<MetaData> getFormat();
57
58    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
59    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);
60
61protected:
62    virtual ~MPEG4Source();
63
64private:
65    Mutex mLock;
66
67    sp<MetaData> mFormat;
68    sp<DataSource> mDataSource;
69    int32_t mTimescale;
70    sp<SampleTable> mSampleTable;
71    uint32_t mCurrentSampleIndex;
72    uint32_t mCurrentFragmentIndex;
73    Vector<SidxEntry> &mSegments;
74    off64_t mFirstMoofOffset;
75    off64_t mCurrentMoofOffset;
76    off64_t mNextMoofOffset;
77    uint32_t mCurrentTime;
78    int32_t mLastParsedTrackId;
79    int32_t mTrackId;
80
81    int32_t mCryptoMode;    // passed in from extractor
82    int32_t mDefaultIVSize; // passed in from extractor
83    uint8_t mCryptoKey[16]; // passed in from extractor
84    uint32_t mCurrentAuxInfoType;
85    uint32_t mCurrentAuxInfoTypeParameter;
86    int32_t mCurrentDefaultSampleInfoSize;
87    uint32_t mCurrentSampleInfoCount;
88    uint32_t mCurrentSampleInfoAllocSize;
89    uint8_t* mCurrentSampleInfoSizes;
90    uint32_t mCurrentSampleInfoOffsetCount;
91    uint32_t mCurrentSampleInfoOffsetsAllocSize;
92    uint64_t* mCurrentSampleInfoOffsets;
93
94    bool mIsAVC;
95    size_t mNALLengthSize;
96
97    bool mStarted;
98
99    MediaBufferGroup *mGroup;
100
101    MediaBuffer *mBuffer;
102
103    bool mWantsNALFragments;
104
105    uint8_t *mSrcBuffer;
106
107    size_t parseNALSize(const uint8_t *data) const;
108    status_t parseChunk(off64_t *offset);
109    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
110    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
111    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
112    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
113
114    struct TrackFragmentHeaderInfo {
115        enum Flags {
116            kBaseDataOffsetPresent         = 0x01,
117            kSampleDescriptionIndexPresent = 0x02,
118            kDefaultSampleDurationPresent  = 0x08,
119            kDefaultSampleSizePresent      = 0x10,
120            kDefaultSampleFlagsPresent     = 0x20,
121            kDurationIsEmpty               = 0x10000,
122        };
123
124        uint32_t mTrackID;
125        uint32_t mFlags;
126        uint64_t mBaseDataOffset;
127        uint32_t mSampleDescriptionIndex;
128        uint32_t mDefaultSampleDuration;
129        uint32_t mDefaultSampleSize;
130        uint32_t mDefaultSampleFlags;
131
132        uint64_t mDataOffset;
133    };
134    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;
135
136    struct Sample {
137        off64_t offset;
138        size_t size;
139        uint32_t duration;
140        uint8_t iv[16];
141        Vector<size_t> clearsizes;
142        Vector<size_t> encryptedsizes;
143    };
144    Vector<Sample> mCurrentSamples;
145
146    MPEG4Source(const MPEG4Source &);
147    MPEG4Source &operator=(const MPEG4Source &);
148};
149
// This custom data source wraps an existing one. Requests that fall entirely
// within a cached range are satisfied from the cache; all other requests are
// forwarded to the wrapped data source.
// It is used to cache the full sample table metadata of a track. Each
// MPEG4DataSource caches the metadata of a single track, so the source may be
// wrapped multiple times to cover all tracks.
156
157struct MPEG4DataSource : public DataSource {
158    MPEG4DataSource(const sp<DataSource> &source);
159
160    virtual status_t initCheck() const;
161    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
162    virtual status_t getSize(off64_t *size);
163    virtual uint32_t flags();
164
165    status_t setCachedRange(off64_t offset, size_t size);
166
167protected:
168    virtual ~MPEG4DataSource();
169
170private:
171    Mutex mLock;
172
173    sp<DataSource> mSource;
174    off64_t mCachedOffset;
175    size_t mCachedSize;
176    uint8_t *mCache;
177
178    void clearCache();
179
180    MPEG4DataSource(const MPEG4DataSource &);
181    MPEG4DataSource &operator=(const MPEG4DataSource &);
182};
183
184MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
185    : mSource(source),
186      mCachedOffset(0),
187      mCachedSize(0),
188      mCache(NULL) {
189}
190
191MPEG4DataSource::~MPEG4DataSource() {
192    clearCache();
193}
194
195void MPEG4DataSource::clearCache() {
196    if (mCache) {
197        free(mCache);
198        mCache = NULL;
199    }
200
201    mCachedOffset = 0;
202    mCachedSize = 0;
203}
204
205status_t MPEG4DataSource::initCheck() const {
206    return mSource->initCheck();
207}
208
209ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
210    Mutex::Autolock autoLock(mLock);
211
212    if (offset >= mCachedOffset
213            && offset + size <= mCachedOffset + mCachedSize) {
214        memcpy(data, &mCache[offset - mCachedOffset], size);
215        return size;
216    }
217
218    return mSource->readAt(offset, data, size);
219}
220
221status_t MPEG4DataSource::getSize(off64_t *size) {
222    return mSource->getSize(size);
223}
224
225uint32_t MPEG4DataSource::flags() {
226    return mSource->flags();
227}
228
229status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
230    Mutex::Autolock autoLock(mLock);
231
232    clearCache();
233
234    mCache = (uint8_t *)malloc(size);
235
236    if (mCache == NULL) {
237        return -ENOMEM;
238    }
239
240    mCachedOffset = offset;
241    mCachedSize = size;
242
243    ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
244
245    if (err < (ssize_t)size) {
246        clearCache();
247
248        return ERROR_IO;
249    }
250
251    return OK;
252}
253
254////////////////////////////////////////////////////////////////////////////////
255
// Debugging aid: prints "size" bytes starting at "_data" to stdout, 16 bytes
// per row. Each row shows the offset, the bytes in hex (with an extra gap
// after the eighth byte) and the same bytes as characters, with
// non-printable bytes shown as '.'.
static void hexdump(const void *_data, size_t size) {
    const uint8_t *data = (const uint8_t *)_data;

    for (size_t offset = 0; offset < size; offset += 16) {
        // %zx is the conversion for size_t; plain %x expects unsigned int
        // and is undefined where the two types differ in width.
        printf("0x%04zx  ", offset);

        // Number of real bytes in this row; the last row may be short.
        size_t n = size - offset;
        if (n > 16) {
            n = 16;
        }

        // Hex column: always 16 cells wide so the text column lines up.
        for (size_t i = 0; i < 16; ++i) {
            if (i == 8) {
                printf(" ");
            }

            if (i < n) {
                printf("%02x ", data[offset + i]);
            } else {
                printf("   ");
            }
        }

        printf(" ");

        // Text column.
        for (size_t i = 0; i < n; ++i) {
            const uint8_t c = data[offset + i];
            if (isprint(c)) {
                printf("%c", c);
            } else {
                printf(".");
            }
        }

        printf("\n");
    }
}
294
295static const char *FourCC2MIME(uint32_t fourcc) {
296    switch (fourcc) {
297        case FOURCC('m', 'p', '4', 'a'):
298            return MEDIA_MIMETYPE_AUDIO_AAC;
299
300        case FOURCC('s', 'a', 'm', 'r'):
301            return MEDIA_MIMETYPE_AUDIO_AMR_NB;
302
303        case FOURCC('s', 'a', 'w', 'b'):
304            return MEDIA_MIMETYPE_AUDIO_AMR_WB;
305
306        case FOURCC('m', 'p', '4', 'v'):
307            return MEDIA_MIMETYPE_VIDEO_MPEG4;
308
309        case FOURCC('s', '2', '6', '3'):
310        case FOURCC('h', '2', '6', '3'):
311        case FOURCC('H', '2', '6', '3'):
312            return MEDIA_MIMETYPE_VIDEO_H263;
313
314        case FOURCC('a', 'v', 'c', '1'):
315            return MEDIA_MIMETYPE_VIDEO_AVC;
316
317        default:
318            CHECK(!"should not be here.");
319            return NULL;
320    }
321}
322
323static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
324    if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
325        // AMR NB audio is always mono, 8kHz
326        *channels = 1;
327        *rate = 8000;
328        return true;
329    } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
330        // AMR WB audio is always mono, 16kHz
331        *channels = 1;
332        *rate = 16000;
333        return true;
334    }
335    return false;
336}
337
338MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
339    : mSidxDuration(0),
340      mMoofOffset(0),
341      mDataSource(source),
342      mInitCheck(NO_INIT),
343      mHasVideo(false),
344      mHeaderTimescale(0),
345      mFirstTrack(NULL),
346      mLastTrack(NULL),
347      mFileMetaData(new MetaData),
348      mFirstSINF(NULL),
349      mIsDrm(false) {
350}
351
352MPEG4Extractor::~MPEG4Extractor() {
353    Track *track = mFirstTrack;
354    while (track) {
355        Track *next = track->next;
356
357        delete track;
358        track = next;
359    }
360    mFirstTrack = mLastTrack = NULL;
361
362    SINF *sinf = mFirstSINF;
363    while (sinf) {
364        SINF *next = sinf->next;
365        delete sinf->IPMPData;
366        delete sinf;
367        sinf = next;
368    }
369    mFirstSINF = NULL;
370
371    for (size_t i = 0; i < mPssh.size(); i++) {
372        delete [] mPssh[i].data;
373    }
374}
375
376uint32_t MPEG4Extractor::flags() const {
377    return CAN_PAUSE |
378            ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
379                    (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
380}
381
382sp<MetaData> MPEG4Extractor::getMetaData() {
383    status_t err;
384    if ((err = readMetaData()) != OK) {
385        return new MetaData;
386    }
387
388    return mFileMetaData;
389}
390
391size_t MPEG4Extractor::countTracks() {
392    status_t err;
393    if ((err = readMetaData()) != OK) {
394        ALOGV("MPEG4Extractor::countTracks: no tracks");
395        return 0;
396    }
397
398    size_t n = 0;
399    Track *track = mFirstTrack;
400    while (track) {
401        ++n;
402        track = track->next;
403    }
404
405    ALOGV("MPEG4Extractor::countTracks: %d tracks", n);
406    return n;
407}
408
409sp<MetaData> MPEG4Extractor::getTrackMetaData(
410        size_t index, uint32_t flags) {
411    status_t err;
412    if ((err = readMetaData()) != OK) {
413        return NULL;
414    }
415
416    Track *track = mFirstTrack;
417    while (index > 0) {
418        if (track == NULL) {
419            return NULL;
420        }
421
422        track = track->next;
423        --index;
424    }
425
426    if (track == NULL) {
427        return NULL;
428    }
429
430    if ((flags & kIncludeExtensiveMetaData)
431            && !track->includes_expensive_metadata) {
432        track->includes_expensive_metadata = true;
433
434        const char *mime;
435        CHECK(track->meta->findCString(kKeyMIMEType, &mime));
436        if (!strncasecmp("video/", mime, 6)) {
437            if (mMoofOffset > 0) {
438                int64_t duration;
439                if (track->meta->findInt64(kKeyDuration, &duration)) {
440                    // nothing fancy, just pick a frame near 1/4th of the duration
441                    track->meta->setInt64(
442                            kKeyThumbnailTime, duration / 4);
443                }
444            } else {
445                uint32_t sampleIndex;
446                uint32_t sampleTime;
447                if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK
448                        && track->sampleTable->getMetaDataForSample(
449                            sampleIndex, NULL /* offset */, NULL /* size */,
450                            &sampleTime) == OK) {
451                    track->meta->setInt64(
452                            kKeyThumbnailTime,
453                            ((int64_t)sampleTime * 1000000) / track->timescale);
454                }
455            }
456        }
457    }
458
459    return track->meta;
460}
461
// Writes the four bytes of "x", most significant first, into "s" followed by
// a terminating NUL. "s" must have room for at least 5 chars.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int i = 0; i < 4; ++i) {
        const int shift = 8 * (3 - i);
        s[i] = (x >> shift) & 0xff;
    }
    s[4] = '\0';
}
469
470status_t MPEG4Extractor::readMetaData() {
471    if (mInitCheck != NO_INIT) {
472        return mInitCheck;
473    }
474
475    off64_t offset = 0;
476    status_t err;
477    while (true) {
478        err = parseChunk(&offset, 0);
479        if (err == OK) {
480            continue;
481        }
482
483        uint32_t hdr[2];
484        if (mDataSource->readAt(offset, hdr, 8) < 8) {
485            break;
486        }
487        uint32_t chunk_type = ntohl(hdr[1]);
488        if (chunk_type == FOURCC('s', 'i', 'd', 'x')) {
489            // parse the sidx box too
490            continue;
491        } else if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
492            // store the offset of the first segment
493            mMoofOffset = offset;
494        }
495        break;
496    }
497
498    if (mInitCheck == OK) {
499        if (mHasVideo) {
500            mFileMetaData->setCString(
501                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
502        } else {
503            mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
504        }
505
506        mInitCheck = OK;
507    } else {
508        mInitCheck = err;
509    }
510
511    CHECK_NE(err, (status_t)NO_INIT);
512
513    // copy pssh data into file metadata
514    int psshsize = 0;
515    for (size_t i = 0; i < mPssh.size(); i++) {
516        psshsize += 20 + mPssh[i].datalen;
517    }
518    if (psshsize) {
519        char *buf = (char*)malloc(psshsize);
520        char *ptr = buf;
521        for (size_t i = 0; i < mPssh.size(); i++) {
522            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
523            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
524            ptr += (20 + mPssh[i].datalen);
525        }
526        mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize);
527        free(buf);
528    }
529    return mInitCheck;
530}
531
532char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
533    if (mFirstSINF == NULL) {
534        return NULL;
535    }
536
537    SINF *sinf = mFirstSINF;
538    while (sinf && (trackID != sinf->trackID)) {
539        sinf = sinf->next;
540    }
541
542    if (sinf == NULL) {
543        return NULL;
544    }
545
546    *len = sinf->len;
547    return sinf->IPMPData;
548}
549
550// Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
551static int32_t readSize(off64_t offset,
552        const sp<DataSource> DataSource, uint8_t *numOfBytes) {
553    uint32_t size = 0;
554    uint8_t data;
555    bool moreData = true;
556    *numOfBytes = 0;
557
558    while (moreData) {
559        if (DataSource->readAt(offset, &data, 1) < 1) {
560            return -1;
561        }
562        offset ++;
563        moreData = (data >= 128) ? true : false;
564        size = (size << 7) | (data & 0x7f); // Take last 7 bits
565        (*numOfBytes) ++;
566    }
567
568    return size;
569}
570
571status_t MPEG4Extractor::parseDrmSINF(off64_t *offset, off64_t data_offset) {
572    uint8_t updateIdTag;
573    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
574        return ERROR_IO;
575    }
576    data_offset ++;
577
578    if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
579        return ERROR_MALFORMED;
580    }
581
582    uint8_t numOfBytes;
583    int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
584    if (size < 0) {
585        return ERROR_IO;
586    }
587    int32_t classSize = size;
588    data_offset += numOfBytes;
589
590    while(size >= 11 ) {
591        uint8_t descriptorTag;
592        if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
593            return ERROR_IO;
594        }
595        data_offset ++;
596
597        if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
598            return ERROR_MALFORMED;
599        }
600
601        uint8_t buffer[8];
602        //ObjectDescriptorID and ObjectDescriptor url flag
603        if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
604            return ERROR_IO;
605        }
606        data_offset += 2;
607
608        if ((buffer[1] >> 5) & 0x0001) { //url flag is set
609            return ERROR_MALFORMED;
610        }
611
612        if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
613            return ERROR_IO;
614        }
615        data_offset += 8;
616
617        if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
618                || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
619            return ERROR_MALFORMED;
620        }
621
622        SINF *sinf = new SINF;
623        sinf->trackID = U16_AT(&buffer[3]);
624        sinf->IPMPDescriptorID = buffer[7];
625        sinf->next = mFirstSINF;
626        mFirstSINF = sinf;
627
628        size -= (8 + 2 + 1);
629    }
630
631    if (size != 0) {
632        return ERROR_MALFORMED;
633    }
634
635    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
636        return ERROR_IO;
637    }
638    data_offset ++;
639
640    if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
641        return ERROR_MALFORMED;
642    }
643
644    size = readSize(data_offset, mDataSource, &numOfBytes);
645    if (size < 0) {
646        return ERROR_IO;
647    }
648    classSize = size;
649    data_offset += numOfBytes;
650
651    while (size > 0) {
652        uint8_t tag;
653        int32_t dataLen;
654        if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
655            return ERROR_IO;
656        }
657        data_offset ++;
658
659        if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
660            uint8_t id;
661            dataLen = readSize(data_offset, mDataSource, &numOfBytes);
662            if (dataLen < 0) {
663                return ERROR_IO;
664            } else if (dataLen < 4) {
665                return ERROR_MALFORMED;
666            }
667            data_offset += numOfBytes;
668
669            if (mDataSource->readAt(data_offset, &id, 1) < 1) {
670                return ERROR_IO;
671            }
672            data_offset ++;
673
674            SINF *sinf = mFirstSINF;
675            while (sinf && (sinf->IPMPDescriptorID != id)) {
676                sinf = sinf->next;
677            }
678            if (sinf == NULL) {
679                return ERROR_MALFORMED;
680            }
681            sinf->len = dataLen - 3;
682            sinf->IPMPData = new char[sinf->len];
683
684            if (mDataSource->readAt(data_offset + 2, sinf->IPMPData, sinf->len) < sinf->len) {
685                return ERROR_IO;
686            }
687            data_offset += sinf->len;
688
689            size -= (dataLen + numOfBytes + 1);
690        }
691    }
692
693    if (size != 0) {
694        return ERROR_MALFORMED;
695    }
696
697    return UNKNOWN_ERROR;  // Return a dummy error.
698}
699
700struct PathAdder {
701    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
702        : mPath(path) {
703        mPath->push(chunkType);
704    }
705
706    ~PathAdder() {
707        mPath->pop();
708    }
709
710private:
711    Vector<uint32_t> *mPath;
712
713    PathAdder(const PathAdder &);
714    PathAdder &operator=(const PathAdder &);
715};
716
717static bool underMetaDataPath(const Vector<uint32_t> &path) {
718    return path.size() >= 5
719        && path[0] == FOURCC('m', 'o', 'o', 'v')
720        && path[1] == FOURCC('u', 'd', 't', 'a')
721        && path[2] == FOURCC('m', 'e', 't', 'a')
722        && path[3] == FOURCC('i', 'l', 's', 't');
723}
724
725// Given a time in seconds since Jan 1 1904, produce a human-readable string.
726static void convertTimeToDate(int64_t time_1904, String8 *s) {
727    time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600);
728
729    char tmp[32];
730    strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970));
731
732    s->setTo(tmp);
733}
734
735status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
736    ALOGV("entering parseChunk %lld/%d", *offset, depth);
737    uint32_t hdr[2];
738    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
739        return ERROR_IO;
740    }
741    uint64_t chunk_size = ntohl(hdr[0]);
742    uint32_t chunk_type = ntohl(hdr[1]);
743    off64_t data_offset = *offset + 8;
744
745    if (chunk_size == 1) {
746        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
747            return ERROR_IO;
748        }
749        chunk_size = ntoh64(chunk_size);
750        data_offset += 8;
751
752        if (chunk_size < 16) {
753            // The smallest valid chunk is 16 bytes long in this case.
754            return ERROR_MALFORMED;
755        }
756    } else if (chunk_size < 8) {
757        // The smallest valid chunk is 8 bytes long.
758        return ERROR_MALFORMED;
759    }
760
761    char chunk[5];
762    MakeFourCCString(chunk_type, chunk);
763    ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth);
764
765#if 0
766    static const char kWhitespace[] = "                                        ";
767    const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
768    printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size);
769
770    char buffer[256];
771    size_t n = chunk_size;
772    if (n > sizeof(buffer)) {
773        n = sizeof(buffer);
774    }
775    if (mDataSource->readAt(*offset, buffer, n)
776            < (ssize_t)n) {
777        return ERROR_IO;
778    }
779
780    hexdump(buffer, n);
781#endif
782
783    PathAdder autoAdder(&mPath, chunk_type);
784
785    off64_t chunk_data_size = *offset + chunk_size - data_offset;
786
787    if (chunk_type != FOURCC('c', 'p', 'r', 't')
788            && chunk_type != FOURCC('c', 'o', 'v', 'r')
789            && mPath.size() == 5 && underMetaDataPath(mPath)) {
790        off64_t stop_offset = *offset + chunk_size;
791        *offset = data_offset;
792        while (*offset < stop_offset) {
793            status_t err = parseChunk(offset, depth + 1);
794            if (err != OK) {
795                return err;
796            }
797        }
798
799        if (*offset != stop_offset) {
800            return ERROR_MALFORMED;
801        }
802
803        return OK;
804    }
805
806    switch(chunk_type) {
807        case FOURCC('m', 'o', 'o', 'v'):
808        case FOURCC('t', 'r', 'a', 'k'):
809        case FOURCC('m', 'd', 'i', 'a'):
810        case FOURCC('m', 'i', 'n', 'f'):
811        case FOURCC('d', 'i', 'n', 'f'):
812        case FOURCC('s', 't', 'b', 'l'):
813        case FOURCC('m', 'v', 'e', 'x'):
814        case FOURCC('m', 'o', 'o', 'f'):
815        case FOURCC('t', 'r', 'a', 'f'):
816        case FOURCC('m', 'f', 'r', 'a'):
817        case FOURCC('u', 'd', 't', 'a'):
818        case FOURCC('i', 'l', 's', 't'):
819        case FOURCC('s', 'i', 'n', 'f'):
820        case FOURCC('s', 'c', 'h', 'i'):
821        case FOURCC('e', 'd', 't', 's'):
822        {
823            if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
824                ALOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size);
825
826                if (mDataSource->flags()
827                        & (DataSource::kWantsPrefetching
828                            | DataSource::kIsCachingDataSource)) {
829                    sp<MPEG4DataSource> cachedSource =
830                        new MPEG4DataSource(mDataSource);
831
832                    if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
833                        mDataSource = cachedSource;
834                    }
835                }
836
837                mLastTrack->sampleTable = new SampleTable(mDataSource);
838            }
839
840            bool isTrack = false;
841            if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
842                isTrack = true;
843
844                Track *track = new Track;
845                track->next = NULL;
846                if (mLastTrack) {
847                    mLastTrack->next = track;
848                } else {
849                    mFirstTrack = track;
850                }
851                mLastTrack = track;
852
853                track->meta = new MetaData;
854                track->includes_expensive_metadata = false;
855                track->skipTrack = false;
856                track->timescale = 0;
857                track->meta->setCString(kKeyMIMEType, "application/octet-stream");
858            }
859
860            off64_t stop_offset = *offset + chunk_size;
861            *offset = data_offset;
862            while (*offset < stop_offset) {
863                status_t err = parseChunk(offset, depth + 1);
864                if (err != OK) {
865                    return err;
866                }
867            }
868
869            if (*offset != stop_offset) {
870                return ERROR_MALFORMED;
871            }
872
873            if (isTrack) {
874                if (mLastTrack->skipTrack) {
875                    Track *cur = mFirstTrack;
876
877                    if (cur == mLastTrack) {
878                        delete cur;
879                        mFirstTrack = mLastTrack = NULL;
880                    } else {
881                        while (cur && cur->next != mLastTrack) {
882                            cur = cur->next;
883                        }
884                        cur->next = NULL;
885                        delete mLastTrack;
886                        mLastTrack = cur;
887                    }
888
889                    return OK;
890                }
891
892                status_t err = verifyTrack(mLastTrack);
893
894                if (err != OK) {
895                    return err;
896                }
897            } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
898                mInitCheck = OK;
899
900                if (!mIsDrm) {
901                    return UNKNOWN_ERROR;  // Return a dummy error.
902                } else {
903                    return OK;
904                }
905            }
906            break;
907        }
908
909        case FOURCC('e', 'l', 's', 't'):
910        {
911            // See 14496-12 8.6.6
912            uint8_t version;
913            if (mDataSource->readAt(data_offset, &version, 1) < 1) {
914                return ERROR_IO;
915            }
916
917            uint32_t entry_count;
918            if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
919                return ERROR_IO;
920            }
921
922            if (entry_count != 1) {
923                // we only support a single entry at the moment, for gapless playback
924                ALOGW("ignoring edit list with %d entries", entry_count);
925            } else if (mHeaderTimescale == 0) {
926                ALOGW("ignoring edit list because timescale is 0");
927            } else {
928                off64_t entriesoffset = data_offset + 8;
929                uint64_t segment_duration;
930                int64_t media_time;
931
932                if (version == 1) {
933                    if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
934                            !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
935                        return ERROR_IO;
936                    }
937                } else if (version == 0) {
938                    uint32_t sd;
939                    int32_t mt;
940                    if (!mDataSource->getUInt32(entriesoffset, &sd) ||
941                            !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
942                        return ERROR_IO;
943                    }
944                    segment_duration = sd;
945                    media_time = mt;
946                } else {
947                    return ERROR_IO;
948                }
949
950                uint64_t halfscale = mHeaderTimescale / 2;
951                segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale;
952                media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale;
953
954                int64_t duration;
955                int32_t samplerate;
956                if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
957                        mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {
958
959                    int64_t delay = (media_time  * samplerate + 500000) / 1000000;
960                    mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
961
962                    int64_t paddingus = duration - (segment_duration + media_time);
963                    if (paddingus < 0) {
964                        // track duration from media header (which is what kKeyDuration is) might
965                        // be slightly shorter than the segment duration, which would make the
966                        // padding negative. Clamp to zero.
967                        paddingus = 0;
968                    }
969                    int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
970                    mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
971                }
972            }
973            *offset += chunk_size;
974            break;
975        }
976
977        case FOURCC('f', 'r', 'm', 'a'):
978        {
979            uint32_t original_fourcc;
980            if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
981                return ERROR_IO;
982            }
983            original_fourcc = ntohl(original_fourcc);
984            ALOGV("read original format: %d", original_fourcc);
985            mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
986            uint32_t num_channels = 0;
987            uint32_t sample_rate = 0;
988            if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
989                mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
990                mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
991            }
992            *offset += chunk_size;
993            break;
994        }
995
996        case FOURCC('t', 'e', 'n', 'c'):
997        {
998            if (chunk_size < 32) {
999                return ERROR_MALFORMED;
1000            }
1001
1002            // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1003            // default IV size, 16 bytes default KeyID
1004            // (ISO 23001-7)
1005            char buf[4];
1006            memset(buf, 0, 4);
1007            if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1008                return ERROR_IO;
1009            }
1010            uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1011            if (defaultAlgorithmId > 1) {
1012                // only 0 (clear) and 1 (AES-128) are valid
1013                return ERROR_MALFORMED;
1014            }
1015
1016            memset(buf, 0, 4);
1017            if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1018                return ERROR_IO;
1019            }
1020            uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1021
1022            if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
1023                    (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
1024                // only unencrypted data must have 0 IV size
1025                return ERROR_MALFORMED;
1026            } else if (defaultIVSize != 0 &&
1027                    defaultIVSize != 8 &&
1028                    defaultIVSize != 16) {
1029                // only supported sizes are 0, 8 and 16
1030                return ERROR_MALFORMED;
1031            }
1032
1033            uint8_t defaultKeyId[16];
1034
1035            if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1036                return ERROR_IO;
1037            }
1038
1039            mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
1040            mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1041            mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1042            *offset += chunk_size;
1043            break;
1044        }
1045
1046        case FOURCC('t', 'k', 'h', 'd'):
1047        {
1048            status_t err;
1049            if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1050                return err;
1051            }
1052
1053            *offset += chunk_size;
1054            break;
1055        }
1056
1057        case FOURCC('p', 's', 's', 'h'):
1058        {
1059            PsshInfo pssh;
1060
1061            if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1062                return ERROR_IO;
1063            }
1064
1065            uint32_t psshdatalen = 0;
1066            if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1067                return ERROR_IO;
1068            }
1069            pssh.datalen = ntohl(psshdatalen);
1070            ALOGV("pssh data size: %d", pssh.datalen);
1071            if (pssh.datalen + 20 > chunk_size) {
1072                // pssh data length exceeds size of containing box
1073                return ERROR_MALFORMED;
1074            }
1075
1076            pssh.data = new uint8_t[pssh.datalen];
1077            ALOGV("allocated pssh @ %p", pssh.data);
1078            ssize_t requested = (ssize_t) pssh.datalen;
1079            if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1080                return ERROR_IO;
1081            }
1082            mPssh.push_back(pssh);
1083
1084            *offset += chunk_size;
1085            break;
1086        }
1087
1088        case FOURCC('m', 'd', 'h', 'd'):
1089        {
1090            if (chunk_data_size < 4) {
1091                return ERROR_MALFORMED;
1092            }
1093
1094            uint8_t version;
1095            if (mDataSource->readAt(
1096                        data_offset, &version, sizeof(version))
1097                    < (ssize_t)sizeof(version)) {
1098                return ERROR_IO;
1099            }
1100
1101            off64_t timescale_offset;
1102
1103            if (version == 1) {
1104                timescale_offset = data_offset + 4 + 16;
1105            } else if (version == 0) {
1106                timescale_offset = data_offset + 4 + 8;
1107            } else {
1108                return ERROR_IO;
1109            }
1110
1111            uint32_t timescale;
1112            if (mDataSource->readAt(
1113                        timescale_offset, &timescale, sizeof(timescale))
1114                    < (ssize_t)sizeof(timescale)) {
1115                return ERROR_IO;
1116            }
1117
1118            mLastTrack->timescale = ntohl(timescale);
1119
1120            int64_t duration = 0;
1121            if (version == 1) {
1122                if (mDataSource->readAt(
1123                            timescale_offset + 4, &duration, sizeof(duration))
1124                        < (ssize_t)sizeof(duration)) {
1125                    return ERROR_IO;
1126                }
1127                duration = ntoh64(duration);
1128            } else {
1129                uint32_t duration32;
1130                if (mDataSource->readAt(
1131                            timescale_offset + 4, &duration32, sizeof(duration32))
1132                        < (ssize_t)sizeof(duration32)) {
1133                    return ERROR_IO;
1134                }
1135                // ffmpeg sets duration to -1, which is incorrect.
1136                if (duration32 != 0xffffffff) {
1137                    duration = ntohl(duration32);
1138                }
1139            }
1140            mLastTrack->meta->setInt64(
1141                    kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1142
1143            uint8_t lang[2];
1144            off64_t lang_offset;
1145            if (version == 1) {
1146                lang_offset = timescale_offset + 4 + 8;
1147            } else if (version == 0) {
1148                lang_offset = timescale_offset + 4 + 4;
1149            } else {
1150                return ERROR_IO;
1151            }
1152
1153            if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1154                    < (ssize_t)sizeof(lang)) {
1155                return ERROR_IO;
1156            }
1157
1158            // To get the ISO-639-2/T three character language code
1159            // 1 bit pad followed by 3 5-bits characters. Each character
1160            // is packed as the difference between its ASCII value and 0x60.
1161            char lang_code[4];
1162            lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1163            lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1164            lang_code[2] = (lang[1] & 0x1f) + 0x60;
1165            lang_code[3] = '\0';
1166
1167            mLastTrack->meta->setCString(
1168                    kKeyMediaLanguage, lang_code);
1169
1170            *offset += chunk_size;
1171            break;
1172        }
1173
1174        case FOURCC('s', 't', 's', 'd'):
1175        {
1176            if (chunk_data_size < 8) {
1177                return ERROR_MALFORMED;
1178            }
1179
1180            uint8_t buffer[8];
1181            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1182                return ERROR_MALFORMED;
1183            }
1184
1185            if (mDataSource->readAt(
1186                        data_offset, buffer, 8) < 8) {
1187                return ERROR_IO;
1188            }
1189
1190            if (U32_AT(buffer) != 0) {
1191                // Should be version 0, flags 0.
1192                return ERROR_MALFORMED;
1193            }
1194
1195            uint32_t entry_count = U32_AT(&buffer[4]);
1196
1197            if (entry_count > 1) {
                // For 3GPP timed text, there could be multiple tx3g boxes containing
1199                // multiple text display formats. These formats will be used to
1200                // display the timed text.
1201                // For encrypted files, there may also be more than one entry.
1202                const char *mime;
1203                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1204                if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1205                        strcasecmp(mime, "application/octet-stream")) {
1206                    // For now we only support a single type of media per track.
1207                    mLastTrack->skipTrack = true;
1208                    *offset += chunk_size;
1209                    break;
1210                }
1211            }
1212            off64_t stop_offset = *offset + chunk_size;
1213            *offset = data_offset + 8;
1214            for (uint32_t i = 0; i < entry_count; ++i) {
1215                status_t err = parseChunk(offset, depth + 1);
1216                if (err != OK) {
1217                    return err;
1218                }
1219            }
1220
1221            if (*offset != stop_offset) {
1222                return ERROR_MALFORMED;
1223            }
1224            break;
1225        }
1226
1227        case FOURCC('m', 'p', '4', 'a'):
1228        case FOURCC('e', 'n', 'c', 'a'):
1229        case FOURCC('s', 'a', 'm', 'r'):
1230        case FOURCC('s', 'a', 'w', 'b'):
1231        {
1232            uint8_t buffer[8 + 20];
1233            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1234                // Basic AudioSampleEntry size.
1235                return ERROR_MALFORMED;
1236            }
1237
1238            if (mDataSource->readAt(
1239                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1240                return ERROR_IO;
1241            }
1242
1243            uint16_t data_ref_index = U16_AT(&buffer[6]);
1244            uint32_t num_channels = U16_AT(&buffer[16]);
1245
1246            uint16_t sample_size = U16_AT(&buffer[18]);
1247            uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1248
1249            if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1250                // if the chunk type is enca, we'll get the type from the sinf/frma box later
1251                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1252                AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1253            }
1254            ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1255                   chunk, num_channels, sample_size, sample_rate);
1256            mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1257            mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1258
1259            off64_t stop_offset = *offset + chunk_size;
1260            *offset = data_offset + sizeof(buffer);
1261            while (*offset < stop_offset) {
1262                status_t err = parseChunk(offset, depth + 1);
1263                if (err != OK) {
1264                    return err;
1265                }
1266            }
1267
1268            if (*offset != stop_offset) {
1269                return ERROR_MALFORMED;
1270            }
1271            break;
1272        }
1273
1274        case FOURCC('m', 'p', '4', 'v'):
1275        case FOURCC('e', 'n', 'c', 'v'):
1276        case FOURCC('s', '2', '6', '3'):
1277        case FOURCC('H', '2', '6', '3'):
1278        case FOURCC('h', '2', '6', '3'):
1279        case FOURCC('a', 'v', 'c', '1'):
1280        {
1281            mHasVideo = true;
1282
1283            uint8_t buffer[78];
1284            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1285                // Basic VideoSampleEntry size.
1286                return ERROR_MALFORMED;
1287            }
1288
1289            if (mDataSource->readAt(
1290                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1291                return ERROR_IO;
1292            }
1293
1294            uint16_t data_ref_index = U16_AT(&buffer[6]);
1295            uint16_t width = U16_AT(&buffer[6 + 18]);
1296            uint16_t height = U16_AT(&buffer[6 + 20]);
1297
1298            // The video sample is not standard-compliant if it has invalid dimension.
1299            // Use some default width and height value, and
1300            // let the decoder figure out the actual width and height (and thus
            // be prepared for INFO_FORMAT_CHANGED event).
1302            if (width == 0)  width  = 352;
1303            if (height == 0) height = 288;
1304
1305            // printf("*** coding='%s' width=%d height=%d\n",
1306            //        chunk, width, height);
1307
1308            if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1309                // if the chunk type is encv, we'll get the type from the sinf/frma box later
1310                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1311            }
1312            mLastTrack->meta->setInt32(kKeyWidth, width);
1313            mLastTrack->meta->setInt32(kKeyHeight, height);
1314
1315            off64_t stop_offset = *offset + chunk_size;
1316            *offset = data_offset + sizeof(buffer);
1317            while (*offset < stop_offset) {
1318                status_t err = parseChunk(offset, depth + 1);
1319                if (err != OK) {
1320                    return err;
1321                }
1322            }
1323
1324            if (*offset != stop_offset) {
1325                return ERROR_MALFORMED;
1326            }
1327            break;
1328        }
1329
1330        case FOURCC('s', 't', 'c', 'o'):
1331        case FOURCC('c', 'o', '6', '4'):
1332        {
1333            status_t err =
1334                mLastTrack->sampleTable->setChunkOffsetParams(
1335                        chunk_type, data_offset, chunk_data_size);
1336
1337            if (err != OK) {
1338                return err;
1339            }
1340
1341            *offset += chunk_size;
1342            break;
1343        }
1344
1345        case FOURCC('s', 't', 's', 'c'):
1346        {
1347            status_t err =
1348                mLastTrack->sampleTable->setSampleToChunkParams(
1349                        data_offset, chunk_data_size);
1350
1351            if (err != OK) {
1352                return err;
1353            }
1354
1355            *offset += chunk_size;
1356            break;
1357        }
1358
1359        case FOURCC('s', 't', 's', 'z'):
1360        case FOURCC('s', 't', 'z', '2'):
1361        {
1362            status_t err =
1363                mLastTrack->sampleTable->setSampleSizeParams(
1364                        chunk_type, data_offset, chunk_data_size);
1365
1366            if (err != OK) {
1367                return err;
1368            }
1369
1370            size_t max_size;
1371            err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1372
1373            if (err != OK) {
1374                return err;
1375            }
1376
1377            if (max_size != 0) {
1378                // Assume that a given buffer only contains at most 10 chunks,
1379                // each chunk originally prefixed with a 2 byte length will
1380                // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1381                // and thus will grow by 2 bytes per chunk.
1382                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1383            } else {
1384                // No size was specified. Pick a conservatively large size.
1385                int32_t width, height;
1386                if (!mLastTrack->meta->findInt32(kKeyWidth, &width) ||
1387                    !mLastTrack->meta->findInt32(kKeyHeight, &height)) {
1388                    ALOGE("No width or height, assuming worst case 1080p");
1389                    width = 1920;
1390                    height = 1080;
1391                }
1392
1393                const char *mime;
1394                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1395                if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
1396                    // AVC requires compression ratio of at least 2, and uses
1397                    // macroblocks
1398                    max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1399                } else {
1400                    // For all other formats there is no minimum compression
1401                    // ratio. Use compression ratio of 1.
1402                    max_size = width * height * 3 / 2;
1403                }
1404                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size);
1405            }
1406            *offset += chunk_size;
1407
1408            // NOTE: setting another piece of metadata invalidates any pointers (such as the
1409            // mimetype) previously obtained, so don't cache them.
1410            const char *mime;
1411            CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1412            // Calculate average frame rate.
1413            if (!strncasecmp("video/", mime, 6)) {
1414                size_t nSamples = mLastTrack->sampleTable->countSamples();
1415                int64_t durationUs;
1416                if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1417                    if (durationUs > 0) {
1418                        int32_t frameRate = (nSamples * 1000000LL +
1419                                    (durationUs >> 1)) / durationUs;
1420                        mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1421                    }
1422                }
1423            }
1424
1425            break;
1426        }
1427
1428        case FOURCC('s', 't', 't', 's'):
1429        {
1430            status_t err =
1431                mLastTrack->sampleTable->setTimeToSampleParams(
1432                        data_offset, chunk_data_size);
1433
1434            if (err != OK) {
1435                return err;
1436            }
1437
1438            *offset += chunk_size;
1439            break;
1440        }
1441
1442        case FOURCC('c', 't', 't', 's'):
1443        {
1444            status_t err =
1445                mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1446                        data_offset, chunk_data_size);
1447
1448            if (err != OK) {
1449                return err;
1450            }
1451
1452            *offset += chunk_size;
1453            break;
1454        }
1455
1456        case FOURCC('s', 't', 's', 's'):
1457        {
1458            status_t err =
1459                mLastTrack->sampleTable->setSyncSampleParams(
1460                        data_offset, chunk_data_size);
1461
1462            if (err != OK) {
1463                return err;
1464            }
1465
1466            *offset += chunk_size;
1467            break;
1468        }
1469
1470        // @xyz
1471        case FOURCC('\xA9', 'x', 'y', 'z'):
1472        {
1473            // Best case the total data length inside "@xyz" box
1474            // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/",
1475            // where "\x00\x04" is the text string length with value = 4,
            // "\x15\xc7" is the language code = en, and "0+0" is a
1477            // location (string) value with longitude = 0 and latitude = 0.
1478            if (chunk_data_size < 8) {
1479                return ERROR_MALFORMED;
1480            }
1481
1482            // Worst case the location string length would be 18,
1483            // for instance +90.0000-180.0000, without the trailing "/" and
1484            // the string length + language code.
1485            char buffer[18];
1486
            // Subtracting 5 from the data size is because the text string length +
1488            // language code takes 4 bytes, and the trailing slash "/" takes 1 byte.
1489            off64_t location_length = chunk_data_size - 5;
1490            if (location_length >= (off64_t) sizeof(buffer)) {
1491                return ERROR_MALFORMED;
1492            }
1493
1494            if (mDataSource->readAt(
1495                        data_offset + 4, buffer, location_length) < location_length) {
1496                return ERROR_IO;
1497            }
1498
1499            buffer[location_length] = '\0';
1500            mFileMetaData->setCString(kKeyLocation, buffer);
1501            *offset += chunk_size;
1502            break;
1503        }
1504
1505        case FOURCC('e', 's', 'd', 's'):
1506        {
1507            if (chunk_data_size < 4) {
1508                return ERROR_MALFORMED;
1509            }
1510
1511            uint8_t buffer[256];
1512            if (chunk_data_size > (off64_t)sizeof(buffer)) {
1513                return ERROR_BUFFER_TOO_SMALL;
1514            }
1515
1516            if (mDataSource->readAt(
1517                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
1518                return ERROR_IO;
1519            }
1520
1521            if (U32_AT(buffer) != 0) {
1522                // Should be version 0, flags 0.
1523                return ERROR_MALFORMED;
1524            }
1525
1526            mLastTrack->meta->setData(
1527                    kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1528
1529            if (mPath.size() >= 2
1530                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1531                // Information from the ESDS must be relied on for proper
1532                // setup of sample rate and channel count for MPEG4 Audio.
1533                // The generic header appears to only contain generic
1534                // information...
1535
1536                status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1537                        &buffer[4], chunk_data_size - 4);
1538
1539                if (err != OK) {
1540                    return err;
1541                }
1542            }
1543
1544            *offset += chunk_size;
1545            break;
1546        }
1547
1548        case FOURCC('a', 'v', 'c', 'C'):
1549        {
1550            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1551
1552            if (mDataSource->readAt(
1553                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1554                return ERROR_IO;
1555            }
1556
1557            mLastTrack->meta->setData(
1558                    kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
1559
1560            *offset += chunk_size;
1561            break;
1562        }
1563
1564        case FOURCC('d', '2', '6', '3'):
1565        {
1566            /*
1567             * d263 contains a fixed 7 bytes part:
1568             *   vendor - 4 bytes
1569             *   version - 1 byte
1570             *   level - 1 byte
1571             *   profile - 1 byte
1572             * optionally, "d263" box itself may contain a 16-byte
1573             * bit rate box (bitr)
1574             *   average bit rate - 4 bytes
1575             *   max bit rate - 4 bytes
1576             */
1577            char buffer[23];
1578            if (chunk_data_size != 7 &&
1579                chunk_data_size != 23) {
1580                ALOGE("Incorrect D263 box size %lld", chunk_data_size);
1581                return ERROR_MALFORMED;
1582            }
1583
1584            if (mDataSource->readAt(
1585                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1586                return ERROR_IO;
1587            }
1588
1589            mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1590
1591            *offset += chunk_size;
1592            break;
1593        }
1594
1595        case FOURCC('m', 'e', 't', 'a'):
1596        {
1597            uint8_t buffer[4];
1598            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1599                return ERROR_MALFORMED;
1600            }
1601
1602            if (mDataSource->readAt(
1603                        data_offset, buffer, 4) < 4) {
1604                return ERROR_IO;
1605            }
1606
1607            if (U32_AT(buffer) != 0) {
1608                // Should be version 0, flags 0.
1609
1610                // If it's not, let's assume this is one of those
1611                // apparently malformed chunks that don't have flags
1612                // and completely different semantics than what's
1613                // in the MPEG4 specs and skip it.
1614                *offset += chunk_size;
1615                return OK;
1616            }
1617
1618            off64_t stop_offset = *offset + chunk_size;
1619            *offset = data_offset + sizeof(buffer);
1620            while (*offset < stop_offset) {
1621                status_t err = parseChunk(offset, depth + 1);
1622                if (err != OK) {
1623                    return err;
1624                }
1625            }
1626
1627            if (*offset != stop_offset) {
1628                return ERROR_MALFORMED;
1629            }
1630            break;
1631        }
1632
1633        case FOURCC('m', 'e', 'a', 'n'):
1634        case FOURCC('n', 'a', 'm', 'e'):
1635        case FOURCC('d', 'a', 't', 'a'):
1636        {
1637            if (mPath.size() == 6 && underMetaDataPath(mPath)) {
1638                status_t err = parseMetaData(data_offset, chunk_data_size);
1639
1640                if (err != OK) {
1641                    return err;
1642                }
1643            }
1644
1645            *offset += chunk_size;
1646            break;
1647        }
1648
1649        case FOURCC('m', 'v', 'h', 'd'):
1650        {
1651            if (chunk_data_size < 24) {
1652                return ERROR_MALFORMED;
1653            }
1654
1655            uint8_t header[24];
1656            if (mDataSource->readAt(
1657                        data_offset, header, sizeof(header))
1658                    < (ssize_t)sizeof(header)) {
1659                return ERROR_IO;
1660            }
1661
1662            uint64_t creationTime;
1663            if (header[0] == 1) {
1664                creationTime = U64_AT(&header[4]);
1665                mHeaderTimescale = U32_AT(&header[20]);
1666            } else if (header[0] != 0) {
1667                return ERROR_MALFORMED;
1668            } else {
1669                creationTime = U32_AT(&header[4]);
1670                mHeaderTimescale = U32_AT(&header[12]);
1671            }
1672
1673            String8 s;
1674            convertTimeToDate(creationTime, &s);
1675
1676            mFileMetaData->setCString(kKeyDate, s.string());
1677
1678            *offset += chunk_size;
1679            break;
1680        }
1681
1682        case FOURCC('m', 'd', 'a', 't'):
1683        {
1684            ALOGV("mdat chunk, drm: %d", mIsDrm);
1685            if (!mIsDrm) {
1686                *offset += chunk_size;
1687                break;
1688            }
1689
1690            if (chunk_size < 8) {
1691                return ERROR_MALFORMED;
1692            }
1693
1694            return parseDrmSINF(offset, data_offset);
1695        }
1696
1697        case FOURCC('h', 'd', 'l', 'r'):
1698        {
1699            uint32_t buffer;
1700            if (mDataSource->readAt(
1701                        data_offset + 8, &buffer, 4) < 4) {
1702                return ERROR_IO;
1703            }
1704
1705            uint32_t type = ntohl(buffer);
1706            // For the 3GPP file format, the handler-type within the 'hdlr' box
1707            // shall be 'text'. We also want to support 'sbtl' handler type
1708            // for a practical reason as various MPEG4 containers use it.
1709            if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
1710                mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
1711            }
1712
1713            *offset += chunk_size;
1714            break;
1715        }
1716
1717        case FOURCC('t', 'x', '3', 'g'):
1718        {
1719            uint32_t type;
1720            const void *data;
1721            size_t size = 0;
1722            if (!mLastTrack->meta->findData(
1723                    kKeyTextFormatData, &type, &data, &size)) {
1724                size = 0;
1725            }
1726
1727            uint8_t *buffer = new uint8_t[size + chunk_size];
1728
1729            if (size > 0) {
1730                memcpy(buffer, data, size);
1731            }
1732
1733            if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
1734                    < chunk_size) {
1735                delete[] buffer;
1736                buffer = NULL;
1737
1738                return ERROR_IO;
1739            }
1740
1741            mLastTrack->meta->setData(
1742                    kKeyTextFormatData, 0, buffer, size + chunk_size);
1743
1744            delete[] buffer;
1745
1746            *offset += chunk_size;
1747            break;
1748        }
1749
1750        case FOURCC('c', 'o', 'v', 'r'):
1751        {
1752            if (mFileMetaData != NULL) {
1753                ALOGV("chunk_data_size = %lld and data_offset = %lld",
1754                        chunk_data_size, data_offset);
1755                sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1);
1756                if (mDataSource->readAt(
1757                    data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) {
1758                    return ERROR_IO;
1759                }
1760                const int kSkipBytesOfDataBox = 16;
1761                mFileMetaData->setData(
1762                    kKeyAlbumArt, MetaData::TYPE_NONE,
1763                    buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
1764            }
1765
1766            *offset += chunk_size;
1767            break;
1768        }
1769
1770        case FOURCC('-', '-', '-', '-'):
1771        {
1772            mLastCommentMean.clear();
1773            mLastCommentName.clear();
1774            mLastCommentData.clear();
1775            *offset += chunk_size;
1776            break;
1777        }
1778
1779        case FOURCC('s', 'i', 'd', 'x'):
1780        {
1781            parseSegmentIndex(data_offset, chunk_data_size);
1782            *offset += chunk_size;
1783            return UNKNOWN_ERROR; // stop parsing after sidx
1784        }
1785
1786        default:
1787        {
1788            *offset += chunk_size;
1789            break;
1790        }
1791    }
1792
1793    return OK;
1794}
1795
1796status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
1797  ALOGV("MPEG4Extractor::parseSegmentIndex");
1798
1799    if (size < 12) {
1800      return -EINVAL;
1801    }
1802
1803    uint32_t flags;
1804    if (!mDataSource->getUInt32(offset, &flags)) {
1805        return ERROR_MALFORMED;
1806    }
1807
1808    uint32_t version = flags >> 24;
1809    flags &= 0xffffff;
1810
1811    ALOGV("sidx version %d", version);
1812
1813    uint32_t referenceId;
1814    if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
1815        return ERROR_MALFORMED;
1816    }
1817
1818    uint32_t timeScale;
1819    if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
1820        return ERROR_MALFORMED;
1821    }
1822    ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
1823
1824    uint64_t earliestPresentationTime;
1825    uint64_t firstOffset;
1826
1827    offset += 12;
1828    size -= 12;
1829
1830    if (version == 0) {
1831        if (size < 8) {
1832            return -EINVAL;
1833        }
1834        uint32_t tmp;
1835        if (!mDataSource->getUInt32(offset, &tmp)) {
1836            return ERROR_MALFORMED;
1837        }
1838        earliestPresentationTime = tmp;
1839        if (!mDataSource->getUInt32(offset + 4, &tmp)) {
1840            return ERROR_MALFORMED;
1841        }
1842        firstOffset = tmp;
1843        offset += 8;
1844        size -= 8;
1845    } else {
1846        if (size < 16) {
1847            return -EINVAL;
1848        }
1849        if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
1850            return ERROR_MALFORMED;
1851        }
1852        if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
1853            return ERROR_MALFORMED;
1854        }
1855        offset += 16;
1856        size -= 16;
1857    }
1858    ALOGV("sidx pres/off: %Ld/%Ld", earliestPresentationTime, firstOffset);
1859
1860    if (size < 4) {
1861        return -EINVAL;
1862    }
1863
1864    uint16_t referenceCount;
1865    if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
1866        return ERROR_MALFORMED;
1867    }
1868    offset += 4;
1869    size -= 4;
1870    ALOGV("refcount: %d", referenceCount);
1871
1872    if (size < referenceCount * 12) {
1873        return -EINVAL;
1874    }
1875
1876    uint64_t total_duration = 0;
1877    for (unsigned int i = 0; i < referenceCount; i++) {
1878        uint32_t d1, d2, d3;
1879
1880        if (!mDataSource->getUInt32(offset, &d1) ||     // size
1881            !mDataSource->getUInt32(offset + 4, &d2) || // duration
1882            !mDataSource->getUInt32(offset + 8, &d3)) { // flags
1883            return ERROR_MALFORMED;
1884        }
1885
1886        if (d1 & 0x80000000) {
1887            ALOGW("sub-sidx boxes not supported yet");
1888        }
1889        bool sap = d3 & 0x80000000;
1890        bool saptype = d3 >> 28;
1891        if (!sap || saptype > 2) {
1892            ALOGW("not a stream access point, or unsupported type");
1893        }
1894        total_duration += d2;
1895        offset += 12;
1896        ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
1897        SidxEntry se;
1898        se.mSize = d1 & 0x7fffffff;
1899        se.mDurationUs = 1000000LL * d2 / timeScale;
1900        mSidxEntries.add(se);
1901    }
1902
1903    mSidxDuration = total_duration * 1000000 / timeScale;
1904    ALOGV("duration: %lld", mSidxDuration);
1905
1906    int64_t metaDuration;
1907    if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
1908        mLastTrack->meta->setInt64(kKeyDuration, mSidxDuration);
1909    }
1910    return OK;
1911}
1912
1913
1914
1915status_t MPEG4Extractor::parseTrackHeader(
1916        off64_t data_offset, off64_t data_size) {
1917    if (data_size < 4) {
1918        return ERROR_MALFORMED;
1919    }
1920
1921    uint8_t version;
1922    if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1923        return ERROR_IO;
1924    }
1925
1926    size_t dynSize = (version == 1) ? 36 : 24;
1927
1928    uint8_t buffer[36 + 60];
1929
1930    if (data_size != (off64_t)dynSize + 60) {
1931        return ERROR_MALFORMED;
1932    }
1933
1934    if (mDataSource->readAt(
1935                data_offset, buffer, data_size) < (ssize_t)data_size) {
1936        return ERROR_IO;
1937    }
1938
1939    uint64_t ctime, mtime, duration;
1940    int32_t id;
1941
1942    if (version == 1) {
1943        ctime = U64_AT(&buffer[4]);
1944        mtime = U64_AT(&buffer[12]);
1945        id = U32_AT(&buffer[20]);
1946        duration = U64_AT(&buffer[28]);
1947    } else if (version == 0) {
1948        ctime = U32_AT(&buffer[4]);
1949        mtime = U32_AT(&buffer[8]);
1950        id = U32_AT(&buffer[12]);
1951        duration = U32_AT(&buffer[20]);
1952    } else {
1953        return ERROR_UNSUPPORTED;
1954    }
1955
1956    mLastTrack->meta->setInt32(kKeyTrackID, id);
1957
1958    size_t matrixOffset = dynSize + 16;
1959    int32_t a00 = U32_AT(&buffer[matrixOffset]);
1960    int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
1961    int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
1962    int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
1963    int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
1964    int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
1965
1966#if 0
1967    ALOGI("x' = %.2f * x + %.2f * y + %.2f",
1968         a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
1969    ALOGI("y' = %.2f * x + %.2f * y + %.2f",
1970         a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
1971#endif
1972
1973    uint32_t rotationDegrees;
1974
1975    static const int32_t kFixedOne = 0x10000;
1976    if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
1977        // Identity, no rotation
1978        rotationDegrees = 0;
1979    } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
1980        rotationDegrees = 90;
1981    } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
1982        rotationDegrees = 270;
1983    } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
1984        rotationDegrees = 180;
1985    } else {
1986        ALOGW("We only support 0,90,180,270 degree rotation matrices");
1987        rotationDegrees = 0;
1988    }
1989
1990    if (rotationDegrees != 0) {
1991        mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
1992    }
1993
1994    // Handle presentation display size, which could be different
1995    // from the image size indicated by kKeyWidth and kKeyHeight.
1996    uint32_t width = U32_AT(&buffer[dynSize + 52]);
1997    uint32_t height = U32_AT(&buffer[dynSize + 56]);
1998    mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
1999    mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
2000
2001    return OK;
2002}
2003
2004status_t MPEG4Extractor::parseMetaData(off64_t offset, size_t size) {
2005    if (size < 4) {
2006        return ERROR_MALFORMED;
2007    }
2008
2009    uint8_t *buffer = new uint8_t[size + 1];
2010    if (mDataSource->readAt(
2011                offset, buffer, size) != (ssize_t)size) {
2012        delete[] buffer;
2013        buffer = NULL;
2014
2015        return ERROR_IO;
2016    }
2017
2018    uint32_t flags = U32_AT(buffer);
2019
2020    uint32_t metadataKey = 0;
2021    char chunk[5];
2022    MakeFourCCString(mPath[4], chunk);
2023    ALOGV("meta: %s @ %lld", chunk, offset);
2024    switch (mPath[4]) {
2025        case FOURCC(0xa9, 'a', 'l', 'b'):
2026        {
2027            metadataKey = kKeyAlbum;
2028            break;
2029        }
2030        case FOURCC(0xa9, 'A', 'R', 'T'):
2031        {
2032            metadataKey = kKeyArtist;
2033            break;
2034        }
2035        case FOURCC('a', 'A', 'R', 'T'):
2036        {
2037            metadataKey = kKeyAlbumArtist;
2038            break;
2039        }
2040        case FOURCC(0xa9, 'd', 'a', 'y'):
2041        {
2042            metadataKey = kKeyYear;
2043            break;
2044        }
2045        case FOURCC(0xa9, 'n', 'a', 'm'):
2046        {
2047            metadataKey = kKeyTitle;
2048            break;
2049        }
2050        case FOURCC(0xa9, 'w', 'r', 't'):
2051        {
2052            metadataKey = kKeyWriter;
2053            break;
2054        }
2055        case FOURCC('c', 'o', 'v', 'r'):
2056        {
2057            metadataKey = kKeyAlbumArt;
2058            break;
2059        }
2060        case FOURCC('g', 'n', 'r', 'e'):
2061        {
2062            metadataKey = kKeyGenre;
2063            break;
2064        }
2065        case FOURCC(0xa9, 'g', 'e', 'n'):
2066        {
2067            metadataKey = kKeyGenre;
2068            break;
2069        }
2070        case FOURCC('c', 'p', 'i', 'l'):
2071        {
2072            if (size == 9 && flags == 21) {
2073                char tmp[16];
2074                sprintf(tmp, "%d",
2075                        (int)buffer[size - 1]);
2076
2077                mFileMetaData->setCString(kKeyCompilation, tmp);
2078            }
2079            break;
2080        }
2081        case FOURCC('t', 'r', 'k', 'n'):
2082        {
2083            if (size == 16 && flags == 0) {
2084                char tmp[16];
2085                uint16_t* pTrack = (uint16_t*)&buffer[10];
2086                uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2087                sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2088
2089                mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2090            }
2091            break;
2092        }
2093        case FOURCC('d', 'i', 's', 'k'):
2094        {
2095            if ((size == 14 || size == 16) && flags == 0) {
2096                char tmp[16];
2097                uint16_t* pDisc = (uint16_t*)&buffer[10];
2098                uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2099                sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2100
2101                mFileMetaData->setCString(kKeyDiscNumber, tmp);
2102            }
2103            break;
2104        }
2105        case FOURCC('-', '-', '-', '-'):
2106        {
2107            buffer[size] = '\0';
2108            switch (mPath[5]) {
2109                case FOURCC('m', 'e', 'a', 'n'):
2110                    mLastCommentMean.setTo((const char *)buffer + 4);
2111                    break;
2112                case FOURCC('n', 'a', 'm', 'e'):
2113                    mLastCommentName.setTo((const char *)buffer + 4);
2114                    break;
2115                case FOURCC('d', 'a', 't', 'a'):
2116                    mLastCommentData.setTo((const char *)buffer + 8);
2117                    break;
2118            }
2119
2120            // Once we have a set of mean/name/data info, go ahead and process
2121            // it to see if its something we are interested in.  Whether or not
2122            // were are interested in the specific tag, make sure to clear out
2123            // the set so we can be ready to process another tuple should one
2124            // show up later in the file.
2125            if ((mLastCommentMean.length() != 0) &&
2126                (mLastCommentName.length() != 0) &&
2127                (mLastCommentData.length() != 0)) {
2128
2129                if (mLastCommentMean == "com.apple.iTunes"
2130                        && mLastCommentName == "iTunSMPB") {
2131                    int32_t delay, padding;
2132                    if (sscanf(mLastCommentData,
2133                               " %*x %x %x %*x", &delay, &padding) == 2) {
2134                        mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2135                        mLastTrack->meta->setInt32(kKeyEncoderPadding, padding);
2136                    }
2137                }
2138
2139                mLastCommentMean.clear();
2140                mLastCommentName.clear();
2141                mLastCommentData.clear();
2142            }
2143            break;
2144        }
2145
2146        default:
2147            break;
2148    }
2149
2150    if (size >= 8 && metadataKey) {
2151        if (metadataKey == kKeyAlbumArt) {
2152            mFileMetaData->setData(
2153                    kKeyAlbumArt, MetaData::TYPE_NONE,
2154                    buffer + 8, size - 8);
2155        } else if (metadataKey == kKeyGenre) {
2156            if (flags == 0) {
2157                // uint8_t genre code, iTunes genre codes are
2158                // the standard id3 codes, except they start
2159                // at 1 instead of 0 (e.g. Pop is 14, not 13)
2160                // We use standard id3 numbering, so subtract 1.
2161                int genrecode = (int)buffer[size - 1];
2162                genrecode--;
2163                if (genrecode < 0) {
2164                    genrecode = 255; // reserved for 'unknown genre'
2165                }
2166                char genre[10];
2167                sprintf(genre, "%d", genrecode);
2168
2169                mFileMetaData->setCString(metadataKey, genre);
2170            } else if (flags == 1) {
2171                // custom genre string
2172                buffer[size] = '\0';
2173
2174                mFileMetaData->setCString(
2175                        metadataKey, (const char *)buffer + 8);
2176            }
2177        } else {
2178            buffer[size] = '\0';
2179
2180            mFileMetaData->setCString(
2181                    metadataKey, (const char *)buffer + 8);
2182        }
2183    }
2184
2185    delete[] buffer;
2186    buffer = NULL;
2187
2188    return OK;
2189}
2190
2191sp<MediaSource> MPEG4Extractor::getTrack(size_t index) {
2192    status_t err;
2193    if ((err = readMetaData()) != OK) {
2194        return NULL;
2195    }
2196
2197    Track *track = mFirstTrack;
2198    while (index > 0) {
2199        if (track == NULL) {
2200            return NULL;
2201        }
2202
2203        track = track->next;
2204        --index;
2205    }
2206
2207    if (track == NULL) {
2208        return NULL;
2209    }
2210
2211    ALOGV("getTrack called, pssh: %d", mPssh.size());
2212
2213    return new MPEG4Source(
2214            track->meta, mDataSource, track->timescale, track->sampleTable,
2215            mSidxEntries, mMoofOffset);
2216}
2217
2218// static
2219status_t MPEG4Extractor::verifyTrack(Track *track) {
2220    const char *mime;
2221    CHECK(track->meta->findCString(kKeyMIMEType, &mime));
2222
2223    uint32_t type;
2224    const void *data;
2225    size_t size;
2226    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
2227        if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
2228                || type != kTypeAVCC) {
2229            return ERROR_MALFORMED;
2230        }
2231    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
2232            || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
2233        if (!track->meta->findData(kKeyESDS, &type, &data, &size)
2234                || type != kTypeESDS) {
2235            return ERROR_MALFORMED;
2236        }
2237    }
2238
2239    if (!track->sampleTable->isValid()) {
2240        // Make sure we have all the metadata we need.
2241        return ERROR_MALFORMED;
2242    }
2243
2244    return OK;
2245}
2246
2247status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
2248        const void *esds_data, size_t esds_size) {
2249    ESDS esds(esds_data, esds_size);
2250
2251    uint8_t objectTypeIndication;
2252    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
2253        return ERROR_MALFORMED;
2254    }
2255
2256    if (objectTypeIndication == 0xe1) {
2257        // This isn't MPEG4 audio at all, it's QCELP 14k...
2258        mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
2259        return OK;
2260    }
2261
2262    if (objectTypeIndication  == 0x6b) {
2263        // The media subtype is MP3 audio
2264        // Our software MP3 audio decoder may not be able to handle
2265        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
2266        ALOGE("MP3 track in MP4/3GPP file is not supported");
2267        return ERROR_UNSUPPORTED;
2268    }
2269
2270    const uint8_t *csd;
2271    size_t csd_size;
2272    if (esds.getCodecSpecificInfo(
2273                (const void **)&csd, &csd_size) != OK) {
2274        return ERROR_MALFORMED;
2275    }
2276
2277#if 0
2278    printf("ESD of size %d\n", csd_size);
2279    hexdump(csd, csd_size);
2280#endif
2281
2282    if (csd_size == 0) {
2283        // There's no further information, i.e. no codec specific data
2284        // Let's assume that the information provided in the mpeg4 headers
2285        // is accurate and hope for the best.
2286
2287        return OK;
2288    }
2289
2290    if (csd_size < 2) {
2291        return ERROR_MALFORMED;
2292    }
2293
2294    static uint32_t kSamplingRate[] = {
2295        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
2296        16000, 12000, 11025, 8000, 7350
2297    };
2298
2299    ABitReader br(csd, csd_size);
2300    uint32_t objectType = br.getBits(5);
2301
2302    if (objectType == 31) {  // AAC-ELD => additional 6 bits
2303        objectType = 32 + br.getBits(6);
2304    }
2305
2306    //keep AOT type
2307    mLastTrack->meta->setInt32(kKeyAACAOT, objectType);
2308
2309    uint32_t freqIndex = br.getBits(4);
2310
2311    int32_t sampleRate = 0;
2312    int32_t numChannels = 0;
2313    if (freqIndex == 15) {
2314        if (csd_size < 5) {
2315            return ERROR_MALFORMED;
2316        }
2317        sampleRate = br.getBits(24);
2318        numChannels = br.getBits(4);
2319    } else {
2320        numChannels = br.getBits(4);
2321
2322        if (freqIndex == 13 || freqIndex == 14) {
2323            return ERROR_MALFORMED;
2324        }
2325
2326        sampleRate = kSamplingRate[freqIndex];
2327    }
2328
2329    if (objectType == 5 || objectType == 29) { // SBR specific config per 14496-3 table 1.13
2330        uint32_t extFreqIndex = br.getBits(4);
2331        int32_t extSampleRate;
2332        if (extFreqIndex == 15) {
2333            if (csd_size < 8) {
2334                return ERROR_MALFORMED;
2335            }
2336            extSampleRate = br.getBits(24);
2337        } else {
2338            if (extFreqIndex == 13 || extFreqIndex == 14) {
2339                return ERROR_MALFORMED;
2340            }
2341            extSampleRate = kSamplingRate[extFreqIndex];
2342        }
2343        //TODO: save the extension sampling rate value in meta data =>
2344        //      mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
2345    }
2346
2347    if (numChannels == 0) {
2348        return ERROR_UNSUPPORTED;
2349    }
2350
2351    int32_t prevSampleRate;
2352    CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));
2353
2354    if (prevSampleRate != sampleRate) {
2355        ALOGV("mpeg4 audio sample rate different from previous setting. "
2356             "was: %d, now: %d", prevSampleRate, sampleRate);
2357    }
2358
2359    mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);
2360
2361    int32_t prevChannelCount;
2362    CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));
2363
2364    if (prevChannelCount != numChannels) {
2365        ALOGV("mpeg4 audio channel count different from previous setting. "
2366             "was: %d, now: %d", prevChannelCount, numChannels);
2367    }
2368
2369    mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);
2370
2371    return OK;
2372}
2373
2374////////////////////////////////////////////////////////////////////////////////
2375
2376MPEG4Source::MPEG4Source(
2377        const sp<MetaData> &format,
2378        const sp<DataSource> &dataSource,
2379        int32_t timeScale,
2380        const sp<SampleTable> &sampleTable,
2381        Vector<SidxEntry> &sidx,
2382        off64_t firstMoofOffset)
2383    : mFormat(format),
2384      mDataSource(dataSource),
2385      mTimescale(timeScale),
2386      mSampleTable(sampleTable),
2387      mCurrentSampleIndex(0),
2388      mCurrentFragmentIndex(0),
2389      mSegments(sidx),
2390      mFirstMoofOffset(firstMoofOffset),
2391      mCurrentMoofOffset(firstMoofOffset),
2392      mCurrentTime(0),
2393      mCurrentSampleInfoAllocSize(0),
2394      mCurrentSampleInfoSizes(NULL),
2395      mCurrentSampleInfoOffsetsAllocSize(0),
2396      mCurrentSampleInfoOffsets(NULL),
2397      mIsAVC(false),
2398      mNALLengthSize(0),
2399      mStarted(false),
2400      mGroup(NULL),
2401      mBuffer(NULL),
2402      mWantsNALFragments(false),
2403      mSrcBuffer(NULL) {
2404
2405    mFormat->findInt32(kKeyCryptoMode, &mCryptoMode);
2406    mDefaultIVSize = 0;
2407    mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
2408    uint32_t keytype;
2409    const void *key;
2410    size_t keysize;
2411    if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
2412        CHECK(keysize <= 16);
2413        memset(mCryptoKey, 0, 16);
2414        memcpy(mCryptoKey, key, keysize);
2415    }
2416
2417    const char *mime;
2418    bool success = mFormat->findCString(kKeyMIMEType, &mime);
2419    CHECK(success);
2420
2421    mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
2422
2423    if (mIsAVC) {
2424        uint32_t type;
2425        const void *data;
2426        size_t size;
2427        CHECK(format->findData(kKeyAVCC, &type, &data, &size));
2428
2429        const uint8_t *ptr = (const uint8_t *)data;
2430
2431        CHECK(size >= 7);
2432        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
2433
2434        // The number of bytes used to encode the length of a NAL unit.
2435        mNALLengthSize = 1 + (ptr[4] & 3);
2436    }
2437
2438    CHECK(format->findInt32(kKeyTrackID, &mTrackId));
2439
2440    if (mFirstMoofOffset != 0) {
2441        off64_t offset = mFirstMoofOffset;
2442        parseChunk(&offset);
2443    }
2444}
2445
2446MPEG4Source::~MPEG4Source() {
2447    if (mStarted) {
2448        stop();
2449    }
2450    free(mCurrentSampleInfoSizes);
2451    free(mCurrentSampleInfoOffsets);
2452}
2453
2454status_t MPEG4Source::start(MetaData *params) {
2455    Mutex::Autolock autoLock(mLock);
2456
2457    CHECK(!mStarted);
2458
2459    int32_t val;
2460    if (params && params->findInt32(kKeyWantsNALFragments, &val)
2461        && val != 0) {
2462        mWantsNALFragments = true;
2463    } else {
2464        mWantsNALFragments = false;
2465    }
2466
2467    mGroup = new MediaBufferGroup;
2468
2469    int32_t max_size;
2470    CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size));
2471
2472    mGroup->add_buffer(new MediaBuffer(max_size));
2473
2474    mSrcBuffer = new uint8_t[max_size];
2475
2476    mStarted = true;
2477
2478    return OK;
2479}
2480
2481status_t MPEG4Source::stop() {
2482    Mutex::Autolock autoLock(mLock);
2483
2484    CHECK(mStarted);
2485
2486    if (mBuffer != NULL) {
2487        mBuffer->release();
2488        mBuffer = NULL;
2489    }
2490
2491    delete[] mSrcBuffer;
2492    mSrcBuffer = NULL;
2493
2494    delete mGroup;
2495    mGroup = NULL;
2496
2497    mStarted = false;
2498    mCurrentSampleIndex = 0;
2499
2500    return OK;
2501}
2502
2503status_t MPEG4Source::parseChunk(off64_t *offset) {
2504    uint32_t hdr[2];
2505    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
2506        return ERROR_IO;
2507    }
2508    uint64_t chunk_size = ntohl(hdr[0]);
2509    uint32_t chunk_type = ntohl(hdr[1]);
2510    off64_t data_offset = *offset + 8;
2511
2512    if (chunk_size == 1) {
2513        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
2514            return ERROR_IO;
2515        }
2516        chunk_size = ntoh64(chunk_size);
2517        data_offset += 8;
2518
2519        if (chunk_size < 16) {
2520            // The smallest valid chunk is 16 bytes long in this case.
2521            return ERROR_MALFORMED;
2522        }
2523    } else if (chunk_size < 8) {
2524        // The smallest valid chunk is 8 bytes long.
2525        return ERROR_MALFORMED;
2526    }
2527
2528    char chunk[5];
2529    MakeFourCCString(chunk_type, chunk);
2530    ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset);
2531
2532    off64_t chunk_data_size = *offset + chunk_size - data_offset;
2533
2534    switch(chunk_type) {
2535
2536        case FOURCC('t', 'r', 'a', 'f'):
2537        case FOURCC('m', 'o', 'o', 'f'): {
2538            off64_t stop_offset = *offset + chunk_size;
2539            *offset = data_offset;
2540            while (*offset < stop_offset) {
2541                status_t err = parseChunk(offset);
2542                if (err != OK) {
2543                    return err;
2544                }
2545            }
2546            if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
2547                // *offset points to the mdat box following this moof
2548                parseChunk(offset); // doesn't actually parse it, just updates offset
2549                mNextMoofOffset = *offset;
2550            }
2551            break;
2552        }
2553
2554        case FOURCC('t', 'f', 'h', 'd'): {
2555                status_t err;
2556                if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
2557                    return err;
2558                }
2559                *offset += chunk_size;
2560                break;
2561        }
2562
2563        case FOURCC('t', 'r', 'u', 'n'): {
2564                status_t err;
2565                if (mLastParsedTrackId == mTrackId) {
2566                    if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
2567                        return err;
2568                    }
2569                }
2570
2571                *offset += chunk_size;
2572                break;
2573        }
2574
2575        case FOURCC('s', 'a', 'i', 'z'): {
2576            status_t err;
2577            if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
2578                return err;
2579            }
2580            *offset += chunk_size;
2581            break;
2582        }
2583        case FOURCC('s', 'a', 'i', 'o'): {
2584            status_t err;
2585            if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
2586                return err;
2587            }
2588            *offset += chunk_size;
2589            break;
2590        }
2591
2592        case FOURCC('m', 'd', 'a', 't'): {
2593            // parse DRM info if present
2594            ALOGV("MPEG4Source::parseChunk mdat");
2595            // if saiz/saoi was previously observed, do something with the sampleinfos
2596            *offset += chunk_size;
2597            break;
2598        }
2599
2600        default: {
2601            *offset += chunk_size;
2602            break;
2603        }
2604    }
2605    return OK;
2606}
2607
2608status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size) {
2609    ALOGV("parseSampleAuxiliaryInformationSizes");
2610    // 14496-12 8.7.12
2611    uint8_t version;
2612    if (mDataSource->readAt(
2613            offset, &version, sizeof(version))
2614            < (ssize_t)sizeof(version)) {
2615        return ERROR_IO;
2616    }
2617
2618    if (version != 0) {
2619        return ERROR_UNSUPPORTED;
2620    }
2621    offset++;
2622
2623    uint32_t flags;
2624    if (!mDataSource->getUInt24(offset, &flags)) {
2625        return ERROR_IO;
2626    }
2627    offset += 3;
2628
2629    if (flags & 1) {
2630        uint32_t tmp;
2631        if (!mDataSource->getUInt32(offset, &tmp)) {
2632            return ERROR_MALFORMED;
2633        }
2634        mCurrentAuxInfoType = tmp;
2635        offset += 4;
2636        if (!mDataSource->getUInt32(offset, &tmp)) {
2637            return ERROR_MALFORMED;
2638        }
2639        mCurrentAuxInfoTypeParameter = tmp;
2640        offset += 4;
2641    }
2642
2643    uint8_t defsize;
2644    if (mDataSource->readAt(offset, &defsize, 1) != 1) {
2645        return ERROR_MALFORMED;
2646    }
2647    mCurrentDefaultSampleInfoSize = defsize;
2648    offset++;
2649
2650    uint32_t smplcnt;
2651    if (!mDataSource->getUInt32(offset, &smplcnt)) {
2652        return ERROR_MALFORMED;
2653    }
2654    mCurrentSampleInfoCount = smplcnt;
2655    offset += 4;
2656
2657    if (mCurrentDefaultSampleInfoSize != 0) {
2658        ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
2659        return OK;
2660    }
2661    if (smplcnt > mCurrentSampleInfoAllocSize) {
2662        mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
2663        mCurrentSampleInfoAllocSize = smplcnt;
2664    }
2665
2666    mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
2667    return OK;
2668}
2669
2670status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size) {
2671    ALOGV("parseSampleAuxiliaryInformationOffsets");
2672    // 14496-12 8.7.13
2673    uint8_t version;
2674    if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
2675        return ERROR_IO;
2676    }
2677    offset++;
2678
2679    uint32_t flags;
2680    if (!mDataSource->getUInt24(offset, &flags)) {
2681        return ERROR_IO;
2682    }
2683    offset += 3;
2684
2685    uint32_t entrycount;
2686    if (!mDataSource->getUInt32(offset, &entrycount)) {
2687        return ERROR_IO;
2688    }
2689    offset += 4;
2690
2691    if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
2692        mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8);
2693        mCurrentSampleInfoOffsetsAllocSize = entrycount;
2694    }
2695    mCurrentSampleInfoOffsetCount = entrycount;
2696
2697    for (size_t i = 0; i < entrycount; i++) {
2698        if (version == 0) {
2699            uint32_t tmp;
2700            if (!mDataSource->getUInt32(offset, &tmp)) {
2701                return ERROR_IO;
2702            }
2703            mCurrentSampleInfoOffsets[i] = tmp;
2704            offset += 4;
2705        } else {
2706            uint64_t tmp;
2707            if (!mDataSource->getUInt64(offset, &tmp)) {
2708                return ERROR_IO;
2709            }
2710            mCurrentSampleInfoOffsets[i] = tmp;
2711            offset += 8;
2712        }
2713    }
2714
2715    // parse clear/encrypted data
2716
2717    off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
2718
2719    drmoffset += mCurrentMoofOffset;
2720    int ivlength;
2721    CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength));
2722
2723    // read CencSampleAuxiliaryDataFormats
2724    for (size_t i = 0; i < mCurrentSampleInfoCount; i++) {
2725        Sample *smpl = &mCurrentSamples.editItemAt(i);
2726
2727        memset(smpl->iv, 0, 16);
2728        if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) {
2729            return ERROR_IO;
2730        }
2731
2732        drmoffset += ivlength;
2733
2734        int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
2735        if (smplinfosize == 0) {
2736            smplinfosize = mCurrentSampleInfoSizes[i];
2737        }
2738        if (smplinfosize > ivlength) {
2739            uint16_t numsubsamples;
2740            if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) {
2741                return ERROR_IO;
2742            }
2743            drmoffset += 2;
2744            for (size_t j = 0; j < numsubsamples; j++) {
2745                uint16_t numclear;
2746                uint32_t numencrypted;
2747                if (!mDataSource->getUInt16(drmoffset, &numclear)) {
2748                    return ERROR_IO;
2749                }
2750                drmoffset += 2;
2751                if (!mDataSource->getUInt32(drmoffset, &numencrypted)) {
2752                    return ERROR_IO;
2753                }
2754                drmoffset += 4;
2755                smpl->clearsizes.add(numclear);
2756                smpl->encryptedsizes.add(numencrypted);
2757            }
2758        } else {
2759            smpl->clearsizes.add(0);
2760            smpl->encryptedsizes.add(smpl->size);
2761        }
2762    }
2763
2764
2765    return OK;
2766}
2767
2768status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
2769
2770    if (size < 8) {
2771        return -EINVAL;
2772    }
2773
2774    uint32_t flags;
2775    if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
2776        return ERROR_MALFORMED;
2777    }
2778
2779    if (flags & 0xff000000) {
2780        return -EINVAL;
2781    }
2782
2783    if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
2784        return ERROR_MALFORMED;
2785    }
2786
2787    if (mLastParsedTrackId != mTrackId) {
2788        // this is not the right track, skip it
2789        return OK;
2790    }
2791
2792    mTrackFragmentHeaderInfo.mFlags = flags;
2793    mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
2794    offset += 8;
2795    size -= 8;
2796
2797    ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
2798
2799    if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
2800        if (size < 8) {
2801            return -EINVAL;
2802        }
2803
2804        if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
2805            return ERROR_MALFORMED;
2806        }
2807        offset += 8;
2808        size -= 8;
2809    }
2810
2811    if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
2812        if (size < 4) {
2813            return -EINVAL;
2814        }
2815
2816        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
2817            return ERROR_MALFORMED;
2818        }
2819        offset += 4;
2820        size -= 4;
2821    }
2822
2823    if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
2824        if (size < 4) {
2825            return -EINVAL;
2826        }
2827
2828        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
2829            return ERROR_MALFORMED;
2830        }
2831        offset += 4;
2832        size -= 4;
2833    }
2834
2835    if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
2836        if (size < 4) {
2837            return -EINVAL;
2838        }
2839
2840        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
2841            return ERROR_MALFORMED;
2842        }
2843        offset += 4;
2844        size -= 4;
2845    }
2846
2847    if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
2848        if (size < 4) {
2849            return -EINVAL;
2850        }
2851
2852        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
2853            return ERROR_MALFORMED;
2854        }
2855        offset += 4;
2856        size -= 4;
2857    }
2858
2859    if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
2860        mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
2861    }
2862
2863    mTrackFragmentHeaderInfo.mDataOffset = 0;
2864    return OK;
2865}
2866
2867status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
2868
2869    ALOGV("MPEG4Extractor::parseTrackFragmentRun");
2870    if (size < 8) {
2871        return -EINVAL;
2872    }
2873
2874    enum {
2875        kDataOffsetPresent                  = 0x01,
2876        kFirstSampleFlagsPresent            = 0x04,
2877        kSampleDurationPresent              = 0x100,
2878        kSampleSizePresent                  = 0x200,
2879        kSampleFlagsPresent                 = 0x400,
2880        kSampleCompositionTimeOffsetPresent = 0x800,
2881    };
2882
2883    uint32_t flags;
2884    if (!mDataSource->getUInt32(offset, &flags)) {
2885        return ERROR_MALFORMED;
2886    }
2887    ALOGV("fragment run flags: %08x", flags);
2888
2889    if (flags & 0xff000000) {
2890        return -EINVAL;
2891    }
2892
2893    if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
2894        // These two shall not be used together.
2895        return -EINVAL;
2896    }
2897
2898    uint32_t sampleCount;
2899    if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
2900        return ERROR_MALFORMED;
2901    }
2902    offset += 8;
2903    size -= 8;
2904
2905    uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
2906
2907    uint32_t firstSampleFlags = 0;
2908
2909    if (flags & kDataOffsetPresent) {
2910        if (size < 4) {
2911            return -EINVAL;
2912        }
2913
2914        int32_t dataOffsetDelta;
2915        if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
2916            return ERROR_MALFORMED;
2917        }
2918
2919        dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
2920
2921        offset += 4;
2922        size -= 4;
2923    }
2924
2925    if (flags & kFirstSampleFlagsPresent) {
2926        if (size < 4) {
2927            return -EINVAL;
2928        }
2929
2930        if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
2931            return ERROR_MALFORMED;
2932        }
2933        offset += 4;
2934        size -= 4;
2935    }
2936
2937    uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
2938             sampleCtsOffset = 0;
2939
2940    size_t bytesPerSample = 0;
2941    if (flags & kSampleDurationPresent) {
2942        bytesPerSample += 4;
2943    } else if (mTrackFragmentHeaderInfo.mFlags
2944            & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
2945        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
2946    } else {
2947        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
2948    }
2949
2950    if (flags & kSampleSizePresent) {
2951        bytesPerSample += 4;
2952    } else if (mTrackFragmentHeaderInfo.mFlags
2953            & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
2954        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
2955    } else {
2956        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
2957    }
2958
2959    if (flags & kSampleFlagsPresent) {
2960        bytesPerSample += 4;
2961    } else if (mTrackFragmentHeaderInfo.mFlags
2962            & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
2963        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
2964    } else {
2965        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
2966    }
2967
2968    if (flags & kSampleCompositionTimeOffsetPresent) {
2969        bytesPerSample += 4;
2970    } else {
2971        sampleCtsOffset = 0;
2972    }
2973
2974    if (size < sampleCount * bytesPerSample) {
2975        return -EINVAL;
2976    }
2977
2978    Sample tmp;
2979    for (uint32_t i = 0; i < sampleCount; ++i) {
2980        if (flags & kSampleDurationPresent) {
2981            if (!mDataSource->getUInt32(offset, &sampleDuration)) {
2982                return ERROR_MALFORMED;
2983            }
2984            offset += 4;
2985        }
2986
2987        if (flags & kSampleSizePresent) {
2988            if (!mDataSource->getUInt32(offset, &sampleSize)) {
2989                return ERROR_MALFORMED;
2990            }
2991            offset += 4;
2992        }
2993
2994        if (flags & kSampleFlagsPresent) {
2995            if (!mDataSource->getUInt32(offset, &sampleFlags)) {
2996                return ERROR_MALFORMED;
2997            }
2998            offset += 4;
2999        }
3000
3001        if (flags & kSampleCompositionTimeOffsetPresent) {
3002            if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
3003                return ERROR_MALFORMED;
3004            }
3005            offset += 4;
3006        }
3007
3008        ALOGV("adding sample %d at offset 0x%08llx, size %u, duration %u, "
3009              " flags 0x%08x", i + 1,
3010                dataOffset, sampleSize, sampleDuration,
3011                (flags & kFirstSampleFlagsPresent) && i == 0
3012                    ? firstSampleFlags : sampleFlags);
3013        tmp.offset = dataOffset;
3014        tmp.size = sampleSize;
3015        tmp.duration = sampleDuration;
3016        mCurrentSamples.add(tmp);
3017
3018        dataOffset += sampleSize;
3019    }
3020
3021    mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
3022
3023    return OK;
3024}
3025
3026sp<MetaData> MPEG4Source::getFormat() {
3027    Mutex::Autolock autoLock(mLock);
3028
3029    return mFormat;
3030}
3031
3032size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
3033    switch (mNALLengthSize) {
3034        case 1:
3035            return *data;
3036        case 2:
3037            return U16_AT(data);
3038        case 3:
3039            return ((size_t)data[0] << 16) | U16_AT(&data[1]);
3040        case 4:
3041            return U32_AT(data);
3042    }
3043
3044    // This cannot happen, mNALLengthSize springs to life by adding 1 to
3045    // a 2-bit integer.
3046    CHECK(!"Should not be here.");
3047
3048    return 0;
3049}
3050
3051status_t MPEG4Source::read(
3052        MediaBuffer **out, const ReadOptions *options) {
3053    Mutex::Autolock autoLock(mLock);
3054
3055    CHECK(mStarted);
3056
3057    if (mFirstMoofOffset > 0) {
3058        return fragmentedRead(out, options);
3059    }
3060
3061    *out = NULL;
3062
3063    int64_t targetSampleTimeUs = -1;
3064
3065    int64_t seekTimeUs;
3066    ReadOptions::SeekMode mode;
3067    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3068        uint32_t findFlags = 0;
3069        switch (mode) {
3070            case ReadOptions::SEEK_PREVIOUS_SYNC:
3071                findFlags = SampleTable::kFlagBefore;
3072                break;
3073            case ReadOptions::SEEK_NEXT_SYNC:
3074                findFlags = SampleTable::kFlagAfter;
3075                break;
3076            case ReadOptions::SEEK_CLOSEST_SYNC:
3077            case ReadOptions::SEEK_CLOSEST:
3078                findFlags = SampleTable::kFlagClosest;
3079                break;
3080            default:
3081                CHECK(!"Should not be here.");
3082                break;
3083        }
3084
3085        uint32_t sampleIndex;
3086        status_t err = mSampleTable->findSampleAtTime(
3087                seekTimeUs * mTimescale / 1000000,
3088                &sampleIndex, findFlags);
3089
3090        if (mode == ReadOptions::SEEK_CLOSEST) {
3091            // We found the closest sample already, now we want the sync
3092            // sample preceding it (or the sample itself of course), even
3093            // if the subsequent sync sample is closer.
3094            findFlags = SampleTable::kFlagBefore;
3095        }
3096
3097        uint32_t syncSampleIndex;
3098        if (err == OK) {
3099            err = mSampleTable->findSyncSampleNear(
3100                    sampleIndex, &syncSampleIndex, findFlags);
3101        }
3102
3103        uint32_t sampleTime;
3104        if (err == OK) {
3105            err = mSampleTable->getMetaDataForSample(
3106                    sampleIndex, NULL, NULL, &sampleTime);
3107        }
3108
3109        if (err != OK) {
3110            if (err == ERROR_OUT_OF_RANGE) {
3111                // An attempt to seek past the end of the stream would
3112                // normally cause this ERROR_OUT_OF_RANGE error. Propagating
3113                // this all the way to the MediaPlayer would cause abnormal
3114                // termination. Legacy behaviour appears to be to behave as if
3115                // we had seeked to the end of stream, ending normally.
3116                err = ERROR_END_OF_STREAM;
3117            }
3118            ALOGV("end of stream");
3119            return err;
3120        }
3121
3122        if (mode == ReadOptions::SEEK_CLOSEST) {
3123            targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
3124        }
3125
3126#if 0
3127        uint32_t syncSampleTime;
3128        CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
3129                    syncSampleIndex, NULL, NULL, &syncSampleTime));
3130
3131        ALOGI("seek to time %lld us => sample at time %lld us, "
3132             "sync sample at time %lld us",
3133             seekTimeUs,
3134             sampleTime * 1000000ll / mTimescale,
3135             syncSampleTime * 1000000ll / mTimescale);
3136#endif
3137
3138        mCurrentSampleIndex = syncSampleIndex;
3139        if (mBuffer != NULL) {
3140            mBuffer->release();
3141            mBuffer = NULL;
3142        }
3143
3144        // fall through
3145    }
3146
3147    off64_t offset;
3148    size_t size;
3149    uint32_t cts;
3150    bool isSyncSample;
3151    bool newBuffer = false;
3152    if (mBuffer == NULL) {
3153        newBuffer = true;
3154
3155        status_t err =
3156            mSampleTable->getMetaDataForSample(
3157                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample);
3158
3159        if (err != OK) {
3160            return err;
3161        }
3162
3163        err = mGroup->acquire_buffer(&mBuffer);
3164
3165        if (err != OK) {
3166            CHECK(mBuffer == NULL);
3167            return err;
3168        }
3169    }
3170
3171    if (!mIsAVC || mWantsNALFragments) {
3172        if (newBuffer) {
3173            ssize_t num_bytes_read =
3174                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3175
3176            if (num_bytes_read < (ssize_t)size) {
3177                mBuffer->release();
3178                mBuffer = NULL;
3179
3180                return ERROR_IO;
3181            }
3182
3183            CHECK(mBuffer != NULL);
3184            mBuffer->set_range(0, size);
3185            mBuffer->meta_data()->clear();
3186            mBuffer->meta_data()->setInt64(
3187                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3188
3189            if (targetSampleTimeUs >= 0) {
3190                mBuffer->meta_data()->setInt64(
3191                        kKeyTargetTime, targetSampleTimeUs);
3192            }
3193
3194            if (isSyncSample) {
3195                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3196            }
3197
3198            ++mCurrentSampleIndex;
3199        }
3200
3201        if (!mIsAVC) {
3202            *out = mBuffer;
3203            mBuffer = NULL;
3204
3205            return OK;
3206        }
3207
3208        // Each NAL unit is split up into its constituent fragments and
3209        // each one of them returned in its own buffer.
3210
3211        CHECK(mBuffer->range_length() >= mNALLengthSize);
3212
3213        const uint8_t *src =
3214            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3215
3216        size_t nal_size = parseNALSize(src);
3217        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3218            ALOGE("incomplete NAL unit.");
3219
3220            mBuffer->release();
3221            mBuffer = NULL;
3222
3223            return ERROR_MALFORMED;
3224        }
3225
3226        MediaBuffer *clone = mBuffer->clone();
3227        CHECK(clone != NULL);
3228        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3229
3230        CHECK(mBuffer != NULL);
3231        mBuffer->set_range(
3232                mBuffer->range_offset() + mNALLengthSize + nal_size,
3233                mBuffer->range_length() - mNALLengthSize - nal_size);
3234
3235        if (mBuffer->range_length() == 0) {
3236            mBuffer->release();
3237            mBuffer = NULL;
3238        }
3239
3240        *out = clone;
3241
3242        return OK;
3243    } else {
3244        // Whole NAL units are returned but each fragment is prefixed by
3245        // the start code (0x00 00 00 01).
3246        ssize_t num_bytes_read = 0;
3247        int32_t drm = 0;
3248        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3249        if (usesDRM) {
3250            num_bytes_read =
3251                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3252        } else {
3253            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3254        }
3255
3256        if (num_bytes_read < (ssize_t)size) {
3257            mBuffer->release();
3258            mBuffer = NULL;
3259
3260            return ERROR_IO;
3261        }
3262
3263        if (usesDRM) {
3264            CHECK(mBuffer != NULL);
3265            mBuffer->set_range(0, size);
3266
3267        } else {
3268            uint8_t *dstData = (uint8_t *)mBuffer->data();
3269            size_t srcOffset = 0;
3270            size_t dstOffset = 0;
3271
3272            while (srcOffset < size) {
3273                bool isMalFormed = (srcOffset + mNALLengthSize > size);
3274                size_t nalLength = 0;
3275                if (!isMalFormed) {
3276                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
3277                    srcOffset += mNALLengthSize;
3278                    isMalFormed = srcOffset + nalLength > size;
3279                }
3280
3281                if (isMalFormed) {
3282                    ALOGE("Video is malformed");
3283                    mBuffer->release();
3284                    mBuffer = NULL;
3285                    return ERROR_MALFORMED;
3286                }
3287
3288                if (nalLength == 0) {
3289                    continue;
3290                }
3291
3292                CHECK(dstOffset + 4 <= mBuffer->size());
3293
3294                dstData[dstOffset++] = 0;
3295                dstData[dstOffset++] = 0;
3296                dstData[dstOffset++] = 0;
3297                dstData[dstOffset++] = 1;
3298                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
3299                srcOffset += nalLength;
3300                dstOffset += nalLength;
3301            }
3302            CHECK_EQ(srcOffset, size);
3303            CHECK(mBuffer != NULL);
3304            mBuffer->set_range(0, dstOffset);
3305        }
3306
3307        mBuffer->meta_data()->clear();
3308        mBuffer->meta_data()->setInt64(
3309                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3310
3311        if (targetSampleTimeUs >= 0) {
3312            mBuffer->meta_data()->setInt64(
3313                    kKeyTargetTime, targetSampleTimeUs);
3314        }
3315
3316        if (isSyncSample) {
3317            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3318        }
3319
3320        ++mCurrentSampleIndex;
3321
3322        *out = mBuffer;
3323        mBuffer = NULL;
3324
3325        return OK;
3326    }
3327}
3328
3329status_t MPEG4Source::fragmentedRead(
3330        MediaBuffer **out, const ReadOptions *options) {
3331
3332    ALOGV("MPEG4Source::fragmentedRead");
3333
3334    CHECK(mStarted);
3335
3336    *out = NULL;
3337
3338    int64_t targetSampleTimeUs = -1;
3339
3340    int64_t seekTimeUs;
3341    ReadOptions::SeekMode mode;
3342    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3343
3344        int numSidxEntries = mSegments.size();
3345        if (numSidxEntries != 0) {
3346            int64_t totalTime = 0;
3347            off64_t totalOffset = mFirstMoofOffset;
3348            for (int i = 0; i < numSidxEntries; i++) {
3349                const SidxEntry *se = &mSegments[i];
3350                if (totalTime + se->mDurationUs > seekTimeUs) {
3351                    // The requested time is somewhere in this segment
3352                    if ((mode == ReadOptions::SEEK_NEXT_SYNC) ||
3353                        (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
3354                        (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
3355                        // requested next sync, or closest sync and it was closer to the end of
3356                        // this segment
3357                        totalTime += se->mDurationUs;
3358                        totalOffset += se->mSize;
3359                    }
3360                    break;
3361                }
3362                totalTime += se->mDurationUs;
3363                totalOffset += se->mSize;
3364            }
3365        mCurrentMoofOffset = totalOffset;
3366        mCurrentSamples.clear();
3367        mCurrentSampleIndex = 0;
3368        parseChunk(&totalOffset);
3369        mCurrentTime = totalTime * mTimescale / 1000000ll;
3370        }
3371
3372        if (mBuffer != NULL) {
3373            mBuffer->release();
3374            mBuffer = NULL;
3375        }
3376
3377        // fall through
3378    }
3379
3380    off64_t offset = 0;
3381    size_t size;
3382    uint32_t cts = 0;
3383    bool isSyncSample = false;
3384    bool newBuffer = false;
3385    if (mBuffer == NULL) {
3386        newBuffer = true;
3387
3388        if (mCurrentSampleIndex >= mCurrentSamples.size()) {
3389            // move to next fragment
3390            Sample lastSample = mCurrentSamples[mCurrentSamples.size() - 1];
3391            off64_t nextMoof = mNextMoofOffset; // lastSample.offset + lastSample.size;
3392            mCurrentMoofOffset = nextMoof;
3393            mCurrentSamples.clear();
3394            mCurrentSampleIndex = 0;
3395            parseChunk(&nextMoof);
3396                if (mCurrentSampleIndex >= mCurrentSamples.size()) {
3397                    return ERROR_END_OF_STREAM;
3398                }
3399        }
3400
3401        const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
3402        offset = smpl->offset;
3403        size = smpl->size;
3404        cts = mCurrentTime;
3405        mCurrentTime += smpl->duration;
3406        isSyncSample = (mCurrentSampleIndex == 0); // XXX
3407
3408        status_t err = mGroup->acquire_buffer(&mBuffer);
3409
3410        if (err != OK) {
3411            CHECK(mBuffer == NULL);
3412            ALOGV("acquire_buffer returned %d", err);
3413            return err;
3414        }
3415    }
3416
3417    const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
3418    const sp<MetaData> bufmeta = mBuffer->meta_data();
3419    bufmeta->clear();
3420    if (smpl->encryptedsizes.size()) {
3421        // store clear/encrypted lengths in metadata
3422        bufmeta->setData(kKeyPlainSizes, 0,
3423                smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
3424        bufmeta->setData(kKeyEncryptedSizes, 0,
3425                smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
3426        bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size?
3427        bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
3428        bufmeta->setInt32(kKeyCryptoMode, mCryptoMode);
3429        bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16);
3430    }
3431
3432    if (!mIsAVC || mWantsNALFragments) {
3433        if (newBuffer) {
3434            ssize_t num_bytes_read =
3435                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3436
3437            if (num_bytes_read < (ssize_t)size) {
3438                mBuffer->release();
3439                mBuffer = NULL;
3440
3441                ALOGV("i/o error");
3442                return ERROR_IO;
3443            }
3444
3445            CHECK(mBuffer != NULL);
3446            mBuffer->set_range(0, size);
3447            mBuffer->meta_data()->setInt64(
3448                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3449
3450            if (targetSampleTimeUs >= 0) {
3451                mBuffer->meta_data()->setInt64(
3452                        kKeyTargetTime, targetSampleTimeUs);
3453            }
3454
3455            if (isSyncSample) {
3456                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3457            }
3458
3459            ++mCurrentSampleIndex;
3460        }
3461
3462        if (!mIsAVC) {
3463            *out = mBuffer;
3464            mBuffer = NULL;
3465
3466            return OK;
3467        }
3468
3469        // Each NAL unit is split up into its constituent fragments and
3470        // each one of them returned in its own buffer.
3471
3472        CHECK(mBuffer->range_length() >= mNALLengthSize);
3473
3474        const uint8_t *src =
3475            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3476
3477        size_t nal_size = parseNALSize(src);
3478        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3479            ALOGE("incomplete NAL unit.");
3480
3481            mBuffer->release();
3482            mBuffer = NULL;
3483
3484            return ERROR_MALFORMED;
3485        }
3486
3487        MediaBuffer *clone = mBuffer->clone();
3488        CHECK(clone != NULL);
3489        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3490
3491        CHECK(mBuffer != NULL);
3492        mBuffer->set_range(
3493                mBuffer->range_offset() + mNALLengthSize + nal_size,
3494                mBuffer->range_length() - mNALLengthSize - nal_size);
3495
3496        if (mBuffer->range_length() == 0) {
3497            mBuffer->release();
3498            mBuffer = NULL;
3499        }
3500
3501        *out = clone;
3502
3503        return OK;
3504    } else {
3505        ALOGV("whole NAL");
3506        // Whole NAL units are returned but each fragment is prefixed by
3507        // the start code (0x00 00 00 01).
3508        ssize_t num_bytes_read = 0;
3509        int32_t drm = 0;
3510        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3511        if (usesDRM) {
3512            num_bytes_read =
3513                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3514        } else {
3515            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3516        }
3517
3518        if (num_bytes_read < (ssize_t)size) {
3519            mBuffer->release();
3520            mBuffer = NULL;
3521
3522            ALOGV("i/o error");
3523            return ERROR_IO;
3524        }
3525
3526        if (usesDRM) {
3527            CHECK(mBuffer != NULL);
3528            mBuffer->set_range(0, size);
3529
3530        } else {
3531            uint8_t *dstData = (uint8_t *)mBuffer->data();
3532            size_t srcOffset = 0;
3533            size_t dstOffset = 0;
3534
3535            while (srcOffset < size) {
3536                bool isMalFormed = (srcOffset + mNALLengthSize > size);
3537                size_t nalLength = 0;
3538                if (!isMalFormed) {
3539                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
3540                    srcOffset += mNALLengthSize;
3541                    isMalFormed = srcOffset + nalLength > size;
3542                }
3543
3544                if (isMalFormed) {
3545                    ALOGE("Video is malformed");
3546                    mBuffer->release();
3547                    mBuffer = NULL;
3548                    return ERROR_MALFORMED;
3549                }
3550
3551                if (nalLength == 0) {
3552                    continue;
3553                }
3554
3555                CHECK(dstOffset + 4 <= mBuffer->size());
3556
3557                dstData[dstOffset++] = 0;
3558                dstData[dstOffset++] = 0;
3559                dstData[dstOffset++] = 0;
3560                dstData[dstOffset++] = 1;
3561                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
3562                srcOffset += nalLength;
3563                dstOffset += nalLength;
3564            }
3565            CHECK_EQ(srcOffset, size);
3566            CHECK(mBuffer != NULL);
3567            mBuffer->set_range(0, dstOffset);
3568        }
3569
3570        mBuffer->meta_data()->setInt64(
3571                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3572
3573        if (targetSampleTimeUs >= 0) {
3574            mBuffer->meta_data()->setInt64(
3575                    kKeyTargetTime, targetSampleTimeUs);
3576        }
3577
3578        if (isSyncSample) {
3579            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3580        }
3581
3582        ++mCurrentSampleIndex;
3583
3584        *out = mBuffer;
3585        mBuffer = NULL;
3586
3587        return OK;
3588    }
3589}
3590
3591MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
3592        const char *mimePrefix) {
3593    for (Track *track = mFirstTrack; track != NULL; track = track->next) {
3594        const char *mime;
3595        if (track->meta != NULL
3596                && track->meta->findCString(kKeyMIMEType, &mime)
3597                && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
3598            return track;
3599        }
3600    }
3601
3602    return NULL;
3603}
3604
3605static bool LegacySniffMPEG4(
3606        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
3607    uint8_t header[8];
3608
3609    ssize_t n = source->readAt(4, header, sizeof(header));
3610    if (n < (ssize_t)sizeof(header)) {
3611        return false;
3612    }
3613
3614    if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
3615        || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
3616        || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
3617        || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
3618        || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
3619        || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) {
3620        *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
3621        *confidence = 0.4;
3622
3623        return true;
3624    }
3625
3626    return false;
3627}
3628
3629static bool isCompatibleBrand(uint32_t fourcc) {
3630    static const uint32_t kCompatibleBrands[] = {
3631        FOURCC('i', 's', 'o', 'm'),
3632        FOURCC('i', 's', 'o', '2'),
3633        FOURCC('a', 'v', 'c', '1'),
3634        FOURCC('3', 'g', 'p', '4'),
3635        FOURCC('m', 'p', '4', '1'),
3636        FOURCC('m', 'p', '4', '2'),
3637
3638        // Won't promise that the following file types can be played.
3639        // Just give these file types a chance.
3640        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
3641        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
3642
3643        FOURCC('3', 'g', '2', 'a'),  // 3GPP2
3644        FOURCC('3', 'g', '2', 'b'),
3645    };
3646
3647    for (size_t i = 0;
3648         i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
3649         ++i) {
3650        if (kCompatibleBrands[i] == fourcc) {
3651            return true;
3652        }
3653    }
3654
3655    return false;
3656}
3657
3658// Attempt to actually parse the 'ftyp' atom and determine if a suitable
3659// compatible brand is present.
3660// Also try to identify where this file's metadata ends
3661// (end of the 'moov' atom) and report it to the caller as part of
3662// the metadata.
3663static bool BetterSniffMPEG4(
3664        const sp<DataSource> &source, String8 *mimeType, float *confidence,
3665        sp<AMessage> *meta) {
3666    // We scan up to 128 bytes to identify this file as an MP4.
3667    static const off64_t kMaxScanOffset = 128ll;
3668
3669    off64_t offset = 0ll;
3670    bool foundGoodFileType = false;
3671    off64_t moovAtomEndOffset = -1ll;
3672    bool done = false;
3673
3674    while (!done && offset < kMaxScanOffset) {
3675        uint32_t hdr[2];
3676        if (source->readAt(offset, hdr, 8) < 8) {
3677            return false;
3678        }
3679
3680        uint64_t chunkSize = ntohl(hdr[0]);
3681        uint32_t chunkType = ntohl(hdr[1]);
3682        off64_t chunkDataOffset = offset + 8;
3683
3684        if (chunkSize == 1) {
3685            if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
3686                return false;
3687            }
3688
3689            chunkSize = ntoh64(chunkSize);
3690            chunkDataOffset += 8;
3691
3692            if (chunkSize < 16) {
3693                // The smallest valid chunk is 16 bytes long in this case.
3694                return false;
3695            }
3696        } else if (chunkSize < 8) {
3697            // The smallest valid chunk is 8 bytes long.
3698            return false;
3699        }
3700
3701        off64_t chunkDataSize = offset + chunkSize - chunkDataOffset;
3702
3703        char chunkstring[5];
3704        MakeFourCCString(chunkType, chunkstring);
3705        ALOGV("saw chunk type %s, size %lld @ %lld", chunkstring, chunkSize, offset);
3706        switch (chunkType) {
3707            case FOURCC('f', 't', 'y', 'p'):
3708            {
3709                if (chunkDataSize < 8) {
3710                    return false;
3711                }
3712
3713                uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
3714                for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
3715                    if (i == 1) {
3716                        // Skip this index, it refers to the minorVersion,
3717                        // not a brand.
3718                        continue;
3719                    }
3720
3721                    uint32_t brand;
3722                    if (source->readAt(
3723                                chunkDataOffset + 4 * i, &brand, 4) < 4) {
3724                        return false;
3725                    }
3726
3727                    brand = ntohl(brand);
3728
3729                    if (isCompatibleBrand(brand)) {
3730                        foundGoodFileType = true;
3731                        break;
3732                    }
3733                }
3734
3735                if (!foundGoodFileType) {
3736                    return false;
3737                }
3738
3739                break;
3740            }
3741
3742            case FOURCC('m', 'o', 'o', 'v'):
3743            {
3744                moovAtomEndOffset = offset + chunkSize;
3745
3746                done = true;
3747                break;
3748            }
3749
3750            default:
3751                break;
3752        }
3753
3754        offset += chunkSize;
3755    }
3756
3757    if (!foundGoodFileType) {
3758        return false;
3759    }
3760
3761    *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
3762    *confidence = 0.4f;
3763
3764    if (moovAtomEndOffset >= 0) {
3765        *meta = new AMessage;
3766        (*meta)->setInt64("meta-data-size", moovAtomEndOffset);
3767
3768        ALOGV("found metadata size: %lld", moovAtomEndOffset);
3769    }
3770
3771    return true;
3772}
3773
3774bool SniffMPEG4(
3775        const sp<DataSource> &source, String8 *mimeType, float *confidence,
3776        sp<AMessage> *meta) {
3777    if (BetterSniffMPEG4(source, mimeType, confidence, meta)) {
3778        return true;
3779    }
3780
3781    if (LegacySniffMPEG4(source, mimeType, confidence)) {
3782        ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
3783        return true;
3784    }
3785
3786    return false;
3787}
3788
3789}  // namespace android
3790