MPEG4Extractor.cpp revision db170bb1cae145d07efc803a3c208963de0c6087
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MPEG4Extractor"
19#include <utils/Log.h>
20
21#include "include/MPEG4Extractor.h"
22#include "include/SampleTable.h"
23#include "include/ESDS.h"
24
25#include <ctype.h>
26#include <stdint.h>
27#include <stdlib.h>
28#include <string.h>
29
30#include <media/stagefright/foundation/ABitReader.h>
31#include <media/stagefright/foundation/ABuffer.h>
32#include <media/stagefright/foundation/ADebug.h>
33#include <media/stagefright/foundation/AMessage.h>
34#include <media/stagefright/MediaBuffer.h>
35#include <media/stagefright/MediaBufferGroup.h>
36#include <media/stagefright/MediaDefs.h>
37#include <media/stagefright/MediaSource.h>
38#include <media/stagefright/MetaData.h>
39#include <utils/String8.h>
40
41#include <byteswap.h>
42
43namespace android {
44
// MediaSource implementation built from one track's format, data source and
// sample table. The member functions are defined outside this part of the
// file; comments below only describe what the declarations themselves show.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                off64_t firstMoofOffset);

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    // Two read entry points are declared; presumably fragmentedRead() serves
    // files that contain 'moof' fragments -- confirm against the definitions.
    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    // Protected destructor: instances cannot be destroyed directly by
    // outside code.
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Held by reference, not by value -- presumably bound to the "sidx"
    // constructor argument, which must then outlive this object (confirm in
    // the constructor definition).
    Vector<SidxEntry> &mSegments;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    // Bookkeeping for sample auxiliary information; presumably filled in by
    // parseSampleAuxiliaryInformationSizes()/...Offsets() declared below --
    // confirm against their definitions.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    struct TrackFragmentHeaderInfo {
        // Bit masks; presumably tested against mFlags as read from a track
        // fragment header ('tfhd') box -- confirm against
        // parseTrackFragmentHeader().
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // One entry of mCurrentSamples. The iv/clearsizes/encryptedsizes fields
    // presumably describe how the sample is encrypted -- confirm against the
    // code that fills them in.
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Copy operations are declared private so outside code cannot copy an
    // MPEG4Source.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
151
152// This custom data source wraps an existing one and satisfies requests
153// falling entirely within a cached range from the cache while forwarding
154// all remaining requests to the wrapped datasource.
155// This is used to cache the full sampletable metadata for a single track,
156// possibly wrapping multiple times to cover all tracks, i.e.
157// Each MPEG4DataSource caches the sampletable metadata for a single track.
158
// DataSource decorator holding one contiguous, in-memory copy of a byte range
// of the wrapped source. Reads that fall entirely inside that range are
// answered from memory; everything else is forwarded to the wrapped source.
struct MPEG4DataSource : public DataSource {
    MPEG4DataSource(const sp<DataSource> &source);

    // initCheck(), getSize() and flags() simply forward to the wrapped source.
    virtual status_t initCheck() const;
    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
    virtual status_t getSize(off64_t *size);
    virtual uint32_t flags();

    // Discards any previous cache and loads "size" bytes starting at "offset"
    // from the wrapped source. Returns OK, -ENOMEM or ERROR_IO; on ERROR_IO
    // the cache is left empty.
    status_t setCachedRange(off64_t offset, size_t size);

protected:
    virtual ~MPEG4DataSource();

private:
    // Taken by readAt() and setCachedRange() while they touch the cache.
    Mutex mLock;

    sp<DataSource> mSource;
    off64_t mCachedOffset;  // offset in mSource of the first cached byte
    size_t mCachedSize;     // number of cached bytes, 0 when nothing is cached
    uint8_t *mCache;        // malloc()ed buffer, NULL when nothing is cached

    // Frees mCache and resets the cached range; does not take mLock itself.
    void clearCache();

    // Copy operations are declared private so outside code cannot copy an
    // MPEG4DataSource.
    MPEG4DataSource(const MPEG4DataSource &);
    MPEG4DataSource &operator=(const MPEG4DataSource &);
};
185
186MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
187    : mSource(source),
188      mCachedOffset(0),
189      mCachedSize(0),
190      mCache(NULL) {
191}
192
193MPEG4DataSource::~MPEG4DataSource() {
194    clearCache();
195}
196
197void MPEG4DataSource::clearCache() {
198    if (mCache) {
199        free(mCache);
200        mCache = NULL;
201    }
202
203    mCachedOffset = 0;
204    mCachedSize = 0;
205}
206
207status_t MPEG4DataSource::initCheck() const {
208    return mSource->initCheck();
209}
210
211ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
212    Mutex::Autolock autoLock(mLock);
213
214    if (offset >= mCachedOffset
215            && offset + size <= mCachedOffset + mCachedSize) {
216        memcpy(data, &mCache[offset - mCachedOffset], size);
217        return size;
218    }
219
220    return mSource->readAt(offset, data, size);
221}
222
223status_t MPEG4DataSource::getSize(off64_t *size) {
224    return mSource->getSize(size);
225}
226
227uint32_t MPEG4DataSource::flags() {
228    return mSource->flags();
229}
230
231status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
232    Mutex::Autolock autoLock(mLock);
233
234    clearCache();
235
236    mCache = (uint8_t *)malloc(size);
237
238    if (mCache == NULL) {
239        return -ENOMEM;
240    }
241
242    mCachedOffset = offset;
243    mCachedSize = size;
244
245    ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
246
247    if (err < (ssize_t)size) {
248        clearCache();
249
250        return ERROR_IO;
251    }
252
253    return OK;
254}
255
256////////////////////////////////////////////////////////////////////////////////
257
// Debug helper: writes "size" bytes starting at "_data" to stdout as a hex
// dump, 16 bytes per row. Each row shows the row's starting offset, the bytes
// in hex (with an extra space after the eighth byte, padded with blanks on a
// short final row) and the same bytes as characters, with '.' standing in for
// anything isprint() rejects.
static void hexdump(const void *_data, size_t size) {
    const uint8_t *data = (const uint8_t *)_data;
    size_t offset = 0;
    while (offset < size) {
        // "offset" is a size_t, so the conversion needs the 'z' length
        // modifier; a plain "%x" expects an unsigned int and mismatches the
        // argument wherever size_t is wider than int.
        printf("0x%04zx  ", offset);

        size_t n = size - offset;
        if (n > 16) {
            n = 16;
        }

        for (size_t i = 0; i < 16; ++i) {
            if (i == 8) {
                printf(" ");
            }

            if (offset + i < size) {
                printf("%02x ", data[offset + i]);
            } else {
                // Keep the character column aligned on a short last row.
                printf("   ");
            }
        }

        printf(" ");

        for (size_t i = 0; i < n; ++i) {
            if (isprint(data[offset + i])) {
                printf("%c", data[offset + i]);
            } else {
                printf(".");
            }
        }

        printf("\n");

        offset += 16;
    }
}
296
297static const char *FourCC2MIME(uint32_t fourcc) {
298    switch (fourcc) {
299        case FOURCC('m', 'p', '4', 'a'):
300            return MEDIA_MIMETYPE_AUDIO_AAC;
301
302        case FOURCC('s', 'a', 'm', 'r'):
303            return MEDIA_MIMETYPE_AUDIO_AMR_NB;
304
305        case FOURCC('s', 'a', 'w', 'b'):
306            return MEDIA_MIMETYPE_AUDIO_AMR_WB;
307
308        case FOURCC('m', 'p', '4', 'v'):
309            return MEDIA_MIMETYPE_VIDEO_MPEG4;
310
311        case FOURCC('s', '2', '6', '3'):
312        case FOURCC('h', '2', '6', '3'):
313        case FOURCC('H', '2', '6', '3'):
314            return MEDIA_MIMETYPE_VIDEO_H263;
315
316        case FOURCC('a', 'v', 'c', '1'):
317            return MEDIA_MIMETYPE_VIDEO_AVC;
318
319        default:
320            CHECK(!"should not be here.");
321            return NULL;
322    }
323}
324
325static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
326    if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
327        // AMR NB audio is always mono, 8kHz
328        *channels = 1;
329        *rate = 8000;
330        return true;
331    } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
332        // AMR WB audio is always mono, 16kHz
333        *channels = 1;
334        *rate = 16000;
335        return true;
336    }
337    return false;
338}
339
340MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
341    : mSidxDuration(0),
342      mMoofOffset(0),
343      mDataSource(source),
344      mInitCheck(NO_INIT),
345      mHasVideo(false),
346      mHeaderTimescale(0),
347      mFirstTrack(NULL),
348      mLastTrack(NULL),
349      mFileMetaData(new MetaData),
350      mFirstSINF(NULL),
351      mIsDrm(false) {
352}
353
354MPEG4Extractor::~MPEG4Extractor() {
355    Track *track = mFirstTrack;
356    while (track) {
357        Track *next = track->next;
358
359        delete track;
360        track = next;
361    }
362    mFirstTrack = mLastTrack = NULL;
363
364    SINF *sinf = mFirstSINF;
365    while (sinf) {
366        SINF *next = sinf->next;
367        delete sinf->IPMPData;
368        delete sinf;
369        sinf = next;
370    }
371    mFirstSINF = NULL;
372
373    for (size_t i = 0; i < mPssh.size(); i++) {
374        delete [] mPssh[i].data;
375    }
376}
377
378uint32_t MPEG4Extractor::flags() const {
379    return CAN_PAUSE |
380            ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
381                    (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
382}
383
384sp<MetaData> MPEG4Extractor::getMetaData() {
385    status_t err;
386    if ((err = readMetaData()) != OK) {
387        return new MetaData;
388    }
389
390    return mFileMetaData;
391}
392
393size_t MPEG4Extractor::countTracks() {
394    status_t err;
395    if ((err = readMetaData()) != OK) {
396        ALOGV("MPEG4Extractor::countTracks: no tracks");
397        return 0;
398    }
399
400    size_t n = 0;
401    Track *track = mFirstTrack;
402    while (track) {
403        ++n;
404        track = track->next;
405    }
406
407    ALOGV("MPEG4Extractor::countTracks: %d tracks", n);
408    return n;
409}
410
411sp<MetaData> MPEG4Extractor::getTrackMetaData(
412        size_t index, uint32_t flags) {
413    status_t err;
414    if ((err = readMetaData()) != OK) {
415        return NULL;
416    }
417
418    Track *track = mFirstTrack;
419    while (index > 0) {
420        if (track == NULL) {
421            return NULL;
422        }
423
424        track = track->next;
425        --index;
426    }
427
428    if (track == NULL) {
429        return NULL;
430    }
431
432    if ((flags & kIncludeExtensiveMetaData)
433            && !track->includes_expensive_metadata) {
434        track->includes_expensive_metadata = true;
435
436        const char *mime;
437        CHECK(track->meta->findCString(kKeyMIMEType, &mime));
438        if (!strncasecmp("video/", mime, 6)) {
439            if (mMoofOffset > 0) {
440                int64_t duration;
441                if (track->meta->findInt64(kKeyDuration, &duration)) {
442                    // nothing fancy, just pick a frame near 1/4th of the duration
443                    track->meta->setInt64(
444                            kKeyThumbnailTime, duration / 4);
445                }
446            } else {
447                uint32_t sampleIndex;
448                uint32_t sampleTime;
449                if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK
450                        && track->sampleTable->getMetaDataForSample(
451                            sampleIndex, NULL /* offset */, NULL /* size */,
452                            &sampleTime) == OK) {
453                    track->meta->setInt64(
454                            kKeyThumbnailTime,
455                            ((int64_t)sampleTime * 1000000) / track->timescale);
456                }
457            }
458        }
459    }
460
461    return track->meta;
462}
463
// Writes the four bytes of "x", most significant first, into s[0..3] and
// terminates the result with '\0'. "s" must have room for 5 chars.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int i = 0; i < 4; ++i) {
        const int shift = 8 * (3 - i);
        s[i] = (x >> shift) & 0xff;
    }
    s[4] = '\0';
}
471
472status_t MPEG4Extractor::readMetaData() {
473    if (mInitCheck != NO_INIT) {
474        return mInitCheck;
475    }
476
477    off64_t offset = 0;
478    status_t err;
479    while (true) {
480        err = parseChunk(&offset, 0);
481        if (err == OK) {
482            continue;
483        }
484
485        uint32_t hdr[2];
486        if (mDataSource->readAt(offset, hdr, 8) < 8) {
487            break;
488        }
489        uint32_t chunk_type = ntohl(hdr[1]);
490        if (chunk_type == FOURCC('s', 'i', 'd', 'x')) {
491            // parse the sidx box too
492            continue;
493        } else if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
494            // store the offset of the first segment
495            mMoofOffset = offset;
496        }
497        break;
498    }
499
500    if (mInitCheck == OK) {
501        if (mHasVideo) {
502            mFileMetaData->setCString(
503                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
504        } else {
505            mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
506        }
507
508        mInitCheck = OK;
509    } else {
510        mInitCheck = err;
511    }
512
513    CHECK_NE(err, (status_t)NO_INIT);
514
515    // copy pssh data into file metadata
516    int psshsize = 0;
517    for (size_t i = 0; i < mPssh.size(); i++) {
518        psshsize += 20 + mPssh[i].datalen;
519    }
520    if (psshsize) {
521        char *buf = (char*)malloc(psshsize);
522        char *ptr = buf;
523        for (size_t i = 0; i < mPssh.size(); i++) {
524            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
525            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
526            ptr += (20 + mPssh[i].datalen);
527        }
528        mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize);
529        free(buf);
530    }
531    return mInitCheck;
532}
533
534char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
535    if (mFirstSINF == NULL) {
536        return NULL;
537    }
538
539    SINF *sinf = mFirstSINF;
540    while (sinf && (trackID != sinf->trackID)) {
541        sinf = sinf->next;
542    }
543
544    if (sinf == NULL) {
545        return NULL;
546    }
547
548    *len = sinf->len;
549    return sinf->IPMPData;
550}
551
552// Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
553static int32_t readSize(off64_t offset,
554        const sp<DataSource> DataSource, uint8_t *numOfBytes) {
555    uint32_t size = 0;
556    uint8_t data;
557    bool moreData = true;
558    *numOfBytes = 0;
559
560    while (moreData) {
561        if (DataSource->readAt(offset, &data, 1) < 1) {
562            return -1;
563        }
564        offset ++;
565        moreData = (data >= 128) ? true : false;
566        size = (size << 7) | (data & 0x7f); // Take last 7 bits
567        (*numOfBytes) ++;
568    }
569
570    return size;
571}
572
573status_t MPEG4Extractor::parseDrmSINF(off64_t *offset, off64_t data_offset) {
574    uint8_t updateIdTag;
575    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
576        return ERROR_IO;
577    }
578    data_offset ++;
579
580    if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
581        return ERROR_MALFORMED;
582    }
583
584    uint8_t numOfBytes;
585    int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
586    if (size < 0) {
587        return ERROR_IO;
588    }
589    int32_t classSize = size;
590    data_offset += numOfBytes;
591
592    while(size >= 11 ) {
593        uint8_t descriptorTag;
594        if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
595            return ERROR_IO;
596        }
597        data_offset ++;
598
599        if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
600            return ERROR_MALFORMED;
601        }
602
603        uint8_t buffer[8];
604        //ObjectDescriptorID and ObjectDescriptor url flag
605        if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
606            return ERROR_IO;
607        }
608        data_offset += 2;
609
610        if ((buffer[1] >> 5) & 0x0001) { //url flag is set
611            return ERROR_MALFORMED;
612        }
613
614        if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
615            return ERROR_IO;
616        }
617        data_offset += 8;
618
619        if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
620                || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
621            return ERROR_MALFORMED;
622        }
623
624        SINF *sinf = new SINF;
625        sinf->trackID = U16_AT(&buffer[3]);
626        sinf->IPMPDescriptorID = buffer[7];
627        sinf->next = mFirstSINF;
628        mFirstSINF = sinf;
629
630        size -= (8 + 2 + 1);
631    }
632
633    if (size != 0) {
634        return ERROR_MALFORMED;
635    }
636
637    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
638        return ERROR_IO;
639    }
640    data_offset ++;
641
642    if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
643        return ERROR_MALFORMED;
644    }
645
646    size = readSize(data_offset, mDataSource, &numOfBytes);
647    if (size < 0) {
648        return ERROR_IO;
649    }
650    classSize = size;
651    data_offset += numOfBytes;
652
653    while (size > 0) {
654        uint8_t tag;
655        int32_t dataLen;
656        if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
657            return ERROR_IO;
658        }
659        data_offset ++;
660
661        if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
662            uint8_t id;
663            dataLen = readSize(data_offset, mDataSource, &numOfBytes);
664            if (dataLen < 0) {
665                return ERROR_IO;
666            } else if (dataLen < 4) {
667                return ERROR_MALFORMED;
668            }
669            data_offset += numOfBytes;
670
671            if (mDataSource->readAt(data_offset, &id, 1) < 1) {
672                return ERROR_IO;
673            }
674            data_offset ++;
675
676            SINF *sinf = mFirstSINF;
677            while (sinf && (sinf->IPMPDescriptorID != id)) {
678                sinf = sinf->next;
679            }
680            if (sinf == NULL) {
681                return ERROR_MALFORMED;
682            }
683            sinf->len = dataLen - 3;
684            sinf->IPMPData = new char[sinf->len];
685            data_offset += 2;
686
687            if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) {
688                return ERROR_IO;
689            }
690            data_offset += sinf->len;
691
692            size -= (dataLen + numOfBytes + 1);
693        }
694    }
695
696    if (size != 0) {
697        return ERROR_MALFORMED;
698    }
699
700    return UNKNOWN_ERROR;  // Return a dummy error.
701}
702
703struct PathAdder {
704    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
705        : mPath(path) {
706        mPath->push(chunkType);
707    }
708
709    ~PathAdder() {
710        mPath->pop();
711    }
712
713private:
714    Vector<uint32_t> *mPath;
715
716    PathAdder(const PathAdder &);
717    PathAdder &operator=(const PathAdder &);
718};
719
720static bool underMetaDataPath(const Vector<uint32_t> &path) {
721    return path.size() >= 5
722        && path[0] == FOURCC('m', 'o', 'o', 'v')
723        && path[1] == FOURCC('u', 'd', 't', 'a')
724        && path[2] == FOURCC('m', 'e', 't', 'a')
725        && path[3] == FOURCC('i', 'l', 's', 't');
726}
727
728// Given a time in seconds since Jan 1 1904, produce a human-readable string.
729static void convertTimeToDate(int64_t time_1904, String8 *s) {
730    time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600);
731
732    char tmp[32];
733    strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970));
734
735    s->setTo(tmp);
736}
737
738status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
739    ALOGV("entering parseChunk %lld/%d", *offset, depth);
740    uint32_t hdr[2];
741    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
742        return ERROR_IO;
743    }
744    uint64_t chunk_size = ntohl(hdr[0]);
745    uint32_t chunk_type = ntohl(hdr[1]);
746    off64_t data_offset = *offset + 8;
747
748    if (chunk_size == 1) {
749        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
750            return ERROR_IO;
751        }
752        chunk_size = ntoh64(chunk_size);
753        data_offset += 8;
754
755        if (chunk_size < 16) {
756            // The smallest valid chunk is 16 bytes long in this case.
757            return ERROR_MALFORMED;
758        }
759    } else if (chunk_size < 8) {
760        // The smallest valid chunk is 8 bytes long.
761        return ERROR_MALFORMED;
762    }
763
764    char chunk[5];
765    MakeFourCCString(chunk_type, chunk);
766    ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth);
767
768#if 0
769    static const char kWhitespace[] = "                                        ";
770    const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
771    printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size);
772
773    char buffer[256];
774    size_t n = chunk_size;
775    if (n > sizeof(buffer)) {
776        n = sizeof(buffer);
777    }
778    if (mDataSource->readAt(*offset, buffer, n)
779            < (ssize_t)n) {
780        return ERROR_IO;
781    }
782
783    hexdump(buffer, n);
784#endif
785
786    PathAdder autoAdder(&mPath, chunk_type);
787
788    off64_t chunk_data_size = *offset + chunk_size - data_offset;
789
790    if (chunk_type != FOURCC('c', 'p', 'r', 't')
791            && chunk_type != FOURCC('c', 'o', 'v', 'r')
792            && mPath.size() == 5 && underMetaDataPath(mPath)) {
793        off64_t stop_offset = *offset + chunk_size;
794        *offset = data_offset;
795        while (*offset < stop_offset) {
796            status_t err = parseChunk(offset, depth + 1);
797            if (err != OK) {
798                return err;
799            }
800        }
801
802        if (*offset != stop_offset) {
803            return ERROR_MALFORMED;
804        }
805
806        return OK;
807    }
808
809    switch(chunk_type) {
810        case FOURCC('m', 'o', 'o', 'v'):
811        case FOURCC('t', 'r', 'a', 'k'):
812        case FOURCC('m', 'd', 'i', 'a'):
813        case FOURCC('m', 'i', 'n', 'f'):
814        case FOURCC('d', 'i', 'n', 'f'):
815        case FOURCC('s', 't', 'b', 'l'):
816        case FOURCC('m', 'v', 'e', 'x'):
817        case FOURCC('m', 'o', 'o', 'f'):
818        case FOURCC('t', 'r', 'a', 'f'):
819        case FOURCC('m', 'f', 'r', 'a'):
820        case FOURCC('u', 'd', 't', 'a'):
821        case FOURCC('i', 'l', 's', 't'):
822        case FOURCC('s', 'i', 'n', 'f'):
823        case FOURCC('s', 'c', 'h', 'i'):
824        case FOURCC('e', 'd', 't', 's'):
825        {
826            if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
827                ALOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size);
828
829                if (mDataSource->flags()
830                        & (DataSource::kWantsPrefetching
831                            | DataSource::kIsCachingDataSource)) {
832                    sp<MPEG4DataSource> cachedSource =
833                        new MPEG4DataSource(mDataSource);
834
835                    if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
836                        mDataSource = cachedSource;
837                    }
838                }
839
840                mLastTrack->sampleTable = new SampleTable(mDataSource);
841            }
842
843            bool isTrack = false;
844            if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
845                isTrack = true;
846
847                Track *track = new Track;
848                track->next = NULL;
849                if (mLastTrack) {
850                    mLastTrack->next = track;
851                } else {
852                    mFirstTrack = track;
853                }
854                mLastTrack = track;
855
856                track->meta = new MetaData;
857                track->includes_expensive_metadata = false;
858                track->skipTrack = false;
859                track->timescale = 0;
860                track->meta->setCString(kKeyMIMEType, "application/octet-stream");
861            }
862
863            off64_t stop_offset = *offset + chunk_size;
864            *offset = data_offset;
865            while (*offset < stop_offset) {
866                status_t err = parseChunk(offset, depth + 1);
867                if (err != OK) {
868                    return err;
869                }
870            }
871
872            if (*offset != stop_offset) {
873                return ERROR_MALFORMED;
874            }
875
876            if (isTrack) {
877                if (mLastTrack->skipTrack) {
878                    Track *cur = mFirstTrack;
879
880                    if (cur == mLastTrack) {
881                        delete cur;
882                        mFirstTrack = mLastTrack = NULL;
883                    } else {
884                        while (cur && cur->next != mLastTrack) {
885                            cur = cur->next;
886                        }
887                        cur->next = NULL;
888                        delete mLastTrack;
889                        mLastTrack = cur;
890                    }
891
892                    return OK;
893                }
894
895                status_t err = verifyTrack(mLastTrack);
896
897                if (err != OK) {
898                    return err;
899                }
900            } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
901                mInitCheck = OK;
902
903                if (!mIsDrm) {
904                    return UNKNOWN_ERROR;  // Return a dummy error.
905                } else {
906                    return OK;
907                }
908            }
909            break;
910        }
911
912        case FOURCC('e', 'l', 's', 't'):
913        {
914            // See 14496-12 8.6.6
915            uint8_t version;
916            if (mDataSource->readAt(data_offset, &version, 1) < 1) {
917                return ERROR_IO;
918            }
919
920            uint32_t entry_count;
921            if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
922                return ERROR_IO;
923            }
924
925            if (entry_count != 1) {
926                // we only support a single entry at the moment, for gapless playback
927                ALOGW("ignoring edit list with %d entries", entry_count);
928            } else if (mHeaderTimescale == 0) {
929                ALOGW("ignoring edit list because timescale is 0");
930            } else {
931                off64_t entriesoffset = data_offset + 8;
932                uint64_t segment_duration;
933                int64_t media_time;
934
935                if (version == 1) {
936                    if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
937                            !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
938                        return ERROR_IO;
939                    }
940                } else if (version == 0) {
941                    uint32_t sd;
942                    int32_t mt;
943                    if (!mDataSource->getUInt32(entriesoffset, &sd) ||
944                            !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
945                        return ERROR_IO;
946                    }
947                    segment_duration = sd;
948                    media_time = mt;
949                } else {
950                    return ERROR_IO;
951                }
952
953                uint64_t halfscale = mHeaderTimescale / 2;
954                segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale;
955                media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale;
956
957                int64_t duration;
958                int32_t samplerate;
959                if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
960                        mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {
961
962                    int64_t delay = (media_time  * samplerate + 500000) / 1000000;
963                    mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
964
965                    int64_t paddingus = duration - (segment_duration + media_time);
966                    if (paddingus < 0) {
967                        // track duration from media header (which is what kKeyDuration is) might
968                        // be slightly shorter than the segment duration, which would make the
969                        // padding negative. Clamp to zero.
970                        paddingus = 0;
971                    }
972                    int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
973                    mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
974                }
975            }
976            *offset += chunk_size;
977            break;
978        }
979
980        case FOURCC('f', 'r', 'm', 'a'):
981        {
982            uint32_t original_fourcc;
983            if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
984                return ERROR_IO;
985            }
986            original_fourcc = ntohl(original_fourcc);
987            ALOGV("read original format: %d", original_fourcc);
988            mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
989            uint32_t num_channels = 0;
990            uint32_t sample_rate = 0;
991            if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
992                mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
993                mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
994            }
995            *offset += chunk_size;
996            break;
997        }
998
999        case FOURCC('t', 'e', 'n', 'c'):
1000        {
1001            if (chunk_size < 32) {
1002                return ERROR_MALFORMED;
1003            }
1004
1005            // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1006            // default IV size, 16 bytes default KeyID
1007            // (ISO 23001-7)
1008            char buf[4];
1009            memset(buf, 0, 4);
1010            if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1011                return ERROR_IO;
1012            }
1013            uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1014            if (defaultAlgorithmId > 1) {
1015                // only 0 (clear) and 1 (AES-128) are valid
1016                return ERROR_MALFORMED;
1017            }
1018
1019            memset(buf, 0, 4);
1020            if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1021                return ERROR_IO;
1022            }
1023            uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1024
1025            if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
1026                    (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
1027                // IV size must be 0 if and only if the data is unencrypted
1028                return ERROR_MALFORMED;
1029            } else if (defaultIVSize != 0 &&
1030                    defaultIVSize != 8 &&
1031                    defaultIVSize != 16) {
1032                // only supported sizes are 0, 8 and 16
1033                return ERROR_MALFORMED;
1034            }
1035
1036            uint8_t defaultKeyId[16];
1037
1038            if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1039                return ERROR_IO;
1040            }
1041
1042            mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
1043            mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1044            mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1045            *offset += chunk_size;
1046            break;
1047        }
1048
1049        case FOURCC('t', 'k', 'h', 'd'):
1050        {
1051            status_t err;
1052            if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1053                return err;
1054            }
1055
1056            *offset += chunk_size;
1057            break;
1058        }
1059
1060        case FOURCC('p', 's', 's', 'h'):
1061        {
1062            PsshInfo pssh;
1063
1064            if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1065                return ERROR_IO;
1066            }
1067
1068            uint32_t psshdatalen = 0;
1069            if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1070                return ERROR_IO;
1071            }
1072            pssh.datalen = ntohl(psshdatalen);
1073            ALOGV("pssh data size: %d", pssh.datalen);
1074            if (pssh.datalen + 20 > chunk_size) {
1075                // pssh data length exceeds size of containing box
1076                return ERROR_MALFORMED;
1077            }
1078
1079            pssh.data = new uint8_t[pssh.datalen];
1080            ALOGV("allocated pssh @ %p", pssh.data);
1081            ssize_t requested = (ssize_t) pssh.datalen;
1082            if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1083                return ERROR_IO;
1084            }
1085            mPssh.push_back(pssh);
1086
1087            *offset += chunk_size;
1088            break;
1089        }
1090
1091        case FOURCC('m', 'd', 'h', 'd'):
1092        {
1093            if (chunk_data_size < 4) {
1094                return ERROR_MALFORMED;
1095            }
1096
1097            uint8_t version;
1098            if (mDataSource->readAt(
1099                        data_offset, &version, sizeof(version))
1100                    < (ssize_t)sizeof(version)) {
1101                return ERROR_IO;
1102            }
1103
1104            off64_t timescale_offset;
1105
1106            if (version == 1) {
1107                timescale_offset = data_offset + 4 + 16;
1108            } else if (version == 0) {
1109                timescale_offset = data_offset + 4 + 8;
1110            } else {
1111                return ERROR_IO;
1112            }
1113
1114            uint32_t timescale;
1115            if (mDataSource->readAt(
1116                        timescale_offset, &timescale, sizeof(timescale))
1117                    < (ssize_t)sizeof(timescale)) {
1118                return ERROR_IO;
1119            }
1120
1121            mLastTrack->timescale = ntohl(timescale);
1122
1123            int64_t duration = 0;
1124            if (version == 1) {
1125                if (mDataSource->readAt(
1126                            timescale_offset + 4, &duration, sizeof(duration))
1127                        < (ssize_t)sizeof(duration)) {
1128                    return ERROR_IO;
1129                }
1130                duration = ntoh64(duration);
1131            } else {
1132                uint32_t duration32;
1133                if (mDataSource->readAt(
1134                            timescale_offset + 4, &duration32, sizeof(duration32))
1135                        < (ssize_t)sizeof(duration32)) {
1136                    return ERROR_IO;
1137                }
1138                // ffmpeg sets duration to -1, which is incorrect.
1139                if (duration32 != 0xffffffff) {
1140                    duration = ntohl(duration32);
1141                }
1142            }
1143            mLastTrack->meta->setInt64(
1144                    kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1145
1146            uint8_t lang[2];
1147            off64_t lang_offset;
1148            if (version == 1) {
1149                lang_offset = timescale_offset + 4 + 8;
1150            } else if (version == 0) {
1151                lang_offset = timescale_offset + 4 + 4;
1152            } else {
1153                return ERROR_IO;
1154            }
1155
1156            if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1157                    < (ssize_t)sizeof(lang)) {
1158                return ERROR_IO;
1159            }
1160
1161            // To get the ISO-639-2/T three character language code
1162            // 1 bit pad followed by 3 5-bits characters. Each character
1163            // is packed as the difference between its ASCII value and 0x60.
1164            char lang_code[4];
1165            lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1166            lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1167            lang_code[2] = (lang[1] & 0x1f) + 0x60;
1168            lang_code[3] = '\0';
1169
1170            mLastTrack->meta->setCString(
1171                    kKeyMediaLanguage, lang_code);
1172
1173            *offset += chunk_size;
1174            break;
1175        }
1176
1177        case FOURCC('s', 't', 's', 'd'):
1178        {
1179            if (chunk_data_size < 8) {
1180                return ERROR_MALFORMED;
1181            }
1182
1183            uint8_t buffer[8];
1184            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1185                return ERROR_MALFORMED;
1186            }
1187
1188            if (mDataSource->readAt(
1189                        data_offset, buffer, 8) < 8) {
1190                return ERROR_IO;
1191            }
1192
1193            if (U32_AT(buffer) != 0) {
1194                // Should be version 0, flags 0.
1195                return ERROR_MALFORMED;
1196            }
1197
1198            uint32_t entry_count = U32_AT(&buffer[4]);
1199
1200            if (entry_count > 1) {
1201                // For 3GPP timed text, there could be multiple tx3g boxes containing
1202                // multiple text display formats. These formats will be used to
1203                // display the timed text.
1204                // For encrypted files, there may also be more than one entry.
1205                const char *mime;
1206                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1207                if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1208                        strcasecmp(mime, "application/octet-stream")) {
1209                    // For now we only support a single type of media per track.
1210                    mLastTrack->skipTrack = true;
1211                    *offset += chunk_size;
1212                    break;
1213                }
1214            }
1215            off64_t stop_offset = *offset + chunk_size;
1216            *offset = data_offset + 8;
1217            for (uint32_t i = 0; i < entry_count; ++i) {
1218                status_t err = parseChunk(offset, depth + 1);
1219                if (err != OK) {
1220                    return err;
1221                }
1222            }
1223
1224            if (*offset != stop_offset) {
1225                return ERROR_MALFORMED;
1226            }
1227            break;
1228        }
1229
1230        case FOURCC('m', 'p', '4', 'a'):
1231        case FOURCC('e', 'n', 'c', 'a'):
1232        case FOURCC('s', 'a', 'm', 'r'):
1233        case FOURCC('s', 'a', 'w', 'b'):
1234        {
1235            uint8_t buffer[8 + 20];
1236            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1237                // Basic AudioSampleEntry size.
1238                return ERROR_MALFORMED;
1239            }
1240
1241            if (mDataSource->readAt(
1242                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1243                return ERROR_IO;
1244            }
1245
1246            uint16_t data_ref_index = U16_AT(&buffer[6]);
1247            uint32_t num_channels = U16_AT(&buffer[16]);
1248
1249            uint16_t sample_size = U16_AT(&buffer[18]);
1250            uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1251
1252            if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1253                // if the chunk type is enca, we'll get the type from the sinf/frma box later
1254                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1255                AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1256            }
1257            ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1258                   chunk, num_channels, sample_size, sample_rate);
1259            mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1260            mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1261
1262            off64_t stop_offset = *offset + chunk_size;
1263            *offset = data_offset + sizeof(buffer);
1264            while (*offset < stop_offset) {
1265                status_t err = parseChunk(offset, depth + 1);
1266                if (err != OK) {
1267                    return err;
1268                }
1269            }
1270
1271            if (*offset != stop_offset) {
1272                return ERROR_MALFORMED;
1273            }
1274            break;
1275        }
1276
1277        case FOURCC('m', 'p', '4', 'v'):
1278        case FOURCC('e', 'n', 'c', 'v'):
1279        case FOURCC('s', '2', '6', '3'):
1280        case FOURCC('H', '2', '6', '3'):
1281        case FOURCC('h', '2', '6', '3'):
1282        case FOURCC('a', 'v', 'c', '1'):
1283        {
1284            mHasVideo = true;
1285
1286            uint8_t buffer[78];
1287            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1288                // Basic VideoSampleEntry size.
1289                return ERROR_MALFORMED;
1290            }
1291
1292            if (mDataSource->readAt(
1293                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1294                return ERROR_IO;
1295            }
1296
1297            uint16_t data_ref_index = U16_AT(&buffer[6]);
1298            uint16_t width = U16_AT(&buffer[6 + 18]);
1299            uint16_t height = U16_AT(&buffer[6 + 20]);
1300
1301            // The video sample is not standard-compliant if it has invalid dimension.
1302            // Use some default width and height value, and
1303            // let the decoder figure out the actual width and height (and thus
1304            // be prepared for INFO_FORMAT_CHANGED event).
1305            if (width == 0)  width  = 352;
1306            if (height == 0) height = 288;
1307
1308            // printf("*** coding='%s' width=%d height=%d\n",
1309            //        chunk, width, height);
1310
1311            if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1312                // if the chunk type is encv, we'll get the type from the sinf/frma box later
1313                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1314            }
1315            mLastTrack->meta->setInt32(kKeyWidth, width);
1316            mLastTrack->meta->setInt32(kKeyHeight, height);
1317
1318            off64_t stop_offset = *offset + chunk_size;
1319            *offset = data_offset + sizeof(buffer);
1320            while (*offset < stop_offset) {
1321                status_t err = parseChunk(offset, depth + 1);
1322                if (err != OK) {
1323                    return err;
1324                }
1325            }
1326
1327            if (*offset != stop_offset) {
1328                return ERROR_MALFORMED;
1329            }
1330            break;
1331        }
1332
1333        case FOURCC('s', 't', 'c', 'o'):
1334        case FOURCC('c', 'o', '6', '4'):
1335        {
1336            status_t err =
1337                mLastTrack->sampleTable->setChunkOffsetParams(
1338                        chunk_type, data_offset, chunk_data_size);
1339
1340            if (err != OK) {
1341                return err;
1342            }
1343
1344            *offset += chunk_size;
1345            break;
1346        }
1347
1348        case FOURCC('s', 't', 's', 'c'):
1349        {
1350            status_t err =
1351                mLastTrack->sampleTable->setSampleToChunkParams(
1352                        data_offset, chunk_data_size);
1353
1354            if (err != OK) {
1355                return err;
1356            }
1357
1358            *offset += chunk_size;
1359            break;
1360        }
1361
1362        case FOURCC('s', 't', 's', 'z'):
1363        case FOURCC('s', 't', 'z', '2'):
1364        {
1365            status_t err =
1366                mLastTrack->sampleTable->setSampleSizeParams(
1367                        chunk_type, data_offset, chunk_data_size);
1368
1369            if (err != OK) {
1370                return err;
1371            }
1372
1373            size_t max_size;
1374            err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1375
1376            if (err != OK) {
1377                return err;
1378            }
1379
1380            if (max_size != 0) {
1381                // Assume that a given buffer only contains at most 10 chunks,
1382                // each chunk originally prefixed with a 2 byte length will
1383                // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1384                // and thus will grow by 2 bytes per chunk.
1385                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1386            } else {
1387                // No size was specified. Pick a conservatively large size.
1388                int32_t width, height;
1389                if (!mLastTrack->meta->findInt32(kKeyWidth, &width) ||
1390                    !mLastTrack->meta->findInt32(kKeyHeight, &height)) {
1391                    ALOGE("No width or height, assuming worst case 1080p");
1392                    width = 1920;
1393                    height = 1080;
1394                }
1395
1396                const char *mime;
1397                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1398                if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
1399                    // AVC requires compression ratio of at least 2, and uses
1400                    // macroblocks
1401                    max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1402                } else {
1403                    // For all other formats there is no minimum compression
1404                    // ratio. Use compression ratio of 1.
1405                    max_size = width * height * 3 / 2;
1406                }
1407                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size);
1408            }
1409            *offset += chunk_size;
1410
1411            // NOTE: setting another piece of metadata invalidates any pointers (such as the
1412            // mimetype) previously obtained, so don't cache them.
1413            const char *mime;
1414            CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1415            // Calculate average frame rate.
1416            if (!strncasecmp("video/", mime, 6)) {
1417                size_t nSamples = mLastTrack->sampleTable->countSamples();
1418                int64_t durationUs;
1419                if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1420                    if (durationUs > 0) {
1421                        int32_t frameRate = (nSamples * 1000000LL +
1422                                    (durationUs >> 1)) / durationUs;
1423                        mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1424                    }
1425                }
1426            }
1427
1428            break;
1429        }
1430
1431        case FOURCC('s', 't', 't', 's'):
1432        {
1433            status_t err =
1434                mLastTrack->sampleTable->setTimeToSampleParams(
1435                        data_offset, chunk_data_size);
1436
1437            if (err != OK) {
1438                return err;
1439            }
1440
1441            *offset += chunk_size;
1442            break;
1443        }
1444
1445        case FOURCC('c', 't', 't', 's'):
1446        {
1447            status_t err =
1448                mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1449                        data_offset, chunk_data_size);
1450
1451            if (err != OK) {
1452                return err;
1453            }
1454
1455            *offset += chunk_size;
1456            break;
1457        }
1458
1459        case FOURCC('s', 't', 's', 's'):
1460        {
1461            status_t err =
1462                mLastTrack->sampleTable->setSyncSampleParams(
1463                        data_offset, chunk_data_size);
1464
1465            if (err != OK) {
1466                return err;
1467            }
1468
1469            *offset += chunk_size;
1470            break;
1471        }
1472
1473        // @xyz
1474        case FOURCC('\xA9', 'x', 'y', 'z'):
1475        {
1476            // Best case the total data length inside "@xyz" box
1477            // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/",
1478            // where "\x00\x04" is the text string length with value = 4,
1479            // "\x15\xc7" is the language code = en, and "0+0" is a
1480            // location (string) value with longitude = 0 and latitude = 0.
1481            if (chunk_data_size < 8) {
1482                return ERROR_MALFORMED;
1483            }
1484
1485            // Worst case the location string length would be 18,
1486            // for instance +90.0000-180.0000, without the trailing "/" and
1487            // the string length + language code.
1488            char buffer[18];
1489
1490            // Subtract 5 from the data size because the text string length +
1491            // language code takes 4 bytes, and the trailing slash "/" takes 1 byte.
1492            off64_t location_length = chunk_data_size - 5;
1493            if (location_length >= (off64_t) sizeof(buffer)) {
1494                return ERROR_MALFORMED;
1495            }
1496
1497            if (mDataSource->readAt(
1498                        data_offset + 4, buffer, location_length) < location_length) {
1499                return ERROR_IO;
1500            }
1501
1502            buffer[location_length] = '\0';
1503            mFileMetaData->setCString(kKeyLocation, buffer);
1504            *offset += chunk_size;
1505            break;
1506        }
1507
1508        case FOURCC('e', 's', 'd', 's'):
1509        {
1510            if (chunk_data_size < 4) {
1511                return ERROR_MALFORMED;
1512            }
1513
1514            uint8_t buffer[256];
1515            if (chunk_data_size > (off64_t)sizeof(buffer)) {
1516                return ERROR_BUFFER_TOO_SMALL;
1517            }
1518
1519            if (mDataSource->readAt(
1520                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
1521                return ERROR_IO;
1522            }
1523
1524            if (U32_AT(buffer) != 0) {
1525                // Should be version 0, flags 0.
1526                return ERROR_MALFORMED;
1527            }
1528
1529            mLastTrack->meta->setData(
1530                    kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1531
1532            if (mPath.size() >= 2
1533                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1534                // Information from the ESDS must be relied on for proper
1535                // setup of sample rate and channel count for MPEG4 Audio.
1536                // The generic header appears to only contain generic
1537                // information...
1538
1539                status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1540                        &buffer[4], chunk_data_size - 4);
1541
1542                if (err != OK) {
1543                    return err;
1544                }
1545            }
1546
1547            *offset += chunk_size;
1548            break;
1549        }
1550
1551        case FOURCC('a', 'v', 'c', 'C'):
1552        {
1553            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1554
1555            if (mDataSource->readAt(
1556                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1557                return ERROR_IO;
1558            }
1559
1560            mLastTrack->meta->setData(
1561                    kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
1562
1563            *offset += chunk_size;
1564            break;
1565        }
1566
1567        case FOURCC('d', '2', '6', '3'):
1568        {
1569            /*
1570             * d263 contains a fixed 7 bytes part:
1571             *   vendor - 4 bytes
1572             *   version - 1 byte
1573             *   level - 1 byte
1574             *   profile - 1 byte
1575             * optionally, "d263" box itself may contain a 16-byte
1576             * bit rate box (bitr)
1577             *   average bit rate - 4 bytes
1578             *   max bit rate - 4 bytes
1579             */
1580            char buffer[23];
1581            if (chunk_data_size != 7 &&
1582                chunk_data_size != 23) {
1583                ALOGE("Incorrect D263 box size %lld", chunk_data_size);
1584                return ERROR_MALFORMED;
1585            }
1586
1587            if (mDataSource->readAt(
1588                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1589                return ERROR_IO;
1590            }
1591
1592            mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1593
1594            *offset += chunk_size;
1595            break;
1596        }
1597
1598        case FOURCC('m', 'e', 't', 'a'):
1599        {
1600            uint8_t buffer[4];
1601            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1602                return ERROR_MALFORMED;
1603            }
1604
1605            if (mDataSource->readAt(
1606                        data_offset, buffer, 4) < 4) {
1607                return ERROR_IO;
1608            }
1609
1610            if (U32_AT(buffer) != 0) {
1611                // Should be version 0, flags 0.
1612
1613                // If it's not, let's assume this is one of those
1614                // apparently malformed chunks that don't have flags
1615                // and completely different semantics than what's
1616                // in the MPEG4 specs and skip it.
1617                *offset += chunk_size;
1618                return OK;
1619            }
1620
1621            off64_t stop_offset = *offset + chunk_size;
1622            *offset = data_offset + sizeof(buffer);
1623            while (*offset < stop_offset) {
1624                status_t err = parseChunk(offset, depth + 1);
1625                if (err != OK) {
1626                    return err;
1627                }
1628            }
1629
1630            if (*offset != stop_offset) {
1631                return ERROR_MALFORMED;
1632            }
1633            break;
1634        }
1635
1636        case FOURCC('m', 'e', 'a', 'n'):
1637        case FOURCC('n', 'a', 'm', 'e'):
1638        case FOURCC('d', 'a', 't', 'a'):
1639        {
1640            if (mPath.size() == 6 && underMetaDataPath(mPath)) {
1641                status_t err = parseITunesMetaData(data_offset, chunk_data_size);
1642
1643                if (err != OK) {
1644                    return err;
1645                }
1646            }
1647
1648            *offset += chunk_size;
1649            break;
1650        }
1651
1652        case FOURCC('m', 'v', 'h', 'd'):
1653        {
1654            if (chunk_data_size < 24) {
1655                return ERROR_MALFORMED;
1656            }
1657
1658            uint8_t header[24];
1659            if (mDataSource->readAt(
1660                        data_offset, header, sizeof(header))
1661                    < (ssize_t)sizeof(header)) {
1662                return ERROR_IO;
1663            }
1664
1665            uint64_t creationTime;
1666            if (header[0] == 1) {
1667                creationTime = U64_AT(&header[4]);
1668                mHeaderTimescale = U32_AT(&header[20]);
1669            } else if (header[0] != 0) {
1670                return ERROR_MALFORMED;
1671            } else {
1672                creationTime = U32_AT(&header[4]);
1673                mHeaderTimescale = U32_AT(&header[12]);
1674            }
1675
1676            String8 s;
1677            convertTimeToDate(creationTime, &s);
1678
1679            mFileMetaData->setCString(kKeyDate, s.string());
1680
1681            *offset += chunk_size;
1682            break;
1683        }
1684
1685        case FOURCC('m', 'd', 'a', 't'):
1686        {
1687            ALOGV("mdat chunk, drm: %d", mIsDrm);
1688            if (!mIsDrm) {
1689                *offset += chunk_size;
1690                break;
1691            }
1692
1693            if (chunk_size < 8) {
1694                return ERROR_MALFORMED;
1695            }
1696
1697            return parseDrmSINF(offset, data_offset);
1698        }
1699
1700        case FOURCC('h', 'd', 'l', 'r'):
1701        {
1702            uint32_t buffer;
1703            if (mDataSource->readAt(
1704                        data_offset + 8, &buffer, 4) < 4) {
1705                return ERROR_IO;
1706            }
1707
1708            uint32_t type = ntohl(buffer);
1709            // For the 3GPP file format, the handler-type within the 'hdlr' box
1710            // shall be 'text'. We also want to support 'sbtl' handler type
1711            // for a practical reason as various MPEG4 containers use it.
1712            if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
1713                mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
1714            }
1715
1716            *offset += chunk_size;
1717            break;
1718        }
1719
1720        case FOURCC('t', 'x', '3', 'g'):
1721        {
1722            uint32_t type;
1723            const void *data;
1724            size_t size = 0;
1725            if (!mLastTrack->meta->findData(
1726                    kKeyTextFormatData, &type, &data, &size)) {
1727                size = 0;
1728            }
1729
1730            uint8_t *buffer = new uint8_t[size + chunk_size];
1731
1732            if (size > 0) {
1733                memcpy(buffer, data, size);
1734            }
1735
1736            if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
1737                    < chunk_size) {
1738                delete[] buffer;
1739                buffer = NULL;
1740
1741                return ERROR_IO;
1742            }
1743
1744            mLastTrack->meta->setData(
1745                    kKeyTextFormatData, 0, buffer, size + chunk_size);
1746
1747            delete[] buffer;
1748
1749            *offset += chunk_size;
1750            break;
1751        }
1752
1753        case FOURCC('c', 'o', 'v', 'r'):
1754        {
1755            if (mFileMetaData != NULL) {
1756                ALOGV("chunk_data_size = %lld and data_offset = %lld",
1757                        chunk_data_size, data_offset);
1758                sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1);
1759                if (mDataSource->readAt(
1760                    data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) {
1761                    return ERROR_IO;
1762                }
1763                const int kSkipBytesOfDataBox = 16;
1764                mFileMetaData->setData(
1765                    kKeyAlbumArt, MetaData::TYPE_NONE,
1766                    buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
1767            }
1768
1769            *offset += chunk_size;
1770            break;
1771        }
1772
1773        case FOURCC('t', 'i', 't', 'l'):
1774        case FOURCC('p', 'e', 'r', 'f'):
1775        case FOURCC('a', 'u', 't', 'h'):
1776        case FOURCC('g', 'n', 'r', 'e'):
1777        case FOURCC('a', 'l', 'b', 'm'):
1778        case FOURCC('y', 'r', 'r', 'c'):
1779        {
1780            status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
1781
1782            if (err != OK) {
1783                return err;
1784            }
1785
1786            *offset += chunk_size;
1787            break;
1788        }
1789
1790        case FOURCC('-', '-', '-', '-'):
1791        {
1792            mLastCommentMean.clear();
1793            mLastCommentName.clear();
1794            mLastCommentData.clear();
1795            *offset += chunk_size;
1796            break;
1797        }
1798
1799        case FOURCC('s', 'i', 'd', 'x'):
1800        {
1801            parseSegmentIndex(data_offset, chunk_data_size);
1802            *offset += chunk_size;
1803            return UNKNOWN_ERROR; // stop parsing after sidx
1804        }
1805
1806        default:
1807        {
1808            *offset += chunk_size;
1809            break;
1810        }
1811    }
1812
1813    return OK;
1814}
1815
1816status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
1817  ALOGV("MPEG4Extractor::parseSegmentIndex");
1818
1819    if (size < 12) {
1820      return -EINVAL;
1821    }
1822
1823    uint32_t flags;
1824    if (!mDataSource->getUInt32(offset, &flags)) {
1825        return ERROR_MALFORMED;
1826    }
1827
1828    uint32_t version = flags >> 24;
1829    flags &= 0xffffff;
1830
1831    ALOGV("sidx version %d", version);
1832
1833    uint32_t referenceId;
1834    if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
1835        return ERROR_MALFORMED;
1836    }
1837
1838    uint32_t timeScale;
1839    if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
1840        return ERROR_MALFORMED;
1841    }
1842    ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
1843
1844    uint64_t earliestPresentationTime;
1845    uint64_t firstOffset;
1846
1847    offset += 12;
1848    size -= 12;
1849
1850    if (version == 0) {
1851        if (size < 8) {
1852            return -EINVAL;
1853        }
1854        uint32_t tmp;
1855        if (!mDataSource->getUInt32(offset, &tmp)) {
1856            return ERROR_MALFORMED;
1857        }
1858        earliestPresentationTime = tmp;
1859        if (!mDataSource->getUInt32(offset + 4, &tmp)) {
1860            return ERROR_MALFORMED;
1861        }
1862        firstOffset = tmp;
1863        offset += 8;
1864        size -= 8;
1865    } else {
1866        if (size < 16) {
1867            return -EINVAL;
1868        }
1869        if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
1870            return ERROR_MALFORMED;
1871        }
1872        if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
1873            return ERROR_MALFORMED;
1874        }
1875        offset += 16;
1876        size -= 16;
1877    }
1878    ALOGV("sidx pres/off: %Ld/%Ld", earliestPresentationTime, firstOffset);
1879
1880    if (size < 4) {
1881        return -EINVAL;
1882    }
1883
1884    uint16_t referenceCount;
1885    if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
1886        return ERROR_MALFORMED;
1887    }
1888    offset += 4;
1889    size -= 4;
1890    ALOGV("refcount: %d", referenceCount);
1891
1892    if (size < referenceCount * 12) {
1893        return -EINVAL;
1894    }
1895
1896    uint64_t total_duration = 0;
1897    for (unsigned int i = 0; i < referenceCount; i++) {
1898        uint32_t d1, d2, d3;
1899
1900        if (!mDataSource->getUInt32(offset, &d1) ||     // size
1901            !mDataSource->getUInt32(offset + 4, &d2) || // duration
1902            !mDataSource->getUInt32(offset + 8, &d3)) { // flags
1903            return ERROR_MALFORMED;
1904        }
1905
1906        if (d1 & 0x80000000) {
1907            ALOGW("sub-sidx boxes not supported yet");
1908        }
1909        bool sap = d3 & 0x80000000;
1910        bool saptype = d3 >> 28;
1911        if (!sap || saptype > 2) {
1912            ALOGW("not a stream access point, or unsupported type");
1913        }
1914        total_duration += d2;
1915        offset += 12;
1916        ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
1917        SidxEntry se;
1918        se.mSize = d1 & 0x7fffffff;
1919        se.mDurationUs = 1000000LL * d2 / timeScale;
1920        mSidxEntries.add(se);
1921    }
1922
1923    mSidxDuration = total_duration * 1000000 / timeScale;
1924    ALOGV("duration: %lld", mSidxDuration);
1925
1926    int64_t metaDuration;
1927    if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
1928        mLastTrack->meta->setInt64(kKeyDuration, mSidxDuration);
1929    }
1930    return OK;
1931}
1932
1933
1934
1935status_t MPEG4Extractor::parseTrackHeader(
1936        off64_t data_offset, off64_t data_size) {
1937    if (data_size < 4) {
1938        return ERROR_MALFORMED;
1939    }
1940
1941    uint8_t version;
1942    if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1943        return ERROR_IO;
1944    }
1945
1946    size_t dynSize = (version == 1) ? 36 : 24;
1947
1948    uint8_t buffer[36 + 60];
1949
1950    if (data_size != (off64_t)dynSize + 60) {
1951        return ERROR_MALFORMED;
1952    }
1953
1954    if (mDataSource->readAt(
1955                data_offset, buffer, data_size) < (ssize_t)data_size) {
1956        return ERROR_IO;
1957    }
1958
1959    uint64_t ctime, mtime, duration;
1960    int32_t id;
1961
1962    if (version == 1) {
1963        ctime = U64_AT(&buffer[4]);
1964        mtime = U64_AT(&buffer[12]);
1965        id = U32_AT(&buffer[20]);
1966        duration = U64_AT(&buffer[28]);
1967    } else if (version == 0) {
1968        ctime = U32_AT(&buffer[4]);
1969        mtime = U32_AT(&buffer[8]);
1970        id = U32_AT(&buffer[12]);
1971        duration = U32_AT(&buffer[20]);
1972    } else {
1973        return ERROR_UNSUPPORTED;
1974    }
1975
1976    mLastTrack->meta->setInt32(kKeyTrackID, id);
1977
1978    size_t matrixOffset = dynSize + 16;
1979    int32_t a00 = U32_AT(&buffer[matrixOffset]);
1980    int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
1981    int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
1982    int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
1983    int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
1984    int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
1985
1986#if 0
1987    ALOGI("x' = %.2f * x + %.2f * y + %.2f",
1988         a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
1989    ALOGI("y' = %.2f * x + %.2f * y + %.2f",
1990         a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
1991#endif
1992
1993    uint32_t rotationDegrees;
1994
1995    static const int32_t kFixedOne = 0x10000;
1996    if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
1997        // Identity, no rotation
1998        rotationDegrees = 0;
1999    } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2000        rotationDegrees = 90;
2001    } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2002        rotationDegrees = 270;
2003    } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2004        rotationDegrees = 180;
2005    } else {
2006        ALOGW("We only support 0,90,180,270 degree rotation matrices");
2007        rotationDegrees = 0;
2008    }
2009
2010    if (rotationDegrees != 0) {
2011        mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
2012    }
2013
2014    // Handle presentation display size, which could be different
2015    // from the image size indicated by kKeyWidth and kKeyHeight.
2016    uint32_t width = U32_AT(&buffer[dynSize + 52]);
2017    uint32_t height = U32_AT(&buffer[dynSize + 56]);
2018    mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
2019    mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
2020
2021    return OK;
2022}
2023
2024status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
2025    if (size < 4) {
2026        return ERROR_MALFORMED;
2027    }
2028
2029    uint8_t *buffer = new uint8_t[size + 1];
2030    if (mDataSource->readAt(
2031                offset, buffer, size) != (ssize_t)size) {
2032        delete[] buffer;
2033        buffer = NULL;
2034
2035        return ERROR_IO;
2036    }
2037
2038    uint32_t flags = U32_AT(buffer);
2039
2040    uint32_t metadataKey = 0;
2041    char chunk[5];
2042    MakeFourCCString(mPath[4], chunk);
2043    ALOGV("meta: %s @ %lld", chunk, offset);
2044    switch (mPath[4]) {
2045        case FOURCC(0xa9, 'a', 'l', 'b'):
2046        {
2047            metadataKey = kKeyAlbum;
2048            break;
2049        }
2050        case FOURCC(0xa9, 'A', 'R', 'T'):
2051        {
2052            metadataKey = kKeyArtist;
2053            break;
2054        }
2055        case FOURCC('a', 'A', 'R', 'T'):
2056        {
2057            metadataKey = kKeyAlbumArtist;
2058            break;
2059        }
2060        case FOURCC(0xa9, 'd', 'a', 'y'):
2061        {
2062            metadataKey = kKeyYear;
2063            break;
2064        }
2065        case FOURCC(0xa9, 'n', 'a', 'm'):
2066        {
2067            metadataKey = kKeyTitle;
2068            break;
2069        }
2070        case FOURCC(0xa9, 'w', 'r', 't'):
2071        {
2072            metadataKey = kKeyWriter;
2073            break;
2074        }
2075        case FOURCC('c', 'o', 'v', 'r'):
2076        {
2077            metadataKey = kKeyAlbumArt;
2078            break;
2079        }
2080        case FOURCC('g', 'n', 'r', 'e'):
2081        {
2082            metadataKey = kKeyGenre;
2083            break;
2084        }
2085        case FOURCC(0xa9, 'g', 'e', 'n'):
2086        {
2087            metadataKey = kKeyGenre;
2088            break;
2089        }
2090        case FOURCC('c', 'p', 'i', 'l'):
2091        {
2092            if (size == 9 && flags == 21) {
2093                char tmp[16];
2094                sprintf(tmp, "%d",
2095                        (int)buffer[size - 1]);
2096
2097                mFileMetaData->setCString(kKeyCompilation, tmp);
2098            }
2099            break;
2100        }
2101        case FOURCC('t', 'r', 'k', 'n'):
2102        {
2103            if (size == 16 && flags == 0) {
2104                char tmp[16];
2105                uint16_t* pTrack = (uint16_t*)&buffer[10];
2106                uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2107                sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2108
2109                mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2110            }
2111            break;
2112        }
2113        case FOURCC('d', 'i', 's', 'k'):
2114        {
2115            if ((size == 14 || size == 16) && flags == 0) {
2116                char tmp[16];
2117                uint16_t* pDisc = (uint16_t*)&buffer[10];
2118                uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2119                sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2120
2121                mFileMetaData->setCString(kKeyDiscNumber, tmp);
2122            }
2123            break;
2124        }
2125        case FOURCC('-', '-', '-', '-'):
2126        {
2127            buffer[size] = '\0';
2128            switch (mPath[5]) {
2129                case FOURCC('m', 'e', 'a', 'n'):
2130                    mLastCommentMean.setTo((const char *)buffer + 4);
2131                    break;
2132                case FOURCC('n', 'a', 'm', 'e'):
2133                    mLastCommentName.setTo((const char *)buffer + 4);
2134                    break;
2135                case FOURCC('d', 'a', 't', 'a'):
2136                    mLastCommentData.setTo((const char *)buffer + 8);
2137                    break;
2138            }
2139
2140            // Once we have a set of mean/name/data info, go ahead and process
2141            // it to see if its something we are interested in.  Whether or not
2142            // were are interested in the specific tag, make sure to clear out
2143            // the set so we can be ready to process another tuple should one
2144            // show up later in the file.
2145            if ((mLastCommentMean.length() != 0) &&
2146                (mLastCommentName.length() != 0) &&
2147                (mLastCommentData.length() != 0)) {
2148
2149                if (mLastCommentMean == "com.apple.iTunes"
2150                        && mLastCommentName == "iTunSMPB") {
2151                    int32_t delay, padding;
2152                    if (sscanf(mLastCommentData,
2153                               " %*x %x %x %*x", &delay, &padding) == 2) {
2154                        mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2155                        mLastTrack->meta->setInt32(kKeyEncoderPadding, padding);
2156                    }
2157                }
2158
2159                mLastCommentMean.clear();
2160                mLastCommentName.clear();
2161                mLastCommentData.clear();
2162            }
2163            break;
2164        }
2165
2166        default:
2167            break;
2168    }
2169
2170    if (size >= 8 && metadataKey) {
2171        if (metadataKey == kKeyAlbumArt) {
2172            mFileMetaData->setData(
2173                    kKeyAlbumArt, MetaData::TYPE_NONE,
2174                    buffer + 8, size - 8);
2175        } else if (metadataKey == kKeyGenre) {
2176            if (flags == 0) {
2177                // uint8_t genre code, iTunes genre codes are
2178                // the standard id3 codes, except they start
2179                // at 1 instead of 0 (e.g. Pop is 14, not 13)
2180                // We use standard id3 numbering, so subtract 1.
2181                int genrecode = (int)buffer[size - 1];
2182                genrecode--;
2183                if (genrecode < 0) {
2184                    genrecode = 255; // reserved for 'unknown genre'
2185                }
2186                char genre[10];
2187                sprintf(genre, "%d", genrecode);
2188
2189                mFileMetaData->setCString(metadataKey, genre);
2190            } else if (flags == 1) {
2191                // custom genre string
2192                buffer[size] = '\0';
2193
2194                mFileMetaData->setCString(
2195                        metadataKey, (const char *)buffer + 8);
2196            }
2197        } else {
2198            buffer[size] = '\0';
2199
2200            mFileMetaData->setCString(
2201                    metadataKey, (const char *)buffer + 8);
2202        }
2203    }
2204
2205    delete[] buffer;
2206    buffer = NULL;
2207
2208    return OK;
2209}
2210
2211status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
2212    if (size < 4) {
2213        return ERROR_MALFORMED;
2214    }
2215
2216    uint8_t *buffer = new uint8_t[size];
2217    if (mDataSource->readAt(
2218                offset, buffer, size) != (ssize_t)size) {
2219        delete[] buffer;
2220        buffer = NULL;
2221
2222        return ERROR_IO;
2223    }
2224
2225    uint32_t metadataKey = 0;
2226    switch (mPath[depth]) {
2227        case FOURCC('t', 'i', 't', 'l'):
2228        {
2229            metadataKey = kKeyTitle;
2230            break;
2231        }
2232        case FOURCC('p', 'e', 'r', 'f'):
2233        {
2234            metadataKey = kKeyArtist;
2235            break;
2236        }
2237        case FOURCC('a', 'u', 't', 'h'):
2238        {
2239            metadataKey = kKeyWriter;
2240            break;
2241        }
2242        case FOURCC('g', 'n', 'r', 'e'):
2243        {
2244            metadataKey = kKeyGenre;
2245            break;
2246        }
2247        case FOURCC('a', 'l', 'b', 'm'):
2248        {
2249            if (buffer[size - 1] != '\0') {
2250              char tmp[4];
2251              sprintf(tmp, "%u", buffer[size - 1]);
2252
2253              mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2254            }
2255
2256            metadataKey = kKeyAlbum;
2257            break;
2258        }
2259        case FOURCC('y', 'r', 'r', 'c'):
2260        {
2261            char tmp[5];
2262            uint16_t year = U16_AT(&buffer[4]);
2263
2264            if (year < 10000) {
2265                sprintf(tmp, "%u", year);
2266
2267                mFileMetaData->setCString(kKeyYear, tmp);
2268            }
2269            break;
2270        }
2271
2272        default:
2273            break;
2274    }
2275
2276    if (metadataKey > 0) {
2277        bool isUTF8 = true; // Common case
2278        char16_t *framedata = NULL;
2279        int len16 = 0; // Number of UTF-16 characters
2280
2281        // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
2282        if (size - 6 >= 4) {
2283            len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
2284            framedata = (char16_t *)(buffer + 6);
2285            if (0xfffe == *framedata) {
2286                // endianness marker (BOM) doesn't match host endianness
2287                for (int i = 0; i < len16; i++) {
2288                    framedata[i] = bswap_16(framedata[i]);
2289                }
2290                // BOM is now swapped to 0xfeff, we will execute next block too
2291            }
2292
2293            if (0xfeff == *framedata) {
2294                // Remove the BOM
2295                framedata++;
2296                len16--;
2297                isUTF8 = false;
2298            }
2299            // else normal non-zero-length UTF-8 string
2300            // we can't handle UTF-16 without BOM as there is no other
2301            // indication of encoding.
2302        }
2303
2304        if (isUTF8) {
2305            mFileMetaData->setCString(metadataKey, (const char *)buffer + 6);
2306        } else {
2307            // Convert from UTF-16 string to UTF-8 string.
2308            String8 tmpUTF8str(framedata, len16);
2309            mFileMetaData->setCString(metadataKey, tmpUTF8str.string());
2310        }
2311    }
2312
2313    delete[] buffer;
2314    buffer = NULL;
2315
2316    return OK;
2317}
2318
2319sp<MediaSource> MPEG4Extractor::getTrack(size_t index) {
2320    status_t err;
2321    if ((err = readMetaData()) != OK) {
2322        return NULL;
2323    }
2324
2325    Track *track = mFirstTrack;
2326    while (index > 0) {
2327        if (track == NULL) {
2328            return NULL;
2329        }
2330
2331        track = track->next;
2332        --index;
2333    }
2334
2335    if (track == NULL) {
2336        return NULL;
2337    }
2338
2339    ALOGV("getTrack called, pssh: %d", mPssh.size());
2340
2341    return new MPEG4Source(
2342            track->meta, mDataSource, track->timescale, track->sampleTable,
2343            mSidxEntries, mMoofOffset);
2344}
2345
2346// static
2347status_t MPEG4Extractor::verifyTrack(Track *track) {
2348    const char *mime;
2349    CHECK(track->meta->findCString(kKeyMIMEType, &mime));
2350
2351    uint32_t type;
2352    const void *data;
2353    size_t size;
2354    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
2355        if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
2356                || type != kTypeAVCC) {
2357            return ERROR_MALFORMED;
2358        }
2359    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
2360            || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
2361        if (!track->meta->findData(kKeyESDS, &type, &data, &size)
2362                || type != kTypeESDS) {
2363            return ERROR_MALFORMED;
2364        }
2365    }
2366
2367    if (!track->sampleTable->isValid()) {
2368        // Make sure we have all the metadata we need.
2369        return ERROR_MALFORMED;
2370    }
2371
2372    return OK;
2373}
2374
2375status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
2376        const void *esds_data, size_t esds_size) {
2377    ESDS esds(esds_data, esds_size);
2378
2379    uint8_t objectTypeIndication;
2380    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
2381        return ERROR_MALFORMED;
2382    }
2383
2384    if (objectTypeIndication == 0xe1) {
2385        // This isn't MPEG4 audio at all, it's QCELP 14k...
2386        mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
2387        return OK;
2388    }
2389
2390    if (objectTypeIndication  == 0x6b) {
2391        // The media subtype is MP3 audio
2392        // Our software MP3 audio decoder may not be able to handle
2393        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
2394        ALOGE("MP3 track in MP4/3GPP file is not supported");
2395        return ERROR_UNSUPPORTED;
2396    }
2397
2398    const uint8_t *csd;
2399    size_t csd_size;
2400    if (esds.getCodecSpecificInfo(
2401                (const void **)&csd, &csd_size) != OK) {
2402        return ERROR_MALFORMED;
2403    }
2404
2405#if 0
2406    printf("ESD of size %d\n", csd_size);
2407    hexdump(csd, csd_size);
2408#endif
2409
2410    if (csd_size == 0) {
2411        // There's no further information, i.e. no codec specific data
2412        // Let's assume that the information provided in the mpeg4 headers
2413        // is accurate and hope for the best.
2414
2415        return OK;
2416    }
2417
2418    if (csd_size < 2) {
2419        return ERROR_MALFORMED;
2420    }
2421
2422    static uint32_t kSamplingRate[] = {
2423        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
2424        16000, 12000, 11025, 8000, 7350
2425    };
2426
2427    ABitReader br(csd, csd_size);
2428    uint32_t objectType = br.getBits(5);
2429
2430    if (objectType == 31) {  // AAC-ELD => additional 6 bits
2431        objectType = 32 + br.getBits(6);
2432    }
2433
2434    //keep AOT type
2435    mLastTrack->meta->setInt32(kKeyAACAOT, objectType);
2436
2437    uint32_t freqIndex = br.getBits(4);
2438
2439    int32_t sampleRate = 0;
2440    int32_t numChannels = 0;
2441    if (freqIndex == 15) {
2442        if (csd_size < 5) {
2443            return ERROR_MALFORMED;
2444        }
2445        sampleRate = br.getBits(24);
2446        numChannels = br.getBits(4);
2447    } else {
2448        numChannels = br.getBits(4);
2449
2450        if (freqIndex == 13 || freqIndex == 14) {
2451            return ERROR_MALFORMED;
2452        }
2453
2454        sampleRate = kSamplingRate[freqIndex];
2455    }
2456
2457    if (objectType == 5 || objectType == 29) { // SBR specific config per 14496-3 table 1.13
2458        uint32_t extFreqIndex = br.getBits(4);
2459        int32_t extSampleRate;
2460        if (extFreqIndex == 15) {
2461            if (csd_size < 8) {
2462                return ERROR_MALFORMED;
2463            }
2464            extSampleRate = br.getBits(24);
2465        } else {
2466            if (extFreqIndex == 13 || extFreqIndex == 14) {
2467                return ERROR_MALFORMED;
2468            }
2469            extSampleRate = kSamplingRate[extFreqIndex];
2470        }
2471        //TODO: save the extension sampling rate value in meta data =>
2472        //      mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
2473    }
2474
2475    if (numChannels == 0) {
2476        return ERROR_UNSUPPORTED;
2477    }
2478
2479    int32_t prevSampleRate;
2480    CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));
2481
2482    if (prevSampleRate != sampleRate) {
2483        ALOGV("mpeg4 audio sample rate different from previous setting. "
2484             "was: %d, now: %d", prevSampleRate, sampleRate);
2485    }
2486
2487    mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);
2488
2489    int32_t prevChannelCount;
2490    CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));
2491
2492    if (prevChannelCount != numChannels) {
2493        ALOGV("mpeg4 audio channel count different from previous setting. "
2494             "was: %d, now: %d", prevChannelCount, numChannels);
2495    }
2496
2497    mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);
2498
2499    return OK;
2500}
2501
2502////////////////////////////////////////////////////////////////////////////////
2503
2504MPEG4Source::MPEG4Source(
2505        const sp<MetaData> &format,
2506        const sp<DataSource> &dataSource,
2507        int32_t timeScale,
2508        const sp<SampleTable> &sampleTable,
2509        Vector<SidxEntry> &sidx,
2510        off64_t firstMoofOffset)
2511    : mFormat(format),
2512      mDataSource(dataSource),
2513      mTimescale(timeScale),
2514      mSampleTable(sampleTable),
2515      mCurrentSampleIndex(0),
2516      mCurrentFragmentIndex(0),
2517      mSegments(sidx),
2518      mFirstMoofOffset(firstMoofOffset),
2519      mCurrentMoofOffset(firstMoofOffset),
2520      mCurrentTime(0),
2521      mCurrentSampleInfoAllocSize(0),
2522      mCurrentSampleInfoSizes(NULL),
2523      mCurrentSampleInfoOffsetsAllocSize(0),
2524      mCurrentSampleInfoOffsets(NULL),
2525      mIsAVC(false),
2526      mNALLengthSize(0),
2527      mStarted(false),
2528      mGroup(NULL),
2529      mBuffer(NULL),
2530      mWantsNALFragments(false),
2531      mSrcBuffer(NULL) {
2532
2533    mFormat->findInt32(kKeyCryptoMode, &mCryptoMode);
2534    mDefaultIVSize = 0;
2535    mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
2536    uint32_t keytype;
2537    const void *key;
2538    size_t keysize;
2539    if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
2540        CHECK(keysize <= 16);
2541        memset(mCryptoKey, 0, 16);
2542        memcpy(mCryptoKey, key, keysize);
2543    }
2544
2545    const char *mime;
2546    bool success = mFormat->findCString(kKeyMIMEType, &mime);
2547    CHECK(success);
2548
2549    mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
2550
2551    if (mIsAVC) {
2552        uint32_t type;
2553        const void *data;
2554        size_t size;
2555        CHECK(format->findData(kKeyAVCC, &type, &data, &size));
2556
2557        const uint8_t *ptr = (const uint8_t *)data;
2558
2559        CHECK(size >= 7);
2560        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
2561
2562        // The number of bytes used to encode the length of a NAL unit.
2563        mNALLengthSize = 1 + (ptr[4] & 3);
2564    }
2565
2566    CHECK(format->findInt32(kKeyTrackID, &mTrackId));
2567
2568    if (mFirstMoofOffset != 0) {
2569        off64_t offset = mFirstMoofOffset;
2570        parseChunk(&offset);
2571    }
2572}
2573
2574MPEG4Source::~MPEG4Source() {
2575    if (mStarted) {
2576        stop();
2577    }
2578    free(mCurrentSampleInfoSizes);
2579    free(mCurrentSampleInfoOffsets);
2580}
2581
2582status_t MPEG4Source::start(MetaData *params) {
2583    Mutex::Autolock autoLock(mLock);
2584
2585    CHECK(!mStarted);
2586
2587    int32_t val;
2588    if (params && params->findInt32(kKeyWantsNALFragments, &val)
2589        && val != 0) {
2590        mWantsNALFragments = true;
2591    } else {
2592        mWantsNALFragments = false;
2593    }
2594
2595    mGroup = new MediaBufferGroup;
2596
2597    int32_t max_size;
2598    CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size));
2599
2600    mGroup->add_buffer(new MediaBuffer(max_size));
2601
2602    mSrcBuffer = new uint8_t[max_size];
2603
2604    mStarted = true;
2605
2606    return OK;
2607}
2608
2609status_t MPEG4Source::stop() {
2610    Mutex::Autolock autoLock(mLock);
2611
2612    CHECK(mStarted);
2613
2614    if (mBuffer != NULL) {
2615        mBuffer->release();
2616        mBuffer = NULL;
2617    }
2618
2619    delete[] mSrcBuffer;
2620    mSrcBuffer = NULL;
2621
2622    delete mGroup;
2623    mGroup = NULL;
2624
2625    mStarted = false;
2626    mCurrentSampleIndex = 0;
2627
2628    return OK;
2629}
2630
2631status_t MPEG4Source::parseChunk(off64_t *offset) {
2632    uint32_t hdr[2];
2633    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
2634        return ERROR_IO;
2635    }
2636    uint64_t chunk_size = ntohl(hdr[0]);
2637    uint32_t chunk_type = ntohl(hdr[1]);
2638    off64_t data_offset = *offset + 8;
2639
2640    if (chunk_size == 1) {
2641        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
2642            return ERROR_IO;
2643        }
2644        chunk_size = ntoh64(chunk_size);
2645        data_offset += 8;
2646
2647        if (chunk_size < 16) {
2648            // The smallest valid chunk is 16 bytes long in this case.
2649            return ERROR_MALFORMED;
2650        }
2651    } else if (chunk_size < 8) {
2652        // The smallest valid chunk is 8 bytes long.
2653        return ERROR_MALFORMED;
2654    }
2655
2656    char chunk[5];
2657    MakeFourCCString(chunk_type, chunk);
2658    ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset);
2659
2660    off64_t chunk_data_size = *offset + chunk_size - data_offset;
2661
2662    switch(chunk_type) {
2663
2664        case FOURCC('t', 'r', 'a', 'f'):
2665        case FOURCC('m', 'o', 'o', 'f'): {
2666            off64_t stop_offset = *offset + chunk_size;
2667            *offset = data_offset;
2668            while (*offset < stop_offset) {
2669                status_t err = parseChunk(offset);
2670                if (err != OK) {
2671                    return err;
2672                }
2673            }
2674            if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
2675                // *offset points to the mdat box following this moof
2676                parseChunk(offset); // doesn't actually parse it, just updates offset
2677                mNextMoofOffset = *offset;
2678            }
2679            break;
2680        }
2681
2682        case FOURCC('t', 'f', 'h', 'd'): {
2683                status_t err;
2684                if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
2685                    return err;
2686                }
2687                *offset += chunk_size;
2688                break;
2689        }
2690
2691        case FOURCC('t', 'r', 'u', 'n'): {
2692                status_t err;
2693                if (mLastParsedTrackId == mTrackId) {
2694                    if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
2695                        return err;
2696                    }
2697                }
2698
2699                *offset += chunk_size;
2700                break;
2701        }
2702
2703        case FOURCC('s', 'a', 'i', 'z'): {
2704            status_t err;
2705            if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
2706                return err;
2707            }
2708            *offset += chunk_size;
2709            break;
2710        }
2711        case FOURCC('s', 'a', 'i', 'o'): {
2712            status_t err;
2713            if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
2714                return err;
2715            }
2716            *offset += chunk_size;
2717            break;
2718        }
2719
2720        case FOURCC('m', 'd', 'a', 't'): {
2721            // parse DRM info if present
2722            ALOGV("MPEG4Source::parseChunk mdat");
2723            // if saiz/saoi was previously observed, do something with the sampleinfos
2724            *offset += chunk_size;
2725            break;
2726        }
2727
2728        default: {
2729            *offset += chunk_size;
2730            break;
2731        }
2732    }
2733    return OK;
2734}
2735
2736status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size) {
2737    ALOGV("parseSampleAuxiliaryInformationSizes");
2738    // 14496-12 8.7.12
2739    uint8_t version;
2740    if (mDataSource->readAt(
2741            offset, &version, sizeof(version))
2742            < (ssize_t)sizeof(version)) {
2743        return ERROR_IO;
2744    }
2745
2746    if (version != 0) {
2747        return ERROR_UNSUPPORTED;
2748    }
2749    offset++;
2750
2751    uint32_t flags;
2752    if (!mDataSource->getUInt24(offset, &flags)) {
2753        return ERROR_IO;
2754    }
2755    offset += 3;
2756
2757    if (flags & 1) {
2758        uint32_t tmp;
2759        if (!mDataSource->getUInt32(offset, &tmp)) {
2760            return ERROR_MALFORMED;
2761        }
2762        mCurrentAuxInfoType = tmp;
2763        offset += 4;
2764        if (!mDataSource->getUInt32(offset, &tmp)) {
2765            return ERROR_MALFORMED;
2766        }
2767        mCurrentAuxInfoTypeParameter = tmp;
2768        offset += 4;
2769    }
2770
2771    uint8_t defsize;
2772    if (mDataSource->readAt(offset, &defsize, 1) != 1) {
2773        return ERROR_MALFORMED;
2774    }
2775    mCurrentDefaultSampleInfoSize = defsize;
2776    offset++;
2777
2778    uint32_t smplcnt;
2779    if (!mDataSource->getUInt32(offset, &smplcnt)) {
2780        return ERROR_MALFORMED;
2781    }
2782    mCurrentSampleInfoCount = smplcnt;
2783    offset += 4;
2784
2785    if (mCurrentDefaultSampleInfoSize != 0) {
2786        ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
2787        return OK;
2788    }
2789    if (smplcnt > mCurrentSampleInfoAllocSize) {
2790        mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
2791        mCurrentSampleInfoAllocSize = smplcnt;
2792    }
2793
2794    mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
2795    return OK;
2796}
2797
2798status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size) {
2799    ALOGV("parseSampleAuxiliaryInformationOffsets");
2800    // 14496-12 8.7.13
2801    uint8_t version;
2802    if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
2803        return ERROR_IO;
2804    }
2805    offset++;
2806
2807    uint32_t flags;
2808    if (!mDataSource->getUInt24(offset, &flags)) {
2809        return ERROR_IO;
2810    }
2811    offset += 3;
2812
2813    uint32_t entrycount;
2814    if (!mDataSource->getUInt32(offset, &entrycount)) {
2815        return ERROR_IO;
2816    }
2817    offset += 4;
2818
2819    if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
2820        mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8);
2821        mCurrentSampleInfoOffsetsAllocSize = entrycount;
2822    }
2823    mCurrentSampleInfoOffsetCount = entrycount;
2824
2825    for (size_t i = 0; i < entrycount; i++) {
2826        if (version == 0) {
2827            uint32_t tmp;
2828            if (!mDataSource->getUInt32(offset, &tmp)) {
2829                return ERROR_IO;
2830            }
2831            mCurrentSampleInfoOffsets[i] = tmp;
2832            offset += 4;
2833        } else {
2834            uint64_t tmp;
2835            if (!mDataSource->getUInt64(offset, &tmp)) {
2836                return ERROR_IO;
2837            }
2838            mCurrentSampleInfoOffsets[i] = tmp;
2839            offset += 8;
2840        }
2841    }
2842
2843    // parse clear/encrypted data
2844
2845    off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
2846
2847    drmoffset += mCurrentMoofOffset;
2848    int ivlength;
2849    CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength));
2850
2851    // read CencSampleAuxiliaryDataFormats
2852    for (size_t i = 0; i < mCurrentSampleInfoCount; i++) {
2853        Sample *smpl = &mCurrentSamples.editItemAt(i);
2854
2855        memset(smpl->iv, 0, 16);
2856        if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) {
2857            return ERROR_IO;
2858        }
2859
2860        drmoffset += ivlength;
2861
2862        int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
2863        if (smplinfosize == 0) {
2864            smplinfosize = mCurrentSampleInfoSizes[i];
2865        }
2866        if (smplinfosize > ivlength) {
2867            uint16_t numsubsamples;
2868            if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) {
2869                return ERROR_IO;
2870            }
2871            drmoffset += 2;
2872            for (size_t j = 0; j < numsubsamples; j++) {
2873                uint16_t numclear;
2874                uint32_t numencrypted;
2875                if (!mDataSource->getUInt16(drmoffset, &numclear)) {
2876                    return ERROR_IO;
2877                }
2878                drmoffset += 2;
2879                if (!mDataSource->getUInt32(drmoffset, &numencrypted)) {
2880                    return ERROR_IO;
2881                }
2882                drmoffset += 4;
2883                smpl->clearsizes.add(numclear);
2884                smpl->encryptedsizes.add(numencrypted);
2885            }
2886        } else {
2887            smpl->clearsizes.add(0);
2888            smpl->encryptedsizes.add(smpl->size);
2889        }
2890    }
2891
2892
2893    return OK;
2894}
2895
2896status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
2897
2898    if (size < 8) {
2899        return -EINVAL;
2900    }
2901
2902    uint32_t flags;
2903    if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
2904        return ERROR_MALFORMED;
2905    }
2906
2907    if (flags & 0xff000000) {
2908        return -EINVAL;
2909    }
2910
2911    if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
2912        return ERROR_MALFORMED;
2913    }
2914
2915    if (mLastParsedTrackId != mTrackId) {
2916        // this is not the right track, skip it
2917        return OK;
2918    }
2919
2920    mTrackFragmentHeaderInfo.mFlags = flags;
2921    mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
2922    offset += 8;
2923    size -= 8;
2924
2925    ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
2926
2927    if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
2928        if (size < 8) {
2929            return -EINVAL;
2930        }
2931
2932        if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
2933            return ERROR_MALFORMED;
2934        }
2935        offset += 8;
2936        size -= 8;
2937    }
2938
2939    if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
2940        if (size < 4) {
2941            return -EINVAL;
2942        }
2943
2944        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
2945            return ERROR_MALFORMED;
2946        }
2947        offset += 4;
2948        size -= 4;
2949    }
2950
2951    if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
2952        if (size < 4) {
2953            return -EINVAL;
2954        }
2955
2956        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
2957            return ERROR_MALFORMED;
2958        }
2959        offset += 4;
2960        size -= 4;
2961    }
2962
2963    if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
2964        if (size < 4) {
2965            return -EINVAL;
2966        }
2967
2968        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
2969            return ERROR_MALFORMED;
2970        }
2971        offset += 4;
2972        size -= 4;
2973    }
2974
2975    if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
2976        if (size < 4) {
2977            return -EINVAL;
2978        }
2979
2980        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
2981            return ERROR_MALFORMED;
2982        }
2983        offset += 4;
2984        size -= 4;
2985    }
2986
2987    if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
2988        mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
2989    }
2990
2991    mTrackFragmentHeaderInfo.mDataOffset = 0;
2992    return OK;
2993}
2994
2995status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
2996
2997    ALOGV("MPEG4Extractor::parseTrackFragmentRun");
2998    if (size < 8) {
2999        return -EINVAL;
3000    }
3001
3002    enum {
3003        kDataOffsetPresent                  = 0x01,
3004        kFirstSampleFlagsPresent            = 0x04,
3005        kSampleDurationPresent              = 0x100,
3006        kSampleSizePresent                  = 0x200,
3007        kSampleFlagsPresent                 = 0x400,
3008        kSampleCompositionTimeOffsetPresent = 0x800,
3009    };
3010
3011    uint32_t flags;
3012    if (!mDataSource->getUInt32(offset, &flags)) {
3013        return ERROR_MALFORMED;
3014    }
3015    ALOGV("fragment run flags: %08x", flags);
3016
3017    if (flags & 0xff000000) {
3018        return -EINVAL;
3019    }
3020
3021    if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
3022        // These two shall not be used together.
3023        return -EINVAL;
3024    }
3025
3026    uint32_t sampleCount;
3027    if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
3028        return ERROR_MALFORMED;
3029    }
3030    offset += 8;
3031    size -= 8;
3032
3033    uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
3034
3035    uint32_t firstSampleFlags = 0;
3036
3037    if (flags & kDataOffsetPresent) {
3038        if (size < 4) {
3039            return -EINVAL;
3040        }
3041
3042        int32_t dataOffsetDelta;
3043        if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
3044            return ERROR_MALFORMED;
3045        }
3046
3047        dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
3048
3049        offset += 4;
3050        size -= 4;
3051    }
3052
3053    if (flags & kFirstSampleFlagsPresent) {
3054        if (size < 4) {
3055            return -EINVAL;
3056        }
3057
3058        if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
3059            return ERROR_MALFORMED;
3060        }
3061        offset += 4;
3062        size -= 4;
3063    }
3064
3065    uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
3066             sampleCtsOffset = 0;
3067
3068    size_t bytesPerSample = 0;
3069    if (flags & kSampleDurationPresent) {
3070        bytesPerSample += 4;
3071    } else if (mTrackFragmentHeaderInfo.mFlags
3072            & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3073        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
3074    } else {
3075        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
3076    }
3077
3078    if (flags & kSampleSizePresent) {
3079        bytesPerSample += 4;
3080    } else if (mTrackFragmentHeaderInfo.mFlags
3081            & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3082        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
3083    } else {
3084        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
3085    }
3086
3087    if (flags & kSampleFlagsPresent) {
3088        bytesPerSample += 4;
3089    } else if (mTrackFragmentHeaderInfo.mFlags
3090            & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3091        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
3092    } else {
3093        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
3094    }
3095
3096    if (flags & kSampleCompositionTimeOffsetPresent) {
3097        bytesPerSample += 4;
3098    } else {
3099        sampleCtsOffset = 0;
3100    }
3101
3102    if (size < sampleCount * bytesPerSample) {
3103        return -EINVAL;
3104    }
3105
3106    Sample tmp;
3107    for (uint32_t i = 0; i < sampleCount; ++i) {
3108        if (flags & kSampleDurationPresent) {
3109            if (!mDataSource->getUInt32(offset, &sampleDuration)) {
3110                return ERROR_MALFORMED;
3111            }
3112            offset += 4;
3113        }
3114
3115        if (flags & kSampleSizePresent) {
3116            if (!mDataSource->getUInt32(offset, &sampleSize)) {
3117                return ERROR_MALFORMED;
3118            }
3119            offset += 4;
3120        }
3121
3122        if (flags & kSampleFlagsPresent) {
3123            if (!mDataSource->getUInt32(offset, &sampleFlags)) {
3124                return ERROR_MALFORMED;
3125            }
3126            offset += 4;
3127        }
3128
3129        if (flags & kSampleCompositionTimeOffsetPresent) {
3130            if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
3131                return ERROR_MALFORMED;
3132            }
3133            offset += 4;
3134        }
3135
3136        ALOGV("adding sample %d at offset 0x%08llx, size %u, duration %u, "
3137              " flags 0x%08x", i + 1,
3138                dataOffset, sampleSize, sampleDuration,
3139                (flags & kFirstSampleFlagsPresent) && i == 0
3140                    ? firstSampleFlags : sampleFlags);
3141        tmp.offset = dataOffset;
3142        tmp.size = sampleSize;
3143        tmp.duration = sampleDuration;
3144        mCurrentSamples.add(tmp);
3145
3146        dataOffset += sampleSize;
3147    }
3148
3149    mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
3150
3151    return OK;
3152}
3153
3154sp<MetaData> MPEG4Source::getFormat() {
3155    Mutex::Autolock autoLock(mLock);
3156
3157    return mFormat;
3158}
3159
3160size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
3161    switch (mNALLengthSize) {
3162        case 1:
3163            return *data;
3164        case 2:
3165            return U16_AT(data);
3166        case 3:
3167            return ((size_t)data[0] << 16) | U16_AT(&data[1]);
3168        case 4:
3169            return U32_AT(data);
3170    }
3171
3172    // This cannot happen, mNALLengthSize springs to life by adding 1 to
3173    // a 2-bit integer.
3174    CHECK(!"Should not be here.");
3175
3176    return 0;
3177}
3178
3179status_t MPEG4Source::read(
3180        MediaBuffer **out, const ReadOptions *options) {
3181    Mutex::Autolock autoLock(mLock);
3182
3183    CHECK(mStarted);
3184
3185    if (mFirstMoofOffset > 0) {
3186        return fragmentedRead(out, options);
3187    }
3188
3189    *out = NULL;
3190
3191    int64_t targetSampleTimeUs = -1;
3192
3193    int64_t seekTimeUs;
3194    ReadOptions::SeekMode mode;
3195    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3196        uint32_t findFlags = 0;
3197        switch (mode) {
3198            case ReadOptions::SEEK_PREVIOUS_SYNC:
3199                findFlags = SampleTable::kFlagBefore;
3200                break;
3201            case ReadOptions::SEEK_NEXT_SYNC:
3202                findFlags = SampleTable::kFlagAfter;
3203                break;
3204            case ReadOptions::SEEK_CLOSEST_SYNC:
3205            case ReadOptions::SEEK_CLOSEST:
3206                findFlags = SampleTable::kFlagClosest;
3207                break;
3208            default:
3209                CHECK(!"Should not be here.");
3210                break;
3211        }
3212
3213        uint32_t sampleIndex;
3214        status_t err = mSampleTable->findSampleAtTime(
3215                seekTimeUs * mTimescale / 1000000,
3216                &sampleIndex, findFlags);
3217
3218        if (mode == ReadOptions::SEEK_CLOSEST) {
3219            // We found the closest sample already, now we want the sync
3220            // sample preceding it (or the sample itself of course), even
3221            // if the subsequent sync sample is closer.
3222            findFlags = SampleTable::kFlagBefore;
3223        }
3224
3225        uint32_t syncSampleIndex;
3226        if (err == OK) {
3227            err = mSampleTable->findSyncSampleNear(
3228                    sampleIndex, &syncSampleIndex, findFlags);
3229        }
3230
3231        uint32_t sampleTime;
3232        if (err == OK) {
3233            err = mSampleTable->getMetaDataForSample(
3234                    sampleIndex, NULL, NULL, &sampleTime);
3235        }
3236
3237        if (err != OK) {
3238            if (err == ERROR_OUT_OF_RANGE) {
3239                // An attempt to seek past the end of the stream would
3240                // normally cause this ERROR_OUT_OF_RANGE error. Propagating
3241                // this all the way to the MediaPlayer would cause abnormal
3242                // termination. Legacy behaviour appears to be to behave as if
3243                // we had seeked to the end of stream, ending normally.
3244                err = ERROR_END_OF_STREAM;
3245            }
3246            ALOGV("end of stream");
3247            return err;
3248        }
3249
3250        if (mode == ReadOptions::SEEK_CLOSEST) {
3251            targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
3252        }
3253
3254#if 0
3255        uint32_t syncSampleTime;
3256        CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
3257                    syncSampleIndex, NULL, NULL, &syncSampleTime));
3258
3259        ALOGI("seek to time %lld us => sample at time %lld us, "
3260             "sync sample at time %lld us",
3261             seekTimeUs,
3262             sampleTime * 1000000ll / mTimescale,
3263             syncSampleTime * 1000000ll / mTimescale);
3264#endif
3265
3266        mCurrentSampleIndex = syncSampleIndex;
3267        if (mBuffer != NULL) {
3268            mBuffer->release();
3269            mBuffer = NULL;
3270        }
3271
3272        // fall through
3273    }
3274
3275    off64_t offset;
3276    size_t size;
3277    uint32_t cts;
3278    bool isSyncSample;
3279    bool newBuffer = false;
3280    if (mBuffer == NULL) {
3281        newBuffer = true;
3282
3283        status_t err =
3284            mSampleTable->getMetaDataForSample(
3285                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample);
3286
3287        if (err != OK) {
3288            return err;
3289        }
3290
3291        err = mGroup->acquire_buffer(&mBuffer);
3292
3293        if (err != OK) {
3294            CHECK(mBuffer == NULL);
3295            return err;
3296        }
3297    }
3298
3299    if (!mIsAVC || mWantsNALFragments) {
3300        if (newBuffer) {
3301            ssize_t num_bytes_read =
3302                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3303
3304            if (num_bytes_read < (ssize_t)size) {
3305                mBuffer->release();
3306                mBuffer = NULL;
3307
3308                return ERROR_IO;
3309            }
3310
3311            CHECK(mBuffer != NULL);
3312            mBuffer->set_range(0, size);
3313            mBuffer->meta_data()->clear();
3314            mBuffer->meta_data()->setInt64(
3315                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3316
3317            if (targetSampleTimeUs >= 0) {
3318                mBuffer->meta_data()->setInt64(
3319                        kKeyTargetTime, targetSampleTimeUs);
3320            }
3321
3322            if (isSyncSample) {
3323                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3324            }
3325
3326            ++mCurrentSampleIndex;
3327        }
3328
3329        if (!mIsAVC) {
3330            *out = mBuffer;
3331            mBuffer = NULL;
3332
3333            return OK;
3334        }
3335
3336        // Each NAL unit is split up into its constituent fragments and
3337        // each one of them returned in its own buffer.
3338
3339        CHECK(mBuffer->range_length() >= mNALLengthSize);
3340
3341        const uint8_t *src =
3342            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3343
3344        size_t nal_size = parseNALSize(src);
3345        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3346            ALOGE("incomplete NAL unit.");
3347
3348            mBuffer->release();
3349            mBuffer = NULL;
3350
3351            return ERROR_MALFORMED;
3352        }
3353
3354        MediaBuffer *clone = mBuffer->clone();
3355        CHECK(clone != NULL);
3356        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3357
3358        CHECK(mBuffer != NULL);
3359        mBuffer->set_range(
3360                mBuffer->range_offset() + mNALLengthSize + nal_size,
3361                mBuffer->range_length() - mNALLengthSize - nal_size);
3362
3363        if (mBuffer->range_length() == 0) {
3364            mBuffer->release();
3365            mBuffer = NULL;
3366        }
3367
3368        *out = clone;
3369
3370        return OK;
3371    } else {
3372        // Whole NAL units are returned but each fragment is prefixed by
3373        // the start code (0x00 00 00 01).
3374        ssize_t num_bytes_read = 0;
3375        int32_t drm = 0;
3376        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3377        if (usesDRM) {
3378            num_bytes_read =
3379                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3380        } else {
3381            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3382        }
3383
3384        if (num_bytes_read < (ssize_t)size) {
3385            mBuffer->release();
3386            mBuffer = NULL;
3387
3388            return ERROR_IO;
3389        }
3390
3391        if (usesDRM) {
3392            CHECK(mBuffer != NULL);
3393            mBuffer->set_range(0, size);
3394
3395        } else {
3396            uint8_t *dstData = (uint8_t *)mBuffer->data();
3397            size_t srcOffset = 0;
3398            size_t dstOffset = 0;
3399
3400            while (srcOffset < size) {
3401                bool isMalFormed = (srcOffset + mNALLengthSize > size);
3402                size_t nalLength = 0;
3403                if (!isMalFormed) {
3404                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
3405                    srcOffset += mNALLengthSize;
3406                    isMalFormed = srcOffset + nalLength > size;
3407                }
3408
3409                if (isMalFormed) {
3410                    ALOGE("Video is malformed");
3411                    mBuffer->release();
3412                    mBuffer = NULL;
3413                    return ERROR_MALFORMED;
3414                }
3415
3416                if (nalLength == 0) {
3417                    continue;
3418                }
3419
3420                CHECK(dstOffset + 4 <= mBuffer->size());
3421
3422                dstData[dstOffset++] = 0;
3423                dstData[dstOffset++] = 0;
3424                dstData[dstOffset++] = 0;
3425                dstData[dstOffset++] = 1;
3426                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
3427                srcOffset += nalLength;
3428                dstOffset += nalLength;
3429            }
3430            CHECK_EQ(srcOffset, size);
3431            CHECK(mBuffer != NULL);
3432            mBuffer->set_range(0, dstOffset);
3433        }
3434
3435        mBuffer->meta_data()->clear();
3436        mBuffer->meta_data()->setInt64(
3437                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3438
3439        if (targetSampleTimeUs >= 0) {
3440            mBuffer->meta_data()->setInt64(
3441                    kKeyTargetTime, targetSampleTimeUs);
3442        }
3443
3444        if (isSyncSample) {
3445            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3446        }
3447
3448        ++mCurrentSampleIndex;
3449
3450        *out = mBuffer;
3451        mBuffer = NULL;
3452
3453        return OK;
3454    }
3455}
3456
3457status_t MPEG4Source::fragmentedRead(
3458        MediaBuffer **out, const ReadOptions *options) {
3459
3460    ALOGV("MPEG4Source::fragmentedRead");
3461
3462    CHECK(mStarted);
3463
3464    *out = NULL;
3465
3466    int64_t targetSampleTimeUs = -1;
3467
3468    int64_t seekTimeUs;
3469    ReadOptions::SeekMode mode;
3470    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3471
3472        int numSidxEntries = mSegments.size();
3473        if (numSidxEntries != 0) {
3474            int64_t totalTime = 0;
3475            off64_t totalOffset = mFirstMoofOffset;
3476            for (int i = 0; i < numSidxEntries; i++) {
3477                const SidxEntry *se = &mSegments[i];
3478                if (totalTime + se->mDurationUs > seekTimeUs) {
3479                    // The requested time is somewhere in this segment
3480                    if ((mode == ReadOptions::SEEK_NEXT_SYNC) ||
3481                        (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
3482                        (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
3483                        // requested next sync, or closest sync and it was closer to the end of
3484                        // this segment
3485                        totalTime += se->mDurationUs;
3486                        totalOffset += se->mSize;
3487                    }
3488                    break;
3489                }
3490                totalTime += se->mDurationUs;
3491                totalOffset += se->mSize;
3492            }
3493        mCurrentMoofOffset = totalOffset;
3494        mCurrentSamples.clear();
3495        mCurrentSampleIndex = 0;
3496        parseChunk(&totalOffset);
3497        mCurrentTime = totalTime * mTimescale / 1000000ll;
3498        }
3499
3500        if (mBuffer != NULL) {
3501            mBuffer->release();
3502            mBuffer = NULL;
3503        }
3504
3505        // fall through
3506    }
3507
3508    off64_t offset = 0;
3509    size_t size;
3510    uint32_t cts = 0;
3511    bool isSyncSample = false;
3512    bool newBuffer = false;
3513    if (mBuffer == NULL) {
3514        newBuffer = true;
3515
3516        if (mCurrentSampleIndex >= mCurrentSamples.size()) {
3517            // move to next fragment
3518            Sample lastSample = mCurrentSamples[mCurrentSamples.size() - 1];
3519            off64_t nextMoof = mNextMoofOffset; // lastSample.offset + lastSample.size;
3520            mCurrentMoofOffset = nextMoof;
3521            mCurrentSamples.clear();
3522            mCurrentSampleIndex = 0;
3523            parseChunk(&nextMoof);
3524                if (mCurrentSampleIndex >= mCurrentSamples.size()) {
3525                    return ERROR_END_OF_STREAM;
3526                }
3527        }
3528
3529        const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
3530        offset = smpl->offset;
3531        size = smpl->size;
3532        cts = mCurrentTime;
3533        mCurrentTime += smpl->duration;
3534        isSyncSample = (mCurrentSampleIndex == 0); // XXX
3535
3536        status_t err = mGroup->acquire_buffer(&mBuffer);
3537
3538        if (err != OK) {
3539            CHECK(mBuffer == NULL);
3540            ALOGV("acquire_buffer returned %d", err);
3541            return err;
3542        }
3543    }
3544
3545    const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
3546    const sp<MetaData> bufmeta = mBuffer->meta_data();
3547    bufmeta->clear();
3548    if (smpl->encryptedsizes.size()) {
3549        // store clear/encrypted lengths in metadata
3550        bufmeta->setData(kKeyPlainSizes, 0,
3551                smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
3552        bufmeta->setData(kKeyEncryptedSizes, 0,
3553                smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
3554        bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size?
3555        bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
3556        bufmeta->setInt32(kKeyCryptoMode, mCryptoMode);
3557        bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16);
3558    }
3559
3560    if (!mIsAVC || mWantsNALFragments) {
3561        if (newBuffer) {
3562            ssize_t num_bytes_read =
3563                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3564
3565            if (num_bytes_read < (ssize_t)size) {
3566                mBuffer->release();
3567                mBuffer = NULL;
3568
3569                ALOGV("i/o error");
3570                return ERROR_IO;
3571            }
3572
3573            CHECK(mBuffer != NULL);
3574            mBuffer->set_range(0, size);
3575            mBuffer->meta_data()->setInt64(
3576                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3577
3578            if (targetSampleTimeUs >= 0) {
3579                mBuffer->meta_data()->setInt64(
3580                        kKeyTargetTime, targetSampleTimeUs);
3581            }
3582
3583            if (isSyncSample) {
3584                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3585            }
3586
3587            ++mCurrentSampleIndex;
3588        }
3589
3590        if (!mIsAVC) {
3591            *out = mBuffer;
3592            mBuffer = NULL;
3593
3594            return OK;
3595        }
3596
3597        // Each NAL unit is split up into its constituent fragments and
3598        // each one of them returned in its own buffer.
3599
3600        CHECK(mBuffer->range_length() >= mNALLengthSize);
3601
3602        const uint8_t *src =
3603            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3604
3605        size_t nal_size = parseNALSize(src);
3606        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3607            ALOGE("incomplete NAL unit.");
3608
3609            mBuffer->release();
3610            mBuffer = NULL;
3611
3612            return ERROR_MALFORMED;
3613        }
3614
3615        MediaBuffer *clone = mBuffer->clone();
3616        CHECK(clone != NULL);
3617        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3618
3619        CHECK(mBuffer != NULL);
3620        mBuffer->set_range(
3621                mBuffer->range_offset() + mNALLengthSize + nal_size,
3622                mBuffer->range_length() - mNALLengthSize - nal_size);
3623
3624        if (mBuffer->range_length() == 0) {
3625            mBuffer->release();
3626            mBuffer = NULL;
3627        }
3628
3629        *out = clone;
3630
3631        return OK;
3632    } else {
3633        ALOGV("whole NAL");
3634        // Whole NAL units are returned but each fragment is prefixed by
3635        // the start code (0x00 00 00 01).
3636        ssize_t num_bytes_read = 0;
3637        int32_t drm = 0;
3638        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3639        if (usesDRM) {
3640            num_bytes_read =
3641                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3642        } else {
3643            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3644        }
3645
3646        if (num_bytes_read < (ssize_t)size) {
3647            mBuffer->release();
3648            mBuffer = NULL;
3649
3650            ALOGV("i/o error");
3651            return ERROR_IO;
3652        }
3653
3654        if (usesDRM) {
3655            CHECK(mBuffer != NULL);
3656            mBuffer->set_range(0, size);
3657
3658        } else {
3659            uint8_t *dstData = (uint8_t *)mBuffer->data();
3660            size_t srcOffset = 0;
3661            size_t dstOffset = 0;
3662
3663            while (srcOffset < size) {
3664                bool isMalFormed = (srcOffset + mNALLengthSize > size);
3665                size_t nalLength = 0;
3666                if (!isMalFormed) {
3667                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
3668                    srcOffset += mNALLengthSize;
3669                    isMalFormed = srcOffset + nalLength > size;
3670                }
3671
3672                if (isMalFormed) {
3673                    ALOGE("Video is malformed");
3674                    mBuffer->release();
3675                    mBuffer = NULL;
3676                    return ERROR_MALFORMED;
3677                }
3678
3679                if (nalLength == 0) {
3680                    continue;
3681                }
3682
3683                CHECK(dstOffset + 4 <= mBuffer->size());
3684
3685                dstData[dstOffset++] = 0;
3686                dstData[dstOffset++] = 0;
3687                dstData[dstOffset++] = 0;
3688                dstData[dstOffset++] = 1;
3689                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
3690                srcOffset += nalLength;
3691                dstOffset += nalLength;
3692            }
3693            CHECK_EQ(srcOffset, size);
3694            CHECK(mBuffer != NULL);
3695            mBuffer->set_range(0, dstOffset);
3696        }
3697
3698        mBuffer->meta_data()->setInt64(
3699                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3700
3701        if (targetSampleTimeUs >= 0) {
3702            mBuffer->meta_data()->setInt64(
3703                    kKeyTargetTime, targetSampleTimeUs);
3704        }
3705
3706        if (isSyncSample) {
3707            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3708        }
3709
3710        ++mCurrentSampleIndex;
3711
3712        *out = mBuffer;
3713        mBuffer = NULL;
3714
3715        return OK;
3716    }
3717}
3718
3719MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
3720        const char *mimePrefix) {
3721    for (Track *track = mFirstTrack; track != NULL; track = track->next) {
3722        const char *mime;
3723        if (track->meta != NULL
3724                && track->meta->findCString(kKeyMIMEType, &mime)
3725                && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
3726            return track;
3727        }
3728    }
3729
3730    return NULL;
3731}
3732
3733static bool LegacySniffMPEG4(
3734        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
3735    uint8_t header[8];
3736
3737    ssize_t n = source->readAt(4, header, sizeof(header));
3738    if (n < (ssize_t)sizeof(header)) {
3739        return false;
3740    }
3741
3742    if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
3743        || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
3744        || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
3745        || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
3746        || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
3747        || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) {
3748        *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
3749        *confidence = 0.4;
3750
3751        return true;
3752    }
3753
3754    return false;
3755}
3756
3757static bool isCompatibleBrand(uint32_t fourcc) {
3758    static const uint32_t kCompatibleBrands[] = {
3759        FOURCC('i', 's', 'o', 'm'),
3760        FOURCC('i', 's', 'o', '2'),
3761        FOURCC('a', 'v', 'c', '1'),
3762        FOURCC('3', 'g', 'p', '4'),
3763        FOURCC('m', 'p', '4', '1'),
3764        FOURCC('m', 'p', '4', '2'),
3765
3766        // Won't promise that the following file types can be played.
3767        // Just give these file types a chance.
3768        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
3769        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
3770
3771        FOURCC('3', 'g', '2', 'a'),  // 3GPP2
3772        FOURCC('3', 'g', '2', 'b'),
3773    };
3774
3775    for (size_t i = 0;
3776         i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
3777         ++i) {
3778        if (kCompatibleBrands[i] == fourcc) {
3779            return true;
3780        }
3781    }
3782
3783    return false;
3784}
3785
3786// Attempt to actually parse the 'ftyp' atom and determine if a suitable
3787// compatible brand is present.
3788// Also try to identify where this file's metadata ends
3789// (end of the 'moov' atom) and report it to the caller as part of
3790// the metadata.
3791static bool BetterSniffMPEG4(
3792        const sp<DataSource> &source, String8 *mimeType, float *confidence,
3793        sp<AMessage> *meta) {
3794    // We scan up to 128 bytes to identify this file as an MP4.
3795    static const off64_t kMaxScanOffset = 128ll;
3796
3797    off64_t offset = 0ll;
3798    bool foundGoodFileType = false;
3799    off64_t moovAtomEndOffset = -1ll;
3800    bool done = false;
3801
3802    while (!done && offset < kMaxScanOffset) {
3803        uint32_t hdr[2];
3804        if (source->readAt(offset, hdr, 8) < 8) {
3805            return false;
3806        }
3807
3808        uint64_t chunkSize = ntohl(hdr[0]);
3809        uint32_t chunkType = ntohl(hdr[1]);
3810        off64_t chunkDataOffset = offset + 8;
3811
3812        if (chunkSize == 1) {
3813            if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
3814                return false;
3815            }
3816
3817            chunkSize = ntoh64(chunkSize);
3818            chunkDataOffset += 8;
3819
3820            if (chunkSize < 16) {
3821                // The smallest valid chunk is 16 bytes long in this case.
3822                return false;
3823            }
3824        } else if (chunkSize < 8) {
3825            // The smallest valid chunk is 8 bytes long.
3826            return false;
3827        }
3828
3829        off64_t chunkDataSize = offset + chunkSize - chunkDataOffset;
3830
3831        char chunkstring[5];
3832        MakeFourCCString(chunkType, chunkstring);
3833        ALOGV("saw chunk type %s, size %lld @ %lld", chunkstring, chunkSize, offset);
3834        switch (chunkType) {
3835            case FOURCC('f', 't', 'y', 'p'):
3836            {
3837                if (chunkDataSize < 8) {
3838                    return false;
3839                }
3840
3841                uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
3842                for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
3843                    if (i == 1) {
3844                        // Skip this index, it refers to the minorVersion,
3845                        // not a brand.
3846                        continue;
3847                    }
3848
3849                    uint32_t brand;
3850                    if (source->readAt(
3851                                chunkDataOffset + 4 * i, &brand, 4) < 4) {
3852                        return false;
3853                    }
3854
3855                    brand = ntohl(brand);
3856
3857                    if (isCompatibleBrand(brand)) {
3858                        foundGoodFileType = true;
3859                        break;
3860                    }
3861                }
3862
3863                if (!foundGoodFileType) {
3864                    return false;
3865                }
3866
3867                break;
3868            }
3869
3870            case FOURCC('m', 'o', 'o', 'v'):
3871            {
3872                moovAtomEndOffset = offset + chunkSize;
3873
3874                done = true;
3875                break;
3876            }
3877
3878            default:
3879                break;
3880        }
3881
3882        offset += chunkSize;
3883    }
3884
3885    if (!foundGoodFileType) {
3886        return false;
3887    }
3888
3889    *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
3890    *confidence = 0.4f;
3891
3892    if (moovAtomEndOffset >= 0) {
3893        *meta = new AMessage;
3894        (*meta)->setInt64("meta-data-size", moovAtomEndOffset);
3895
3896        ALOGV("found metadata size: %lld", moovAtomEndOffset);
3897    }
3898
3899    return true;
3900}
3901
3902bool SniffMPEG4(
3903        const sp<DataSource> &source, String8 *mimeType, float *confidence,
3904        sp<AMessage> *meta) {
3905    if (BetterSniffMPEG4(source, mimeType, confidence, meta)) {
3906        return true;
3907    }
3908
3909    if (LegacySniffMPEG4(source, mimeType, confidence)) {
3910        ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
3911        return true;
3912    }
3913
3914    return false;
3915}
3916
3917}  // namespace android
3918