MPEG4Extractor.cpp revision ce171998009e1abcb0c718c0aee495fcd33645e2
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16a5a72e004bb7123445c2c3a94352d358fc80d904Behdad Esfahbod
17a5a72e004bb7123445c2c3a94352d358fc80d904Behdad Esfahbod//#define LOG_NDEBUG 0
18a5a72e004bb7123445c2c3a94352d358fc80d904Behdad Esfahbod#define LOG_TAG "MPEG4Extractor"
19a5a72e004bb7123445c2c3a94352d358fc80d904Behdad Esfahbod#include <utils/Log.h>
20a5a72e004bb7123445c2c3a94352d358fc80d904Behdad Esfahbod
21a5a72e004bb7123445c2c3a94352d358fc80d904Behdad Esfahbod#include "include/MPEG4Extractor.h"
22a5a72e004bb7123445c2c3a94352d358fc80d904Behdad Esfahbod#include "include/SampleTable.h"
23a5a72e004bb7123445c2c3a94352d358fc80d904Behdad Esfahbod#include "include/ESDS.h"
246bd9b479b8b2befbb0847282e93beade197c8038Behdad Esfahbod
25a5a72e004bb7123445c2c3a94352d358fc80d904Behdad Esfahbod#include <ctype.h>
26a5a72e004bb7123445c2c3a94352d358fc80d904Behdad Esfahbod#include <stdint.h>
27027857d0412477fb4427dcb8a8c45287c272e143Behdad Esfahbod#include <stdlib.h>
28027857d0412477fb4427dcb8a8c45287c272e143Behdad Esfahbod#include <string.h>
29d1c9eb458c843215da8df84b596bfae51fee135bBehdad Esfahbod
306bd9b479b8b2befbb0847282e93beade197c8038Behdad Esfahbod#include <media/stagefright/foundation/ABitReader.h>
31a5a72e004bb7123445c2c3a94352d358fc80d904Behdad Esfahbod#include <media/stagefright/foundation/ABuffer.h>
32027857d0412477fb4427dcb8a8c45287c272e143Behdad Esfahbod#include <media/stagefright/foundation/ADebug.h>
33027857d0412477fb4427dcb8a8c45287c272e143Behdad Esfahbod#include <media/stagefright/foundation/AMessage.h>
34027857d0412477fb4427dcb8a8c45287c272e143Behdad Esfahbod#include <media/stagefright/MediaBuffer.h>
35027857d0412477fb4427dcb8a8c45287c272e143Behdad Esfahbod#include <media/stagefright/MediaBufferGroup.h>
36a5a72e004bb7123445c2c3a94352d358fc80d904Behdad Esfahbod#include <media/stagefright/MediaDefs.h>
37acdba3f90b232fc12fcb200dca2584481b339118Behdad Esfahbod#include <media/stagefright/MediaSource.h>
38027857d0412477fb4427dcb8a8c45287c272e143Behdad Esfahbod#include <media/stagefright/MetaData.h>
39b6b7ba1313bf686e6ed567183466104c90504a67Behdad Esfahbod#include <utils/String8.h>
40027857d0412477fb4427dcb8a8c45287c272e143Behdad Esfahbod
41a5a72e004bb7123445c2c3a94352d358fc80d904Behdad Esfahbod#include <byteswap.h>
42acdba3f90b232fc12fcb200dca2584481b339118Behdad Esfahbod#include "include/ID3.h"
43027857d0412477fb4427dcb8a8c45287c272e143Behdad Esfahbod
44namespace android {
45
46class MPEG4Source : public MediaSource {
47public:
48    // Caller retains ownership of both "dataSource" and "sampleTable".
49    MPEG4Source(const sp<MetaData> &format,
50                const sp<DataSource> &dataSource,
51                int32_t timeScale,
52                const sp<SampleTable> &sampleTable,
53                Vector<SidxEntry> &sidx,
54                off64_t firstMoofOffset);
55
56    virtual status_t start(MetaData *params = NULL);
57    virtual status_t stop();
58
59    virtual sp<MetaData> getFormat();
60
61    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
62    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);
63
64protected:
65    virtual ~MPEG4Source();
66
67private:
68    Mutex mLock;
69
70    sp<MetaData> mFormat;
71    sp<DataSource> mDataSource;
72    int32_t mTimescale;
73    sp<SampleTable> mSampleTable;
74    uint32_t mCurrentSampleIndex;
75    uint32_t mCurrentFragmentIndex;
76    Vector<SidxEntry> &mSegments;
77    off64_t mFirstMoofOffset;
78    off64_t mCurrentMoofOffset;
79    off64_t mNextMoofOffset;
80    uint32_t mCurrentTime;
81    int32_t mLastParsedTrackId;
82    int32_t mTrackId;
83
84    int32_t mCryptoMode;    // passed in from extractor
85    int32_t mDefaultIVSize; // passed in from extractor
86    uint8_t mCryptoKey[16]; // passed in from extractor
87    uint32_t mCurrentAuxInfoType;
88    uint32_t mCurrentAuxInfoTypeParameter;
89    int32_t mCurrentDefaultSampleInfoSize;
90    uint32_t mCurrentSampleInfoCount;
91    uint32_t mCurrentSampleInfoAllocSize;
92    uint8_t* mCurrentSampleInfoSizes;
93    uint32_t mCurrentSampleInfoOffsetCount;
94    uint32_t mCurrentSampleInfoOffsetsAllocSize;
95    uint64_t* mCurrentSampleInfoOffsets;
96
97    bool mIsAVC;
98    size_t mNALLengthSize;
99
100    bool mStarted;
101
102    MediaBufferGroup *mGroup;
103
104    MediaBuffer *mBuffer;
105
106    bool mWantsNALFragments;
107
108    uint8_t *mSrcBuffer;
109
110    size_t parseNALSize(const uint8_t *data) const;
111    status_t parseChunk(off64_t *offset);
112    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
113    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
114    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
115    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
116
117    struct TrackFragmentHeaderInfo {
118        enum Flags {
119            kBaseDataOffsetPresent         = 0x01,
120            kSampleDescriptionIndexPresent = 0x02,
121            kDefaultSampleDurationPresent  = 0x08,
122            kDefaultSampleSizePresent      = 0x10,
123            kDefaultSampleFlagsPresent     = 0x20,
124            kDurationIsEmpty               = 0x10000,
125        };
126
127        uint32_t mTrackID;
128        uint32_t mFlags;
129        uint64_t mBaseDataOffset;
130        uint32_t mSampleDescriptionIndex;
131        uint32_t mDefaultSampleDuration;
132        uint32_t mDefaultSampleSize;
133        uint32_t mDefaultSampleFlags;
134
135        uint64_t mDataOffset;
136    };
137    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;
138
139    struct Sample {
140        off64_t offset;
141        size_t size;
142        uint32_t duration;
143        uint8_t iv[16];
144        Vector<size_t> clearsizes;
145        Vector<size_t> encryptedsizes;
146    };
147    Vector<Sample> mCurrentSamples;
148
149    MPEG4Source(const MPEG4Source &);
150    MPEG4Source &operator=(const MPEG4Source &);
151};
152
153// This custom data source wraps an existing one and satisfies requests
154// falling entirely within a cached range from the cache while forwarding
155// all remaining requests to the wrapped datasource.
156// This is used to cache the full sampletable metadata for a single track,
157// possibly wrapping multiple times to cover all tracks, i.e.
158// Each MPEG4DataSource caches the sampletable metadata for a single track.
159
160struct MPEG4DataSource : public DataSource {
161    MPEG4DataSource(const sp<DataSource> &source);
162
163    virtual status_t initCheck() const;
164    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
165    virtual status_t getSize(off64_t *size);
166    virtual uint32_t flags();
167
168    status_t setCachedRange(off64_t offset, size_t size);
169
170protected:
171    virtual ~MPEG4DataSource();
172
173private:
174    Mutex mLock;
175
176    sp<DataSource> mSource;
177    off64_t mCachedOffset;
178    size_t mCachedSize;
179    uint8_t *mCache;
180
181    void clearCache();
182
183    MPEG4DataSource(const MPEG4DataSource &);
184    MPEG4DataSource &operator=(const MPEG4DataSource &);
185};
186
187MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
188    : mSource(source),
189      mCachedOffset(0),
190      mCachedSize(0),
191      mCache(NULL) {
192}
193
194MPEG4DataSource::~MPEG4DataSource() {
195    clearCache();
196}
197
198void MPEG4DataSource::clearCache() {
199    if (mCache) {
200        free(mCache);
201        mCache = NULL;
202    }
203
204    mCachedOffset = 0;
205    mCachedSize = 0;
206}
207
208status_t MPEG4DataSource::initCheck() const {
209    return mSource->initCheck();
210}
211
212ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
213    Mutex::Autolock autoLock(mLock);
214
215    if (offset >= mCachedOffset
216            && offset + size <= mCachedOffset + mCachedSize) {
217        memcpy(data, &mCache[offset - mCachedOffset], size);
218        return size;
219    }
220
221    return mSource->readAt(offset, data, size);
222}
223
224status_t MPEG4DataSource::getSize(off64_t *size) {
225    return mSource->getSize(size);
226}
227
228uint32_t MPEG4DataSource::flags() {
229    return mSource->flags();
230}
231
232status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
233    Mutex::Autolock autoLock(mLock);
234
235    clearCache();
236
237    mCache = (uint8_t *)malloc(size);
238
239    if (mCache == NULL) {
240        return -ENOMEM;
241    }
242
243    mCachedOffset = offset;
244    mCachedSize = size;
245
246    ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
247
248    if (err < (ssize_t)size) {
249        clearCache();
250
251        return ERROR_IO;
252    }
253
254    return OK;
255}
256
257////////////////////////////////////////////////////////////////////////////////
258
// Prints "size" bytes starting at "_data" to stdout, 16 bytes per row. Each
// row shows the row offset, the bytes in hex (with an extra space after the
// eighth column) and the same bytes as characters, with '.' standing in for
// anything that is not printable.
static void hexdump(const void *_data, size_t size) {
    const size_t kRowBytes = 16;
    const uint8_t *bytes = (const uint8_t *)_data;

    for (size_t rowStart = 0; rowStart < size; rowStart += kRowBytes) {
        const size_t left = size - rowStart;
        const size_t rowLen = (left > kRowBytes) ? kRowBytes : left;

        printf("0x%04zx  ", rowStart);

        // Hex columns; a short final row is padded so the text column
        // stays aligned.
        for (size_t col = 0; col < kRowBytes; ++col) {
            if (col == 8) {
                printf(" ");
            }

            if (col < rowLen) {
                printf("%02x ", bytes[rowStart + col]);
            } else {
                printf("   ");
            }
        }

        printf(" ");

        // Character column.
        for (size_t col = 0; col < rowLen; ++col) {
            const uint8_t value = bytes[rowStart + col];
            if (isprint(value)) {
                printf("%c", value);
            } else {
                printf(".");
            }
        }

        printf("\n");
    }
}
297
298static const char *FourCC2MIME(uint32_t fourcc) {
299    switch (fourcc) {
300        case FOURCC('m', 'p', '4', 'a'):
301            return MEDIA_MIMETYPE_AUDIO_AAC;
302
303        case FOURCC('s', 'a', 'm', 'r'):
304            return MEDIA_MIMETYPE_AUDIO_AMR_NB;
305
306        case FOURCC('s', 'a', 'w', 'b'):
307            return MEDIA_MIMETYPE_AUDIO_AMR_WB;
308
309        case FOURCC('m', 'p', '4', 'v'):
310            return MEDIA_MIMETYPE_VIDEO_MPEG4;
311
312        case FOURCC('s', '2', '6', '3'):
313        case FOURCC('h', '2', '6', '3'):
314        case FOURCC('H', '2', '6', '3'):
315            return MEDIA_MIMETYPE_VIDEO_H263;
316
317        case FOURCC('a', 'v', 'c', '1'):
318            return MEDIA_MIMETYPE_VIDEO_AVC;
319
320        default:
321            CHECK(!"should not be here.");
322            return NULL;
323    }
324}
325
326static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
327    if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
328        // AMR NB audio is always mono, 8kHz
329        *channels = 1;
330        *rate = 8000;
331        return true;
332    } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
333        // AMR WB audio is always mono, 16kHz
334        *channels = 1;
335        *rate = 16000;
336        return true;
337    }
338    return false;
339}
340
341MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
342    : mSidxDuration(0),
343      mMoofOffset(0),
344      mDataSource(source),
345      mInitCheck(NO_INIT),
346      mHasVideo(false),
347      mHeaderTimescale(0),
348      mFirstTrack(NULL),
349      mLastTrack(NULL),
350      mFileMetaData(new MetaData),
351      mFirstSINF(NULL),
352      mIsDrm(false) {
353}
354
355MPEG4Extractor::~MPEG4Extractor() {
356    Track *track = mFirstTrack;
357    while (track) {
358        Track *next = track->next;
359
360        delete track;
361        track = next;
362    }
363    mFirstTrack = mLastTrack = NULL;
364
365    SINF *sinf = mFirstSINF;
366    while (sinf) {
367        SINF *next = sinf->next;
368        delete sinf->IPMPData;
369        delete sinf;
370        sinf = next;
371    }
372    mFirstSINF = NULL;
373
374    for (size_t i = 0; i < mPssh.size(); i++) {
375        delete [] mPssh[i].data;
376    }
377}
378
379uint32_t MPEG4Extractor::flags() const {
380    return CAN_PAUSE |
381            ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
382                    (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
383}
384
385sp<MetaData> MPEG4Extractor::getMetaData() {
386    status_t err;
387    if ((err = readMetaData()) != OK) {
388        return new MetaData;
389    }
390
391    return mFileMetaData;
392}
393
394size_t MPEG4Extractor::countTracks() {
395    status_t err;
396    if ((err = readMetaData()) != OK) {
397        ALOGV("MPEG4Extractor::countTracks: no tracks");
398        return 0;
399    }
400
401    size_t n = 0;
402    Track *track = mFirstTrack;
403    while (track) {
404        ++n;
405        track = track->next;
406    }
407
408    ALOGV("MPEG4Extractor::countTracks: %d tracks", n);
409    return n;
410}
411
412sp<MetaData> MPEG4Extractor::getTrackMetaData(
413        size_t index, uint32_t flags) {
414    status_t err;
415    if ((err = readMetaData()) != OK) {
416        return NULL;
417    }
418
419    Track *track = mFirstTrack;
420    while (index > 0) {
421        if (track == NULL) {
422            return NULL;
423        }
424
425        track = track->next;
426        --index;
427    }
428
429    if (track == NULL) {
430        return NULL;
431    }
432
433    if ((flags & kIncludeExtensiveMetaData)
434            && !track->includes_expensive_metadata) {
435        track->includes_expensive_metadata = true;
436
437        const char *mime;
438        CHECK(track->meta->findCString(kKeyMIMEType, &mime));
439        if (!strncasecmp("video/", mime, 6)) {
440            if (mMoofOffset > 0) {
441                int64_t duration;
442                if (track->meta->findInt64(kKeyDuration, &duration)) {
443                    // nothing fancy, just pick a frame near 1/4th of the duration
444                    track->meta->setInt64(
445                            kKeyThumbnailTime, duration / 4);
446                }
447            } else {
448                uint32_t sampleIndex;
449                uint32_t sampleTime;
450                if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK
451                        && track->sampleTable->getMetaDataForSample(
452                            sampleIndex, NULL /* offset */, NULL /* size */,
453                            &sampleTime) == OK) {
454                    track->meta->setInt64(
455                            kKeyThumbnailTime,
456                            ((int64_t)sampleTime * 1000000) / track->timescale);
457                }
458            }
459        }
460    }
461
462    return track->meta;
463}
464
// Writes the four bytes of "x", most significant first, into "s" followed
// by a terminating NUL. "s" must have room for at least 5 characters.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int i = 0; i < 4; ++i) {
        const int shift = 24 - 8 * i;
        s[i] = (x >> shift) & 0xff;
    }
    s[4] = '\0';
}
472
473status_t MPEG4Extractor::readMetaData() {
474    if (mInitCheck != NO_INIT) {
475        return mInitCheck;
476    }
477
478    off64_t offset = 0;
479    status_t err;
480    while (true) {
481        err = parseChunk(&offset, 0);
482        if (err == OK) {
483            continue;
484        }
485
486        uint32_t hdr[2];
487        if (mDataSource->readAt(offset, hdr, 8) < 8) {
488            break;
489        }
490        uint32_t chunk_type = ntohl(hdr[1]);
491        if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
492            // store the offset of the first segment
493            mMoofOffset = offset;
494        } else if (chunk_type != FOURCC('m', 'd', 'a', 't')) {
495            // keep parsing until we get to the data
496            continue;
497        }
498        break;
499    }
500
501    if (mInitCheck == OK) {
502        if (mHasVideo) {
503            mFileMetaData->setCString(
504                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
505        } else {
506            mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
507        }
508
509        mInitCheck = OK;
510    } else {
511        mInitCheck = err;
512    }
513
514    CHECK_NE(err, (status_t)NO_INIT);
515
516    // copy pssh data into file metadata
517    int psshsize = 0;
518    for (size_t i = 0; i < mPssh.size(); i++) {
519        psshsize += 20 + mPssh[i].datalen;
520    }
521    if (psshsize) {
522        char *buf = (char*)malloc(psshsize);
523        char *ptr = buf;
524        for (size_t i = 0; i < mPssh.size(); i++) {
525            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
526            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
527            ptr += (20 + mPssh[i].datalen);
528        }
529        mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize);
530        free(buf);
531    }
532    return mInitCheck;
533}
534
535char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
536    if (mFirstSINF == NULL) {
537        return NULL;
538    }
539
540    SINF *sinf = mFirstSINF;
541    while (sinf && (trackID != sinf->trackID)) {
542        sinf = sinf->next;
543    }
544
545    if (sinf == NULL) {
546        return NULL;
547    }
548
549    *len = sinf->len;
550    return sinf->IPMPData;
551}
552
553// Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
554static int32_t readSize(off64_t offset,
555        const sp<DataSource> DataSource, uint8_t *numOfBytes) {
556    uint32_t size = 0;
557    uint8_t data;
558    bool moreData = true;
559    *numOfBytes = 0;
560
561    while (moreData) {
562        if (DataSource->readAt(offset, &data, 1) < 1) {
563            return -1;
564        }
565        offset ++;
566        moreData = (data >= 128) ? true : false;
567        size = (size << 7) | (data & 0x7f); // Take last 7 bits
568        (*numOfBytes) ++;
569    }
570
571    return size;
572}
573
574status_t MPEG4Extractor::parseDrmSINF(
575        off64_t * /* offset */, off64_t data_offset) {
576    uint8_t updateIdTag;
577    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
578        return ERROR_IO;
579    }
580    data_offset ++;
581
582    if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
583        return ERROR_MALFORMED;
584    }
585
586    uint8_t numOfBytes;
587    int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
588    if (size < 0) {
589        return ERROR_IO;
590    }
591    int32_t classSize = size;
592    data_offset += numOfBytes;
593
594    while(size >= 11 ) {
595        uint8_t descriptorTag;
596        if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
597            return ERROR_IO;
598        }
599        data_offset ++;
600
601        if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
602            return ERROR_MALFORMED;
603        }
604
605        uint8_t buffer[8];
606        //ObjectDescriptorID and ObjectDescriptor url flag
607        if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
608            return ERROR_IO;
609        }
610        data_offset += 2;
611
612        if ((buffer[1] >> 5) & 0x0001) { //url flag is set
613            return ERROR_MALFORMED;
614        }
615
616        if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
617            return ERROR_IO;
618        }
619        data_offset += 8;
620
621        if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
622                || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
623            return ERROR_MALFORMED;
624        }
625
626        SINF *sinf = new SINF;
627        sinf->trackID = U16_AT(&buffer[3]);
628        sinf->IPMPDescriptorID = buffer[7];
629        sinf->next = mFirstSINF;
630        mFirstSINF = sinf;
631
632        size -= (8 + 2 + 1);
633    }
634
635    if (size != 0) {
636        return ERROR_MALFORMED;
637    }
638
639    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
640        return ERROR_IO;
641    }
642    data_offset ++;
643
644    if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
645        return ERROR_MALFORMED;
646    }
647
648    size = readSize(data_offset, mDataSource, &numOfBytes);
649    if (size < 0) {
650        return ERROR_IO;
651    }
652    classSize = size;
653    data_offset += numOfBytes;
654
655    while (size > 0) {
656        uint8_t tag;
657        int32_t dataLen;
658        if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
659            return ERROR_IO;
660        }
661        data_offset ++;
662
663        if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
664            uint8_t id;
665            dataLen = readSize(data_offset, mDataSource, &numOfBytes);
666            if (dataLen < 0) {
667                return ERROR_IO;
668            } else if (dataLen < 4) {
669                return ERROR_MALFORMED;
670            }
671            data_offset += numOfBytes;
672
673            if (mDataSource->readAt(data_offset, &id, 1) < 1) {
674                return ERROR_IO;
675            }
676            data_offset ++;
677
678            SINF *sinf = mFirstSINF;
679            while (sinf && (sinf->IPMPDescriptorID != id)) {
680                sinf = sinf->next;
681            }
682            if (sinf == NULL) {
683                return ERROR_MALFORMED;
684            }
685            sinf->len = dataLen - 3;
686            sinf->IPMPData = new char[sinf->len];
687            data_offset += 2;
688
689            if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) {
690                return ERROR_IO;
691            }
692            data_offset += sinf->len;
693
694            size -= (dataLen + numOfBytes + 1);
695        }
696    }
697
698    if (size != 0) {
699        return ERROR_MALFORMED;
700    }
701
702    return UNKNOWN_ERROR;  // Return a dummy error.
703}
704
705struct PathAdder {
706    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
707        : mPath(path) {
708        mPath->push(chunkType);
709    }
710
711    ~PathAdder() {
712        mPath->pop();
713    }
714
715private:
716    Vector<uint32_t> *mPath;
717
718    PathAdder(const PathAdder &);
719    PathAdder &operator=(const PathAdder &);
720};
721
722static bool underMetaDataPath(const Vector<uint32_t> &path) {
723    return path.size() >= 5
724        && path[0] == FOURCC('m', 'o', 'o', 'v')
725        && path[1] == FOURCC('u', 'd', 't', 'a')
726        && path[2] == FOURCC('m', 'e', 't', 'a')
727        && path[3] == FOURCC('i', 'l', 's', 't');
728}
729
730// Given a time in seconds since Jan 1 1904, produce a human-readable string.
731static void convertTimeToDate(int64_t time_1904, String8 *s) {
732    time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600);
733
734    char tmp[32];
735    strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970));
736
737    s->setTo(tmp);
738}
739
740status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
741    ALOGV("entering parseChunk %lld/%d", *offset, depth);
742    uint32_t hdr[2];
743    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
744        return ERROR_IO;
745    }
746    uint64_t chunk_size = ntohl(hdr[0]);
747    uint32_t chunk_type = ntohl(hdr[1]);
748    off64_t data_offset = *offset + 8;
749
750    if (chunk_size == 1) {
751        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
752            return ERROR_IO;
753        }
754        chunk_size = ntoh64(chunk_size);
755        data_offset += 8;
756
757        if (chunk_size < 16) {
758            // The smallest valid chunk is 16 bytes long in this case.
759            return ERROR_MALFORMED;
760        }
761    } else if (chunk_size < 8) {
762        // The smallest valid chunk is 8 bytes long.
763        return ERROR_MALFORMED;
764    }
765
766    char chunk[5];
767    MakeFourCCString(chunk_type, chunk);
768    ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth);
769
770#if 0
771    static const char kWhitespace[] = "                                        ";
772    const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
773    printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size);
774
775    char buffer[256];
776    size_t n = chunk_size;
777    if (n > sizeof(buffer)) {
778        n = sizeof(buffer);
779    }
780    if (mDataSource->readAt(*offset, buffer, n)
781            < (ssize_t)n) {
782        return ERROR_IO;
783    }
784
785    hexdump(buffer, n);
786#endif
787
788    PathAdder autoAdder(&mPath, chunk_type);
789
790    off64_t chunk_data_size = *offset + chunk_size - data_offset;
791
792    if (chunk_type != FOURCC('c', 'p', 'r', 't')
793            && chunk_type != FOURCC('c', 'o', 'v', 'r')
794            && mPath.size() == 5 && underMetaDataPath(mPath)) {
795        off64_t stop_offset = *offset + chunk_size;
796        *offset = data_offset;
797        while (*offset < stop_offset) {
798            status_t err = parseChunk(offset, depth + 1);
799            if (err != OK) {
800                return err;
801            }
802        }
803
804        if (*offset != stop_offset) {
805            return ERROR_MALFORMED;
806        }
807
808        return OK;
809    }
810
811    switch(chunk_type) {
812        case FOURCC('m', 'o', 'o', 'v'):
813        case FOURCC('t', 'r', 'a', 'k'):
814        case FOURCC('m', 'd', 'i', 'a'):
815        case FOURCC('m', 'i', 'n', 'f'):
816        case FOURCC('d', 'i', 'n', 'f'):
817        case FOURCC('s', 't', 'b', 'l'):
818        case FOURCC('m', 'v', 'e', 'x'):
819        case FOURCC('m', 'o', 'o', 'f'):
820        case FOURCC('t', 'r', 'a', 'f'):
821        case FOURCC('m', 'f', 'r', 'a'):
822        case FOURCC('u', 'd', 't', 'a'):
823        case FOURCC('i', 'l', 's', 't'):
824        case FOURCC('s', 'i', 'n', 'f'):
825        case FOURCC('s', 'c', 'h', 'i'):
826        case FOURCC('e', 'd', 't', 's'):
827        {
828            if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
829                ALOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size);
830
831                if (mDataSource->flags()
832                        & (DataSource::kWantsPrefetching
833                            | DataSource::kIsCachingDataSource)) {
834                    sp<MPEG4DataSource> cachedSource =
835                        new MPEG4DataSource(mDataSource);
836
837                    if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
838                        mDataSource = cachedSource;
839                    }
840                }
841
842                mLastTrack->sampleTable = new SampleTable(mDataSource);
843            }
844
845            bool isTrack = false;
846            if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
847                isTrack = true;
848
849                Track *track = new Track;
850                track->next = NULL;
851                if (mLastTrack) {
852                    mLastTrack->next = track;
853                } else {
854                    mFirstTrack = track;
855                }
856                mLastTrack = track;
857
858                track->meta = new MetaData;
859                track->includes_expensive_metadata = false;
860                track->skipTrack = false;
861                track->timescale = 0;
862                track->meta->setCString(kKeyMIMEType, "application/octet-stream");
863            }
864
865            off64_t stop_offset = *offset + chunk_size;
866            *offset = data_offset;
867            while (*offset < stop_offset) {
868                status_t err = parseChunk(offset, depth + 1);
869                if (err != OK) {
870                    return err;
871                }
872            }
873
874            if (*offset != stop_offset) {
875                return ERROR_MALFORMED;
876            }
877
878            if (isTrack) {
879                if (mLastTrack->skipTrack) {
880                    Track *cur = mFirstTrack;
881
882                    if (cur == mLastTrack) {
883                        delete cur;
884                        mFirstTrack = mLastTrack = NULL;
885                    } else {
886                        while (cur && cur->next != mLastTrack) {
887                            cur = cur->next;
888                        }
889                        cur->next = NULL;
890                        delete mLastTrack;
891                        mLastTrack = cur;
892                    }
893
894                    return OK;
895                }
896
897                status_t err = verifyTrack(mLastTrack);
898
899                if (err != OK) {
900                    return err;
901                }
902            } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
903                mInitCheck = OK;
904
905                if (!mIsDrm) {
906                    return UNKNOWN_ERROR;  // Return a dummy error.
907                } else {
908                    return OK;
909                }
910            }
911            break;
912        }
913
914        case FOURCC('e', 'l', 's', 't'):
915        {
916            *offset += chunk_size;
917
918            // See 14496-12 8.6.6
919            uint8_t version;
920            if (mDataSource->readAt(data_offset, &version, 1) < 1) {
921                return ERROR_IO;
922            }
923
924            uint32_t entry_count;
925            if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
926                return ERROR_IO;
927            }
928
929            if (entry_count != 1) {
930                // we only support a single entry at the moment, for gapless playback
931                ALOGW("ignoring edit list with %d entries", entry_count);
932            } else if (mHeaderTimescale == 0) {
933                ALOGW("ignoring edit list because timescale is 0");
934            } else {
935                off64_t entriesoffset = data_offset + 8;
936                uint64_t segment_duration;
937                int64_t media_time;
938
939                if (version == 1) {
940                    if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
941                            !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
942                        return ERROR_IO;
943                    }
944                } else if (version == 0) {
945                    uint32_t sd;
946                    int32_t mt;
947                    if (!mDataSource->getUInt32(entriesoffset, &sd) ||
948                            !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
949                        return ERROR_IO;
950                    }
951                    segment_duration = sd;
952                    media_time = mt;
953                } else {
954                    return ERROR_IO;
955                }
956
957                uint64_t halfscale = mHeaderTimescale / 2;
958                segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale;
959                media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale;
960
961                int64_t duration;
962                int32_t samplerate;
963                if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
964                        mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {
965
966                    int64_t delay = (media_time  * samplerate + 500000) / 1000000;
967                    mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
968
969                    int64_t paddingus = duration - (segment_duration + media_time);
970                    if (paddingus < 0) {
971                        // track duration from media header (which is what kKeyDuration is) might
972                        // be slightly shorter than the segment duration, which would make the
973                        // padding negative. Clamp to zero.
974                        paddingus = 0;
975                    }
976                    int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
977                    mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
978                }
979            }
980            break;
981        }
982
983        case FOURCC('f', 'r', 'm', 'a'):
984        {
985            *offset += chunk_size;
986
987            uint32_t original_fourcc;
988            if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
989                return ERROR_IO;
990            }
991            original_fourcc = ntohl(original_fourcc);
992            ALOGV("read original format: %d", original_fourcc);
993            mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
994            uint32_t num_channels = 0;
995            uint32_t sample_rate = 0;
996            if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
997                mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
998                mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
999            }
1000            break;
1001        }
1002
1003        case FOURCC('t', 'e', 'n', 'c'):
1004        {
1005            *offset += chunk_size;
1006
1007            if (chunk_size < 32) {
1008                return ERROR_MALFORMED;
1009            }
1010
1011            // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1012            // default IV size, 16 bytes default KeyID
1013            // (ISO 23001-7)
1014            char buf[4];
1015            memset(buf, 0, 4);
1016            if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1017                return ERROR_IO;
1018            }
1019            uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1020            if (defaultAlgorithmId > 1) {
1021                // only 0 (clear) and 1 (AES-128) are valid
1022                return ERROR_MALFORMED;
1023            }
1024
1025            memset(buf, 0, 4);
1026            if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1027                return ERROR_IO;
1028            }
1029            uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1030
1031            if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
1032                    (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
1033                // only unencrypted data must have 0 IV size
1034                return ERROR_MALFORMED;
1035            } else if (defaultIVSize != 0 &&
1036                    defaultIVSize != 8 &&
1037                    defaultIVSize != 16) {
1038                // only supported sizes are 0, 8 and 16
1039                return ERROR_MALFORMED;
1040            }
1041
1042            uint8_t defaultKeyId[16];
1043
1044            if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1045                return ERROR_IO;
1046            }
1047
1048            mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
1049            mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1050            mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1051            break;
1052        }
1053
1054        case FOURCC('t', 'k', 'h', 'd'):
1055        {
1056            *offset += chunk_size;
1057
1058            status_t err;
1059            if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1060                return err;
1061            }
1062
1063            break;
1064        }
1065
1066        case FOURCC('p', 's', 's', 'h'):
1067        {
1068            *offset += chunk_size;
1069
1070            PsshInfo pssh;
1071
1072            if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1073                return ERROR_IO;
1074            }
1075
1076            uint32_t psshdatalen = 0;
1077            if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1078                return ERROR_IO;
1079            }
1080            pssh.datalen = ntohl(psshdatalen);
1081            ALOGV("pssh data size: %d", pssh.datalen);
1082            if (pssh.datalen + 20 > chunk_size) {
1083                // pssh data length exceeds size of containing box
1084                return ERROR_MALFORMED;
1085            }
1086
1087            pssh.data = new uint8_t[pssh.datalen];
1088            ALOGV("allocated pssh @ %p", pssh.data);
1089            ssize_t requested = (ssize_t) pssh.datalen;
1090            if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1091                return ERROR_IO;
1092            }
1093            mPssh.push_back(pssh);
1094
1095            break;
1096        }
1097
1098        case FOURCC('m', 'd', 'h', 'd'):
1099        {
1100            *offset += chunk_size;
1101
1102            if (chunk_data_size < 4) {
1103                return ERROR_MALFORMED;
1104            }
1105
1106            uint8_t version;
1107            if (mDataSource->readAt(
1108                        data_offset, &version, sizeof(version))
1109                    < (ssize_t)sizeof(version)) {
1110                return ERROR_IO;
1111            }
1112
1113            off64_t timescale_offset;
1114
1115            if (version == 1) {
1116                timescale_offset = data_offset + 4 + 16;
1117            } else if (version == 0) {
1118                timescale_offset = data_offset + 4 + 8;
1119            } else {
1120                return ERROR_IO;
1121            }
1122
1123            uint32_t timescale;
1124            if (mDataSource->readAt(
1125                        timescale_offset, &timescale, sizeof(timescale))
1126                    < (ssize_t)sizeof(timescale)) {
1127                return ERROR_IO;
1128            }
1129
1130            mLastTrack->timescale = ntohl(timescale);
1131
1132            int64_t duration = 0;
1133            if (version == 1) {
1134                if (mDataSource->readAt(
1135                            timescale_offset + 4, &duration, sizeof(duration))
1136                        < (ssize_t)sizeof(duration)) {
1137                    return ERROR_IO;
1138                }
1139                duration = ntoh64(duration);
1140            } else {
1141                uint32_t duration32;
1142                if (mDataSource->readAt(
1143                            timescale_offset + 4, &duration32, sizeof(duration32))
1144                        < (ssize_t)sizeof(duration32)) {
1145                    return ERROR_IO;
1146                }
1147                // ffmpeg sets duration to -1, which is incorrect.
1148                if (duration32 != 0xffffffff) {
1149                    duration = ntohl(duration32);
1150                }
1151            }
1152            mLastTrack->meta->setInt64(
1153                    kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1154
1155            uint8_t lang[2];
1156            off64_t lang_offset;
1157            if (version == 1) {
1158                lang_offset = timescale_offset + 4 + 8;
1159            } else if (version == 0) {
1160                lang_offset = timescale_offset + 4 + 4;
1161            } else {
1162                return ERROR_IO;
1163            }
1164
1165            if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1166                    < (ssize_t)sizeof(lang)) {
1167                return ERROR_IO;
1168            }
1169
1170            // To get the ISO-639-2/T three character language code
1171            // 1 bit pad followed by 3 5-bits characters. Each character
1172            // is packed as the difference between its ASCII value and 0x60.
1173            char lang_code[4];
1174            lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1175            lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1176            lang_code[2] = (lang[1] & 0x1f) + 0x60;
1177            lang_code[3] = '\0';
1178
1179            mLastTrack->meta->setCString(
1180                    kKeyMediaLanguage, lang_code);
1181
1182            break;
1183        }
1184
1185        case FOURCC('s', 't', 's', 'd'):
1186        {
1187            if (chunk_data_size < 8) {
1188                return ERROR_MALFORMED;
1189            }
1190
1191            uint8_t buffer[8];
1192            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1193                return ERROR_MALFORMED;
1194            }
1195
1196            if (mDataSource->readAt(
1197                        data_offset, buffer, 8) < 8) {
1198                return ERROR_IO;
1199            }
1200
1201            if (U32_AT(buffer) != 0) {
1202                // Should be version 0, flags 0.
1203                return ERROR_MALFORMED;
1204            }
1205
1206            uint32_t entry_count = U32_AT(&buffer[4]);
1207
1208            if (entry_count > 1) {
1209                // For 3GPP timed text, there could be multiple tx3g boxes containing
1210                // multiple text display formats. These formats will be used to
1211                // display the timed text.
1212                // For encrypted files, there may also be more than one entry.
1213                const char *mime;
1214                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1215                if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1216                        strcasecmp(mime, "application/octet-stream")) {
1217                    // For now we only support a single type of media per track.
1218                    mLastTrack->skipTrack = true;
1219                    *offset += chunk_size;
1220                    break;
1221                }
1222            }
1223            off64_t stop_offset = *offset + chunk_size;
1224            *offset = data_offset + 8;
1225            for (uint32_t i = 0; i < entry_count; ++i) {
1226                status_t err = parseChunk(offset, depth + 1);
1227                if (err != OK) {
1228                    return err;
1229                }
1230            }
1231
1232            if (*offset != stop_offset) {
1233                return ERROR_MALFORMED;
1234            }
1235            break;
1236        }
1237
1238        case FOURCC('m', 'p', '4', 'a'):
1239        case FOURCC('e', 'n', 'c', 'a'):
1240        case FOURCC('s', 'a', 'm', 'r'):
1241        case FOURCC('s', 'a', 'w', 'b'):
1242        {
1243            uint8_t buffer[8 + 20];
1244            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1245                // Basic AudioSampleEntry size.
1246                return ERROR_MALFORMED;
1247            }
1248
1249            if (mDataSource->readAt(
1250                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1251                return ERROR_IO;
1252            }
1253
1254            uint16_t data_ref_index = U16_AT(&buffer[6]);
1255            uint32_t num_channels = U16_AT(&buffer[16]);
1256
1257            uint16_t sample_size = U16_AT(&buffer[18]);
1258            uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1259
1260            if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1261                // if the chunk type is enca, we'll get the type from the sinf/frma box later
1262                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1263                AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1264            }
1265            ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1266                   chunk, num_channels, sample_size, sample_rate);
1267            mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1268            mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1269
1270            off64_t stop_offset = *offset + chunk_size;
1271            *offset = data_offset + sizeof(buffer);
1272            while (*offset < stop_offset) {
1273                status_t err = parseChunk(offset, depth + 1);
1274                if (err != OK) {
1275                    return err;
1276                }
1277            }
1278
1279            if (*offset != stop_offset) {
1280                return ERROR_MALFORMED;
1281            }
1282            break;
1283        }
1284
1285        case FOURCC('m', 'p', '4', 'v'):
1286        case FOURCC('e', 'n', 'c', 'v'):
1287        case FOURCC('s', '2', '6', '3'):
1288        case FOURCC('H', '2', '6', '3'):
1289        case FOURCC('h', '2', '6', '3'):
1290        case FOURCC('a', 'v', 'c', '1'):
1291        {
1292            mHasVideo = true;
1293
1294            uint8_t buffer[78];
1295            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1296                // Basic VideoSampleEntry size.
1297                return ERROR_MALFORMED;
1298            }
1299
1300            if (mDataSource->readAt(
1301                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1302                return ERROR_IO;
1303            }
1304
1305            uint16_t data_ref_index = U16_AT(&buffer[6]);
1306            uint16_t width = U16_AT(&buffer[6 + 18]);
1307            uint16_t height = U16_AT(&buffer[6 + 20]);
1308
1309            // The video sample is not standard-compliant if it has invalid dimension.
1310            // Use some default width and height value, and
1311            // let the decoder figure out the actual width and height (and thus
1312            // be prepared for INFO_FORMAT_CHANGED event).
1313            if (width == 0)  width  = 352;
1314            if (height == 0) height = 288;
1315
1316            // printf("*** coding='%s' width=%d height=%d\n",
1317            //        chunk, width, height);
1318
1319            if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1320                // if the chunk type is encv, we'll get the type from the sinf/frma box later
1321                mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1322            }
1323            mLastTrack->meta->setInt32(kKeyWidth, width);
1324            mLastTrack->meta->setInt32(kKeyHeight, height);
1325
1326            off64_t stop_offset = *offset + chunk_size;
1327            *offset = data_offset + sizeof(buffer);
1328            while (*offset < stop_offset) {
1329                status_t err = parseChunk(offset, depth + 1);
1330                if (err != OK) {
1331                    return err;
1332                }
1333            }
1334
1335            if (*offset != stop_offset) {
1336                return ERROR_MALFORMED;
1337            }
1338            break;
1339        }
1340
1341        case FOURCC('s', 't', 'c', 'o'):
1342        case FOURCC('c', 'o', '6', '4'):
1343        {
1344            status_t err =
1345                mLastTrack->sampleTable->setChunkOffsetParams(
1346                        chunk_type, data_offset, chunk_data_size);
1347
1348            *offset += chunk_size;
1349
1350            if (err != OK) {
1351                return err;
1352            }
1353
1354            break;
1355        }
1356
1357        case FOURCC('s', 't', 's', 'c'):
1358        {
1359            status_t err =
1360                mLastTrack->sampleTable->setSampleToChunkParams(
1361                        data_offset, chunk_data_size);
1362
1363            *offset += chunk_size;
1364
1365            if (err != OK) {
1366                return err;
1367            }
1368
1369            break;
1370        }
1371
1372        case FOURCC('s', 't', 's', 'z'):
1373        case FOURCC('s', 't', 'z', '2'):
1374        {
1375            status_t err =
1376                mLastTrack->sampleTable->setSampleSizeParams(
1377                        chunk_type, data_offset, chunk_data_size);
1378
1379            *offset += chunk_size;
1380
1381            if (err != OK) {
1382                return err;
1383            }
1384
1385            size_t max_size;
1386            err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1387
1388            if (err != OK) {
1389                return err;
1390            }
1391
1392            if (max_size != 0) {
1393                // Assume that a given buffer only contains at most 10 chunks,
1394                // each chunk originally prefixed with a 2 byte length will
1395                // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1396                // and thus will grow by 2 bytes per chunk.
1397                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1398            } else {
1399                // No size was specified. Pick a conservatively large size.
1400                int32_t width, height;
1401                if (!mLastTrack->meta->findInt32(kKeyWidth, &width) ||
1402                    !mLastTrack->meta->findInt32(kKeyHeight, &height)) {
1403                    ALOGE("No width or height, assuming worst case 1080p");
1404                    width = 1920;
1405                    height = 1080;
1406                }
1407
1408                const char *mime;
1409                CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1410                if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
1411                    // AVC requires compression ratio of at least 2, and uses
1412                    // macroblocks
1413                    max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1414                } else {
1415                    // For all other formats there is no minimum compression
1416                    // ratio. Use compression ratio of 1.
1417                    max_size = width * height * 3 / 2;
1418                }
1419                mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size);
1420            }
1421
1422            // NOTE: setting another piece of metadata invalidates any pointers (such as the
1423            // mimetype) previously obtained, so don't cache them.
1424            const char *mime;
1425            CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1426            // Calculate average frame rate.
1427            if (!strncasecmp("video/", mime, 6)) {
1428                size_t nSamples = mLastTrack->sampleTable->countSamples();
1429                int64_t durationUs;
1430                if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1431                    if (durationUs > 0) {
1432                        int32_t frameRate = (nSamples * 1000000LL +
1433                                    (durationUs >> 1)) / durationUs;
1434                        mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1435                    }
1436                }
1437            }
1438
1439            break;
1440        }
1441
1442        case FOURCC('s', 't', 't', 's'):
1443        {
1444            *offset += chunk_size;
1445
1446            status_t err =
1447                mLastTrack->sampleTable->setTimeToSampleParams(
1448                        data_offset, chunk_data_size);
1449
1450            if (err != OK) {
1451                return err;
1452            }
1453
1454            break;
1455        }
1456
1457        case FOURCC('c', 't', 't', 's'):
1458        {
1459            *offset += chunk_size;
1460
1461            status_t err =
1462                mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1463                        data_offset, chunk_data_size);
1464
1465            if (err != OK) {
1466                return err;
1467            }
1468
1469            break;
1470        }
1471
1472        case FOURCC('s', 't', 's', 's'):
1473        {
1474            *offset += chunk_size;
1475
1476            status_t err =
1477                mLastTrack->sampleTable->setSyncSampleParams(
1478                        data_offset, chunk_data_size);
1479
1480            if (err != OK) {
1481                return err;
1482            }
1483
1484            break;
1485        }
1486
1487        // @xyz
1488        case FOURCC('\xA9', 'x', 'y', 'z'):
1489        {
1490            *offset += chunk_size;
1491
1492            // Best case the total data length inside "@xyz" box
1493            // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/",
1494            // where "\x00\x04" is the text string length with value = 4,
1495            // "\x15\xc7" is the language code = en, and "0+0" is a
1496            // location (string) value with longitude = 0 and latitude = 0.
1497            if (chunk_data_size < 8) {
1498                return ERROR_MALFORMED;
1499            }
1500
1501            // Worst case the location string length would be 18,
1502            // for instance +90.0000-180.0000, without the trailing "/" and
1503            // the string length + language code.
1504            char buffer[18];
1505
1506            // We subtract 5 from the data size because the text string length +
1507            // language code takes 4 bytes, and the trailing slash "/" takes 1 byte.
1508            off64_t location_length = chunk_data_size - 5;
1509            if (location_length >= (off64_t) sizeof(buffer)) {
1510                return ERROR_MALFORMED;
1511            }
1512
1513            if (mDataSource->readAt(
1514                        data_offset + 4, buffer, location_length) < location_length) {
1515                return ERROR_IO;
1516            }
1517
1518            buffer[location_length] = '\0';
1519            mFileMetaData->setCString(kKeyLocation, buffer);
1520            break;
1521        }
1522
1523        case FOURCC('e', 's', 'd', 's'):
1524        {
1525            *offset += chunk_size;
1526
1527            if (chunk_data_size < 4) {
1528                return ERROR_MALFORMED;
1529            }
1530
1531            uint8_t buffer[256];
1532            if (chunk_data_size > (off64_t)sizeof(buffer)) {
1533                return ERROR_BUFFER_TOO_SMALL;
1534            }
1535
1536            if (mDataSource->readAt(
1537                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
1538                return ERROR_IO;
1539            }
1540
1541            if (U32_AT(buffer) != 0) {
1542                // Should be version 0, flags 0.
1543                return ERROR_MALFORMED;
1544            }
1545
1546            mLastTrack->meta->setData(
1547                    kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1548
1549            if (mPath.size() >= 2
1550                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1551                // Information from the ESDS must be relied on for proper
1552                // setup of sample rate and channel count for MPEG4 Audio.
1553                // The generic header appears to only contain generic
1554                // information...
1555
1556                status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1557                        &buffer[4], chunk_data_size - 4);
1558
1559                if (err != OK) {
1560                    return err;
1561                }
1562            }
1563
1564            break;
1565        }
1566
1567        case FOURCC('a', 'v', 'c', 'C'):
1568        {
1569            *offset += chunk_size;
1570
1571            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1572
1573            if (mDataSource->readAt(
1574                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1575                return ERROR_IO;
1576            }
1577
1578            mLastTrack->meta->setData(
1579                    kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
1580
1581            break;
1582        }
1583
1584        case FOURCC('d', '2', '6', '3'):
1585        {
1586            *offset += chunk_size;
1587            /*
1588             * d263 contains a fixed 7 bytes part:
1589             *   vendor - 4 bytes
1590             *   version - 1 byte
1591             *   level - 1 byte
1592             *   profile - 1 byte
1593             * optionally, "d263" box itself may contain a 16-byte
1594             * bit rate box (bitr)
1595             *   average bit rate - 4 bytes
1596             *   max bit rate - 4 bytes
1597             */
1598            char buffer[23];
1599            if (chunk_data_size != 7 &&
1600                chunk_data_size != 23) {
1601                ALOGE("Incorrect D263 box size %lld", chunk_data_size);
1602                return ERROR_MALFORMED;
1603            }
1604
1605            if (mDataSource->readAt(
1606                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1607                return ERROR_IO;
1608            }
1609
1610            mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1611
1612            break;
1613        }
1614
1615        case FOURCC('m', 'e', 't', 'a'):
1616        {
1617            uint8_t buffer[4];
1618            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1619                *offset += chunk_size;
1620                return ERROR_MALFORMED;
1621            }
1622
1623            if (mDataSource->readAt(
1624                        data_offset, buffer, 4) < 4) {
1625                *offset += chunk_size;
1626                return ERROR_IO;
1627            }
1628
1629            if (U32_AT(buffer) != 0) {
1630                // Should be version 0, flags 0.
1631
1632                // If it's not, let's assume this is one of those
1633                // apparently malformed chunks that don't have flags
1634                // and completely different semantics than what's
1635                // in the MPEG4 specs and skip it.
1636                *offset += chunk_size;
1637                return OK;
1638            }
1639
1640            off64_t stop_offset = *offset + chunk_size;
1641            *offset = data_offset + sizeof(buffer);
1642            while (*offset < stop_offset) {
1643                status_t err = parseChunk(offset, depth + 1);
1644                if (err != OK) {
1645                    return err;
1646                }
1647            }
1648
1649            if (*offset != stop_offset) {
1650                return ERROR_MALFORMED;
1651            }
1652            break;
1653        }
1654
1655        case FOURCC('m', 'e', 'a', 'n'):
1656        case FOURCC('n', 'a', 'm', 'e'):
1657        case FOURCC('d', 'a', 't', 'a'):
1658        {
1659            *offset += chunk_size;
1660
1661            if (mPath.size() == 6 && underMetaDataPath(mPath)) {
1662                status_t err = parseITunesMetaData(data_offset, chunk_data_size);
1663
1664                if (err != OK) {
1665                    return err;
1666                }
1667            }
1668
1669            break;
1670        }
1671
1672        case FOURCC('m', 'v', 'h', 'd'):
1673        {
1674            *offset += chunk_size;
1675
1676            if (chunk_data_size < 24) {
1677                return ERROR_MALFORMED;
1678            }
1679
1680            uint8_t header[24];
1681            if (mDataSource->readAt(
1682                        data_offset, header, sizeof(header))
1683                    < (ssize_t)sizeof(header)) {
1684                return ERROR_IO;
1685            }
1686
1687            uint64_t creationTime;
1688            if (header[0] == 1) {
1689                creationTime = U64_AT(&header[4]);
1690                mHeaderTimescale = U32_AT(&header[20]);
1691            } else if (header[0] != 0) {
1692                return ERROR_MALFORMED;
1693            } else {
1694                creationTime = U32_AT(&header[4]);
1695                mHeaderTimescale = U32_AT(&header[12]);
1696            }
1697
1698            String8 s;
1699            convertTimeToDate(creationTime, &s);
1700
1701            mFileMetaData->setCString(kKeyDate, s.string());
1702
1703            break;
1704        }
1705
1706        case FOURCC('m', 'd', 'a', 't'):
1707        {
1708            ALOGV("mdat chunk, drm: %d", mIsDrm);
1709            if (!mIsDrm) {
1710                *offset += chunk_size;
1711                break;
1712            }
1713
1714            if (chunk_size < 8) {
1715                return ERROR_MALFORMED;
1716            }
1717
1718            return parseDrmSINF(offset, data_offset);
1719        }
1720
1721        case FOURCC('h', 'd', 'l', 'r'):
1722        {
1723            *offset += chunk_size;
1724
1725            uint32_t buffer;
1726            if (mDataSource->readAt(
1727                        data_offset + 8, &buffer, 4) < 4) {
1728                return ERROR_IO;
1729            }
1730
1731            uint32_t type = ntohl(buffer);
1732            // For the 3GPP file format, the handler-type within the 'hdlr' box
1733            // shall be 'text'. We also want to support 'sbtl' handler type
1734            // for a practical reason as various MPEG4 containers use it.
1735            if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
1736                mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
1737            }
1738
1739            break;
1740        }
1741
1742        case FOURCC('t', 'x', '3', 'g'):
1743        {
1744            uint32_t type;
1745            const void *data;
1746            size_t size = 0;
1747            if (!mLastTrack->meta->findData(
1748                    kKeyTextFormatData, &type, &data, &size)) {
1749                size = 0;
1750            }
1751
1752            uint8_t *buffer = new uint8_t[size + chunk_size];
1753
1754            if (size > 0) {
1755                memcpy(buffer, data, size);
1756            }
1757
1758            if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
1759                    < chunk_size) {
1760                delete[] buffer;
1761                buffer = NULL;
1762
1763                // advance read pointer so we don't end up reading this again
1764                *offset += chunk_size;
1765                return ERROR_IO;
1766            }
1767
1768            mLastTrack->meta->setData(
1769                    kKeyTextFormatData, 0, buffer, size + chunk_size);
1770
1771            delete[] buffer;
1772
1773            *offset += chunk_size;
1774            break;
1775        }
1776
1777        case FOURCC('c', 'o', 'v', 'r'):
1778        {
1779            *offset += chunk_size;
1780
1781            if (mFileMetaData != NULL) {
1782                ALOGV("chunk_data_size = %lld and data_offset = %lld",
1783                        chunk_data_size, data_offset);
1784                sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1);
1785                if (mDataSource->readAt(
1786                    data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) {
1787                    return ERROR_IO;
1788                }
1789                const int kSkipBytesOfDataBox = 16;
1790                mFileMetaData->setData(
1791                    kKeyAlbumArt, MetaData::TYPE_NONE,
1792                    buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
1793            }
1794
1795            break;
1796        }
1797
1798        case FOURCC('t', 'i', 't', 'l'):
1799        case FOURCC('p', 'e', 'r', 'f'):
1800        case FOURCC('a', 'u', 't', 'h'):
1801        case FOURCC('g', 'n', 'r', 'e'):
1802        case FOURCC('a', 'l', 'b', 'm'):
1803        case FOURCC('y', 'r', 'r', 'c'):
1804        {
1805            *offset += chunk_size;
1806
1807            status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
1808
1809            if (err != OK) {
1810                return err;
1811            }
1812
1813            break;
1814        }
1815
1816        case FOURCC('I', 'D', '3', '2'):
1817        {
1818            *offset += chunk_size;
1819
1820            if (chunk_data_size < 6) {
1821                return ERROR_MALFORMED;
1822            }
1823
1824            parseID3v2MetaData(data_offset + 6);
1825
1826            break;
1827        }
1828
1829        case FOURCC('-', '-', '-', '-'):
1830        {
1831            mLastCommentMean.clear();
1832            mLastCommentName.clear();
1833            mLastCommentData.clear();
1834            *offset += chunk_size;
1835            break;
1836        }
1837
1838        case FOURCC('s', 'i', 'd', 'x'):
1839        {
1840            parseSegmentIndex(data_offset, chunk_data_size);
1841            *offset += chunk_size;
1842            return UNKNOWN_ERROR; // stop parsing after sidx
1843        }
1844
1845        default:
1846        {
1847            *offset += chunk_size;
1848            break;
1849        }
1850    }
1851
1852    return OK;
1853}
1854
1855status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
1856  ALOGV("MPEG4Extractor::parseSegmentIndex");
1857
1858    if (size < 12) {
1859      return -EINVAL;
1860    }
1861
1862    uint32_t flags;
1863    if (!mDataSource->getUInt32(offset, &flags)) {
1864        return ERROR_MALFORMED;
1865    }
1866
1867    uint32_t version = flags >> 24;
1868    flags &= 0xffffff;
1869
1870    ALOGV("sidx version %d", version);
1871
1872    uint32_t referenceId;
1873    if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
1874        return ERROR_MALFORMED;
1875    }
1876
1877    uint32_t timeScale;
1878    if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
1879        return ERROR_MALFORMED;
1880    }
1881    ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
1882
1883    uint64_t earliestPresentationTime;
1884    uint64_t firstOffset;
1885
1886    offset += 12;
1887    size -= 12;
1888
1889    if (version == 0) {
1890        if (size < 8) {
1891            return -EINVAL;
1892        }
1893        uint32_t tmp;
1894        if (!mDataSource->getUInt32(offset, &tmp)) {
1895            return ERROR_MALFORMED;
1896        }
1897        earliestPresentationTime = tmp;
1898        if (!mDataSource->getUInt32(offset + 4, &tmp)) {
1899            return ERROR_MALFORMED;
1900        }
1901        firstOffset = tmp;
1902        offset += 8;
1903        size -= 8;
1904    } else {
1905        if (size < 16) {
1906            return -EINVAL;
1907        }
1908        if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
1909            return ERROR_MALFORMED;
1910        }
1911        if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
1912            return ERROR_MALFORMED;
1913        }
1914        offset += 16;
1915        size -= 16;
1916    }
1917    ALOGV("sidx pres/off: %Ld/%Ld", earliestPresentationTime, firstOffset);
1918
1919    if (size < 4) {
1920        return -EINVAL;
1921    }
1922
1923    uint16_t referenceCount;
1924    if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
1925        return ERROR_MALFORMED;
1926    }
1927    offset += 4;
1928    size -= 4;
1929    ALOGV("refcount: %d", referenceCount);
1930
1931    if (size < referenceCount * 12) {
1932        return -EINVAL;
1933    }
1934
1935    uint64_t total_duration = 0;
1936    for (unsigned int i = 0; i < referenceCount; i++) {
1937        uint32_t d1, d2, d3;
1938
1939        if (!mDataSource->getUInt32(offset, &d1) ||     // size
1940            !mDataSource->getUInt32(offset + 4, &d2) || // duration
1941            !mDataSource->getUInt32(offset + 8, &d3)) { // flags
1942            return ERROR_MALFORMED;
1943        }
1944
1945        if (d1 & 0x80000000) {
1946            ALOGW("sub-sidx boxes not supported yet");
1947        }
1948        bool sap = d3 & 0x80000000;
1949        uint32_t saptype = (d3 >> 28) & 7;
1950        if (!sap || (saptype != 1 && saptype != 2)) {
1951            // type 1 and 2 are sync samples
1952            ALOGW("not a stream access point, or unsupported type: %08x", d3);
1953        }
1954        total_duration += d2;
1955        offset += 12;
1956        ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
1957        SidxEntry se;
1958        se.mSize = d1 & 0x7fffffff;
1959        se.mDurationUs = 1000000LL * d2 / timeScale;
1960        mSidxEntries.add(se);
1961    }
1962
1963    mSidxDuration = total_duration * 1000000 / timeScale;
1964    ALOGV("duration: %lld", mSidxDuration);
1965
1966    int64_t metaDuration;
1967    if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
1968        mLastTrack->meta->setInt64(kKeyDuration, mSidxDuration);
1969    }
1970    return OK;
1971}
1972
1973
1974
1975status_t MPEG4Extractor::parseTrackHeader(
1976        off64_t data_offset, off64_t data_size) {
1977    if (data_size < 4) {
1978        return ERROR_MALFORMED;
1979    }
1980
1981    uint8_t version;
1982    if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1983        return ERROR_IO;
1984    }
1985
1986    size_t dynSize = (version == 1) ? 36 : 24;
1987
1988    uint8_t buffer[36 + 60];
1989
1990    if (data_size != (off64_t)dynSize + 60) {
1991        return ERROR_MALFORMED;
1992    }
1993
1994    if (mDataSource->readAt(
1995                data_offset, buffer, data_size) < (ssize_t)data_size) {
1996        return ERROR_IO;
1997    }
1998
1999    uint64_t ctime, mtime, duration;
2000    int32_t id;
2001
2002    if (version == 1) {
2003        ctime = U64_AT(&buffer[4]);
2004        mtime = U64_AT(&buffer[12]);
2005        id = U32_AT(&buffer[20]);
2006        duration = U64_AT(&buffer[28]);
2007    } else if (version == 0) {
2008        ctime = U32_AT(&buffer[4]);
2009        mtime = U32_AT(&buffer[8]);
2010        id = U32_AT(&buffer[12]);
2011        duration = U32_AT(&buffer[20]);
2012    } else {
2013        return ERROR_UNSUPPORTED;
2014    }
2015
2016    mLastTrack->meta->setInt32(kKeyTrackID, id);
2017
2018    size_t matrixOffset = dynSize + 16;
2019    int32_t a00 = U32_AT(&buffer[matrixOffset]);
2020    int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
2021    int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
2022    int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
2023    int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
2024    int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
2025
2026#if 0
2027    ALOGI("x' = %.2f * x + %.2f * y + %.2f",
2028         a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
2029    ALOGI("y' = %.2f * x + %.2f * y + %.2f",
2030         a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
2031#endif
2032
2033    uint32_t rotationDegrees;
2034
2035    static const int32_t kFixedOne = 0x10000;
2036    if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
2037        // Identity, no rotation
2038        rotationDegrees = 0;
2039    } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2040        rotationDegrees = 90;
2041    } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2042        rotationDegrees = 270;
2043    } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2044        rotationDegrees = 180;
2045    } else {
2046        ALOGW("We only support 0,90,180,270 degree rotation matrices");
2047        rotationDegrees = 0;
2048    }
2049
2050    if (rotationDegrees != 0) {
2051        mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
2052    }
2053
2054    // Handle presentation display size, which could be different
2055    // from the image size indicated by kKeyWidth and kKeyHeight.
2056    uint32_t width = U32_AT(&buffer[dynSize + 52]);
2057    uint32_t height = U32_AT(&buffer[dynSize + 56]);
2058    mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
2059    mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
2060
2061    return OK;
2062}
2063
2064status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
2065    if (size < 4) {
2066        return ERROR_MALFORMED;
2067    }
2068
2069    uint8_t *buffer = new uint8_t[size + 1];
2070    if (mDataSource->readAt(
2071                offset, buffer, size) != (ssize_t)size) {
2072        delete[] buffer;
2073        buffer = NULL;
2074
2075        return ERROR_IO;
2076    }
2077
2078    uint32_t flags = U32_AT(buffer);
2079
2080    uint32_t metadataKey = 0;
2081    char chunk[5];
2082    MakeFourCCString(mPath[4], chunk);
2083    ALOGV("meta: %s @ %lld", chunk, offset);
2084    switch (mPath[4]) {
2085        case FOURCC(0xa9, 'a', 'l', 'b'):
2086        {
2087            metadataKey = kKeyAlbum;
2088            break;
2089        }
2090        case FOURCC(0xa9, 'A', 'R', 'T'):
2091        {
2092            metadataKey = kKeyArtist;
2093            break;
2094        }
2095        case FOURCC('a', 'A', 'R', 'T'):
2096        {
2097            metadataKey = kKeyAlbumArtist;
2098            break;
2099        }
2100        case FOURCC(0xa9, 'd', 'a', 'y'):
2101        {
2102            metadataKey = kKeyYear;
2103            break;
2104        }
2105        case FOURCC(0xa9, 'n', 'a', 'm'):
2106        {
2107            metadataKey = kKeyTitle;
2108            break;
2109        }
2110        case FOURCC(0xa9, 'w', 'r', 't'):
2111        {
2112            metadataKey = kKeyWriter;
2113            break;
2114        }
2115        case FOURCC('c', 'o', 'v', 'r'):
2116        {
2117            metadataKey = kKeyAlbumArt;
2118            break;
2119        }
2120        case FOURCC('g', 'n', 'r', 'e'):
2121        {
2122            metadataKey = kKeyGenre;
2123            break;
2124        }
2125        case FOURCC(0xa9, 'g', 'e', 'n'):
2126        {
2127            metadataKey = kKeyGenre;
2128            break;
2129        }
2130        case FOURCC('c', 'p', 'i', 'l'):
2131        {
2132            if (size == 9 && flags == 21) {
2133                char tmp[16];
2134                sprintf(tmp, "%d",
2135                        (int)buffer[size - 1]);
2136
2137                mFileMetaData->setCString(kKeyCompilation, tmp);
2138            }
2139            break;
2140        }
2141        case FOURCC('t', 'r', 'k', 'n'):
2142        {
2143            if (size == 16 && flags == 0) {
2144                char tmp[16];
2145                uint16_t* pTrack = (uint16_t*)&buffer[10];
2146                uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2147                sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2148
2149                mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2150            }
2151            break;
2152        }
2153        case FOURCC('d', 'i', 's', 'k'):
2154        {
2155            if ((size == 14 || size == 16) && flags == 0) {
2156                char tmp[16];
2157                uint16_t* pDisc = (uint16_t*)&buffer[10];
2158                uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2159                sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2160
2161                mFileMetaData->setCString(kKeyDiscNumber, tmp);
2162            }
2163            break;
2164        }
2165        case FOURCC('-', '-', '-', '-'):
2166        {
2167            buffer[size] = '\0';
2168            switch (mPath[5]) {
2169                case FOURCC('m', 'e', 'a', 'n'):
2170                    mLastCommentMean.setTo((const char *)buffer + 4);
2171                    break;
2172                case FOURCC('n', 'a', 'm', 'e'):
2173                    mLastCommentName.setTo((const char *)buffer + 4);
2174                    break;
2175                case FOURCC('d', 'a', 't', 'a'):
2176                    mLastCommentData.setTo((const char *)buffer + 8);
2177                    break;
2178            }
2179
2180            // Once we have a set of mean/name/data info, go ahead and process
2181            // it to see if its something we are interested in.  Whether or not
2182            // were are interested in the specific tag, make sure to clear out
2183            // the set so we can be ready to process another tuple should one
2184            // show up later in the file.
2185            if ((mLastCommentMean.length() != 0) &&
2186                (mLastCommentName.length() != 0) &&
2187                (mLastCommentData.length() != 0)) {
2188
2189                if (mLastCommentMean == "com.apple.iTunes"
2190                        && mLastCommentName == "iTunSMPB") {
2191                    int32_t delay, padding;
2192                    if (sscanf(mLastCommentData,
2193                               " %*x %x %x %*x", &delay, &padding) == 2) {
2194                        mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2195                        mLastTrack->meta->setInt32(kKeyEncoderPadding, padding);
2196                    }
2197                }
2198
2199                mLastCommentMean.clear();
2200                mLastCommentName.clear();
2201                mLastCommentData.clear();
2202            }
2203            break;
2204        }
2205
2206        default:
2207            break;
2208    }
2209
2210    if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) {
2211        if (metadataKey == kKeyAlbumArt) {
2212            mFileMetaData->setData(
2213                    kKeyAlbumArt, MetaData::TYPE_NONE,
2214                    buffer + 8, size - 8);
2215        } else if (metadataKey == kKeyGenre) {
2216            if (flags == 0) {
2217                // uint8_t genre code, iTunes genre codes are
2218                // the standard id3 codes, except they start
2219                // at 1 instead of 0 (e.g. Pop is 14, not 13)
2220                // We use standard id3 numbering, so subtract 1.
2221                int genrecode = (int)buffer[size - 1];
2222                genrecode--;
2223                if (genrecode < 0) {
2224                    genrecode = 255; // reserved for 'unknown genre'
2225                }
2226                char genre[10];
2227                sprintf(genre, "%d", genrecode);
2228
2229                mFileMetaData->setCString(metadataKey, genre);
2230            } else if (flags == 1) {
2231                // custom genre string
2232                buffer[size] = '\0';
2233
2234                mFileMetaData->setCString(
2235                        metadataKey, (const char *)buffer + 8);
2236            }
2237        } else {
2238            buffer[size] = '\0';
2239
2240            mFileMetaData->setCString(
2241                    metadataKey, (const char *)buffer + 8);
2242        }
2243    }
2244
2245    delete[] buffer;
2246    buffer = NULL;
2247
2248    return OK;
2249}
2250
2251status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
2252    if (size < 4) {
2253        return ERROR_MALFORMED;
2254    }
2255
2256    uint8_t *buffer = new uint8_t[size];
2257    if (mDataSource->readAt(
2258                offset, buffer, size) != (ssize_t)size) {
2259        delete[] buffer;
2260        buffer = NULL;
2261
2262        return ERROR_IO;
2263    }
2264
2265    uint32_t metadataKey = 0;
2266    switch (mPath[depth]) {
2267        case FOURCC('t', 'i', 't', 'l'):
2268        {
2269            metadataKey = kKeyTitle;
2270            break;
2271        }
2272        case FOURCC('p', 'e', 'r', 'f'):
2273        {
2274            metadataKey = kKeyArtist;
2275            break;
2276        }
2277        case FOURCC('a', 'u', 't', 'h'):
2278        {
2279            metadataKey = kKeyWriter;
2280            break;
2281        }
2282        case FOURCC('g', 'n', 'r', 'e'):
2283        {
2284            metadataKey = kKeyGenre;
2285            break;
2286        }
2287        case FOURCC('a', 'l', 'b', 'm'):
2288        {
2289            if (buffer[size - 1] != '\0') {
2290              char tmp[4];
2291              sprintf(tmp, "%u", buffer[size - 1]);
2292
2293              mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2294            }
2295
2296            metadataKey = kKeyAlbum;
2297            break;
2298        }
2299        case FOURCC('y', 'r', 'r', 'c'):
2300        {
2301            char tmp[5];
2302            uint16_t year = U16_AT(&buffer[4]);
2303
2304            if (year < 10000) {
2305                sprintf(tmp, "%u", year);
2306
2307                mFileMetaData->setCString(kKeyYear, tmp);
2308            }
2309            break;
2310        }
2311
2312        default:
2313            break;
2314    }
2315
2316    if (metadataKey > 0) {
2317        bool isUTF8 = true; // Common case
2318        char16_t *framedata = NULL;
2319        int len16 = 0; // Number of UTF-16 characters
2320
2321        // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
2322        if (size - 6 >= 4) {
2323            len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
2324            framedata = (char16_t *)(buffer + 6);
2325            if (0xfffe == *framedata) {
2326                // endianness marker (BOM) doesn't match host endianness
2327                for (int i = 0; i < len16; i++) {
2328                    framedata[i] = bswap_16(framedata[i]);
2329                }
2330                // BOM is now swapped to 0xfeff, we will execute next block too
2331            }
2332
2333            if (0xfeff == *framedata) {
2334                // Remove the BOM
2335                framedata++;
2336                len16--;
2337                isUTF8 = false;
2338            }
2339            // else normal non-zero-length UTF-8 string
2340            // we can't handle UTF-16 without BOM as there is no other
2341            // indication of encoding.
2342        }
2343
2344        if (isUTF8) {
2345            mFileMetaData->setCString(metadataKey, (const char *)buffer + 6);
2346        } else {
2347            // Convert from UTF-16 string to UTF-8 string.
2348            String8 tmpUTF8str(framedata, len16);
2349            mFileMetaData->setCString(metadataKey, tmpUTF8str.string());
2350        }
2351    }
2352
2353    delete[] buffer;
2354    buffer = NULL;
2355
2356    return OK;
2357}
2358
2359void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
2360    ID3 id3(mDataSource, true /* ignorev1 */, offset);
2361
2362    if (id3.isValid()) {
2363        struct Map {
2364            int key;
2365            const char *tag1;
2366            const char *tag2;
2367        };
2368        static const Map kMap[] = {
2369            { kKeyAlbum, "TALB", "TAL" },
2370            { kKeyArtist, "TPE1", "TP1" },
2371            { kKeyAlbumArtist, "TPE2", "TP2" },
2372            { kKeyComposer, "TCOM", "TCM" },
2373            { kKeyGenre, "TCON", "TCO" },
2374            { kKeyTitle, "TIT2", "TT2" },
2375            { kKeyYear, "TYE", "TYER" },
2376            { kKeyAuthor, "TXT", "TEXT" },
2377            { kKeyCDTrackNumber, "TRK", "TRCK" },
2378            { kKeyDiscNumber, "TPA", "TPOS" },
2379            { kKeyCompilation, "TCP", "TCMP" },
2380        };
2381        static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
2382
2383        for (size_t i = 0; i < kNumMapEntries; ++i) {
2384            if (!mFileMetaData->hasData(kMap[i].key)) {
2385                ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
2386                if (it->done()) {
2387                    delete it;
2388                    it = new ID3::Iterator(id3, kMap[i].tag2);
2389                }
2390
2391                if (it->done()) {
2392                    delete it;
2393                    continue;
2394                }
2395
2396                String8 s;
2397                it->getString(&s);
2398                delete it;
2399
2400                mFileMetaData->setCString(kMap[i].key, s);
2401            }
2402        }
2403
2404        size_t dataSize;
2405        String8 mime;
2406        const void *data = id3.getAlbumArt(&dataSize, &mime);
2407
2408        if (data) {
2409            mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
2410            mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string());
2411        }
2412    }
2413}
2414
2415sp<MediaSource> MPEG4Extractor::getTrack(size_t index) {
2416    status_t err;
2417    if ((err = readMetaData()) != OK) {
2418        return NULL;
2419    }
2420
2421    Track *track = mFirstTrack;
2422    while (index > 0) {
2423        if (track == NULL) {
2424            return NULL;
2425        }
2426
2427        track = track->next;
2428        --index;
2429    }
2430
2431    if (track == NULL) {
2432        return NULL;
2433    }
2434
2435    ALOGV("getTrack called, pssh: %d", mPssh.size());
2436
2437    return new MPEG4Source(
2438            track->meta, mDataSource, track->timescale, track->sampleTable,
2439            mSidxEntries, mMoofOffset);
2440}
2441
2442// static
2443status_t MPEG4Extractor::verifyTrack(Track *track) {
2444    const char *mime;
2445    CHECK(track->meta->findCString(kKeyMIMEType, &mime));
2446
2447    uint32_t type;
2448    const void *data;
2449    size_t size;
2450    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
2451        if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
2452                || type != kTypeAVCC) {
2453            return ERROR_MALFORMED;
2454        }
2455    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
2456            || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
2457        if (!track->meta->findData(kKeyESDS, &type, &data, &size)
2458                || type != kTypeESDS) {
2459            return ERROR_MALFORMED;
2460        }
2461    }
2462
2463    if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
2464        // Make sure we have all the metadata we need.
2465        ALOGE("stbl atom missing/invalid.");
2466        return ERROR_MALFORMED;
2467    }
2468
2469    return OK;
2470}
2471
// Audio object type (AOT) codes. Only the codes referenced by the parsing
// code in this file are active enumerators; the others are listed in comments
// purely for reference.
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1
    // AOT_NULL_OBJECT      = 0
    // AOT_AAC_MAIN         = 1    Main profile
    AOT_AAC_LC = 2,             // Low Complexity object
    // AOT_AAC_SSR          = 3
    // AOT_AAC_LTP          = 4
    AOT_SBR = 5,
    // AOT_AAC_SCAL         = 6
    // AOT_TWIN_VQ          = 7
    // AOT_CELP             = 8
    // AOT_HVXC             = 9
    // AOT_RSVD_10          = 10   (reserved)
    // AOT_RSVD_11          = 11   (reserved)
    // AOT_TTSI             = 12   TTSI Object
    // AOT_MAIN_SYNTH       = 13   Main Synthetic object
    // AOT_WAV_TAB_SYNTH    = 14   Wavetable Synthesis object
    // AOT_GEN_MIDI         = 15   General MIDI object
    // AOT_ALG_SYNTH_AUD_FX = 16   Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC = 17,         // Error Resilient(ER) AAC Low Complexity
    // AOT_RSVD_18          = 18   (reserved)
    // AOT_ER_AAC_LTP       = 19   Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL = 20,       // Error Resilient(ER) AAC Scalable object
    // AOT_ER_TWIN_VQ       = 21   Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC = 22,           // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD = 23,         // Error Resilient(ER) AAC LowDelay object
    // AOT_ER_CELP          = 24   Error Resilient(ER) CELP object
    // AOT_ER_HVXC          = 25   Error Resilient(ER) HVXC object
    // AOT_ER_HILN          = 26   Error Resilient(ER) HILN object
    // AOT_ER_PARA          = 27   Error Resilient(ER) Parametric object
    // AOT_RSVD_28          = 28   might become SSC
    AOT_PS = 29,                // PS, Parametric Stereo (includes SBR)
    // AOT_MPEGS            = 30   MPEG Surround

    AOT_ESCAPE = 31,            // Signal AOT uses more than 5 bits

    // AOT_MP3ONMP4_L1      = 32   MPEG-Layer1 in mp4
    // AOT_MP3ONMP4_L2      = 33   MPEG-Layer2 in mp4
    // AOT_MP3ONMP4_L3      = 34   MPEG-Layer3 in mp4
    // AOT_RSVD_35          = 35   might become DST
    // AOT_RSVD_36          = 36   might become ALS
    // AOT_AAC_SLS          = 37   AAC + SLS
    // AOT_SLS              = 38   SLS
    // AOT_ER_AAC_ELD       = 39   AAC Enhanced Low Delay

    // AOT_USAC             = 42   USAC
    // AOT_SAOC             = 43   SAOC
    // AOT_LD_MPEGS         = 44   Low Delay MPEG Surround

    // AOT_RSVD50           = 50   Interim AOT for Rsvd50
};
2523
2524status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
2525        const void *esds_data, size_t esds_size) {
2526    ESDS esds(esds_data, esds_size);
2527
2528    uint8_t objectTypeIndication;
2529    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
2530        return ERROR_MALFORMED;
2531    }
2532
2533    if (objectTypeIndication == 0xe1) {
2534        // This isn't MPEG4 audio at all, it's QCELP 14k...
2535        mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
2536        return OK;
2537    }
2538
2539    if (objectTypeIndication  == 0x6b) {
2540        // The media subtype is MP3 audio
2541        // Our software MP3 audio decoder may not be able to handle
2542        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
2543        ALOGE("MP3 track in MP4/3GPP file is not supported");
2544        return ERROR_UNSUPPORTED;
2545    }
2546
2547    const uint8_t *csd;
2548    size_t csd_size;
2549    if (esds.getCodecSpecificInfo(
2550                (const void **)&csd, &csd_size) != OK) {
2551        return ERROR_MALFORMED;
2552    }
2553
2554#if 0
2555    printf("ESD of size %d\n", csd_size);
2556    hexdump(csd, csd_size);
2557#endif
2558
2559    if (csd_size == 0) {
2560        // There's no further information, i.e. no codec specific data
2561        // Let's assume that the information provided in the mpeg4 headers
2562        // is accurate and hope for the best.
2563
2564        return OK;
2565    }
2566
2567    if (csd_size < 2) {
2568        return ERROR_MALFORMED;
2569    }
2570
2571    static uint32_t kSamplingRate[] = {
2572        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
2573        16000, 12000, 11025, 8000, 7350
2574    };
2575
2576    ABitReader br(csd, csd_size);
2577    uint32_t objectType = br.getBits(5);
2578
2579    if (objectType == 31) {  // AAC-ELD => additional 6 bits
2580        objectType = 32 + br.getBits(6);
2581    }
2582
2583    //keep AOT type
2584    mLastTrack->meta->setInt32(kKeyAACAOT, objectType);
2585
2586    uint32_t freqIndex = br.getBits(4);
2587
2588    int32_t sampleRate = 0;
2589    int32_t numChannels = 0;
2590    if (freqIndex == 15) {
2591        if (csd_size < 5) {
2592            return ERROR_MALFORMED;
2593        }
2594        sampleRate = br.getBits(24);
2595        numChannels = br.getBits(4);
2596    } else {
2597        numChannels = br.getBits(4);
2598
2599        if (freqIndex == 13 || freqIndex == 14) {
2600            return ERROR_MALFORMED;
2601        }
2602
2603        sampleRate = kSamplingRate[freqIndex];
2604    }
2605
2606    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
2607        uint32_t extFreqIndex = br.getBits(4);
2608        int32_t extSampleRate;
2609        if (extFreqIndex == 15) {
2610            if (csd_size < 8) {
2611                return ERROR_MALFORMED;
2612            }
2613            extSampleRate = br.getBits(24);
2614        } else {
2615            if (extFreqIndex == 13 || extFreqIndex == 14) {
2616                return ERROR_MALFORMED;
2617            }
2618            extSampleRate = kSamplingRate[extFreqIndex];
2619        }
2620        //TODO: save the extension sampling rate value in meta data =>
2621        //      mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
2622    }
2623
2624    switch (numChannels) {
2625        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
2626        case 0:
2627        case 1:// FC
2628        case 2:// FL FR
2629        case 3:// FC, FL FR
2630        case 4:// FC, FL FR, RC
2631        case 5:// FC, FL FR, SL SR
2632        case 6:// FC, FL FR, SL SR, LFE
2633            //numChannels already contains the right value
2634            break;
2635        case 11:// FC, FL FR, SL SR, RC, LFE
2636            numChannels = 7;
2637            break;
2638        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
2639        case 12:// FC, FL  FR,  SL SR, RL RR, LFE
2640        case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
2641            numChannels = 8;
2642            break;
2643        default:
2644            return ERROR_UNSUPPORTED;
2645    }
2646
2647    {
2648        if (objectType == AOT_SBR || objectType == AOT_PS) {
2649            const int32_t extensionSamplingFrequency = br.getBits(4);
2650            objectType = br.getBits(5);
2651
2652            if (objectType == AOT_ESCAPE) {
2653                objectType = 32 + br.getBits(6);
2654            }
2655        }
2656        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
2657                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
2658                objectType == AOT_ER_BSAC) {
2659            const int32_t frameLengthFlag = br.getBits(1);
2660
2661            const int32_t dependsOnCoreCoder = br.getBits(1);
2662
2663            if (dependsOnCoreCoder ) {
2664                const int32_t coreCoderDelay = br.getBits(14);
2665            }
2666
2667            const int32_t extensionFlag = br.getBits(1);
2668
2669            if (numChannels == 0 ) {
2670                int32_t channelsEffectiveNum = 0;
2671                int32_t channelsNum = 0;
2672                const int32_t ElementInstanceTag = br.getBits(4);
2673                const int32_t Profile = br.getBits(2);
2674                const int32_t SamplingFrequencyIndex = br.getBits(4);
2675                const int32_t NumFrontChannelElements = br.getBits(4);
2676                const int32_t NumSideChannelElements = br.getBits(4);
2677                const int32_t NumBackChannelElements = br.getBits(4);
2678                const int32_t NumLfeChannelElements = br.getBits(2);
2679                const int32_t NumAssocDataElements = br.getBits(3);
2680                const int32_t NumValidCcElements = br.getBits(4);
2681
2682                const int32_t MonoMixdownPresent = br.getBits(1);
2683                if (MonoMixdownPresent != 0) {
2684                    const int32_t MonoMixdownElementNumber = br.getBits(4);
2685                }
2686
2687                const int32_t StereoMixdownPresent = br.getBits(1);
2688                if (StereoMixdownPresent != 0) {
2689                    const int32_t StereoMixdownElementNumber = br.getBits(4);
2690                }
2691
2692                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
2693                if (MatrixMixdownIndexPresent != 0) {
2694                    const int32_t MatrixMixdownIndex = br.getBits(2);
2695                    const int32_t PseudoSurroundEnable = br.getBits(1);
2696                }
2697
2698                int i;
2699                for (i=0; i < NumFrontChannelElements; i++) {
2700                    const int32_t FrontElementIsCpe = br.getBits(1);
2701                    const int32_t FrontElementTagSelect = br.getBits(4);
2702                    channelsNum += FrontElementIsCpe ? 2 : 1;
2703                }
2704
2705                for (i=0; i < NumSideChannelElements; i++) {
2706                    const int32_t SideElementIsCpe = br.getBits(1);
2707                    const int32_t SideElementTagSelect = br.getBits(4);
2708                    channelsNum += SideElementIsCpe ? 2 : 1;
2709                }
2710
2711                for (i=0; i < NumBackChannelElements; i++) {
2712                    const int32_t BackElementIsCpe = br.getBits(1);
2713                    const int32_t BackElementTagSelect = br.getBits(4);
2714                    channelsNum += BackElementIsCpe ? 2 : 1;
2715                }
2716                channelsEffectiveNum = channelsNum;
2717
2718                for (i=0; i < NumLfeChannelElements; i++) {
2719                    const int32_t LfeElementTagSelect = br.getBits(4);
2720                    channelsNum += 1;
2721                }
2722                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
2723                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
2724                numChannels = channelsNum;
2725            }
2726        }
2727    }
2728
2729    if (numChannels == 0) {
2730        return ERROR_UNSUPPORTED;
2731    }
2732
2733    int32_t prevSampleRate;
2734    CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));
2735
2736    if (prevSampleRate != sampleRate) {
2737        ALOGV("mpeg4 audio sample rate different from previous setting. "
2738             "was: %d, now: %d", prevSampleRate, sampleRate);
2739    }
2740
2741    mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);
2742
2743    int32_t prevChannelCount;
2744    CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));
2745
2746    if (prevChannelCount != numChannels) {
2747        ALOGV("mpeg4 audio channel count different from previous setting. "
2748             "was: %d, now: %d", prevChannelCount, numChannels);
2749    }
2750
2751    mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);
2752
2753    return OK;
2754}
2755
2756////////////////////////////////////////////////////////////////////////////////
2757
2758MPEG4Source::MPEG4Source(
2759        const sp<MetaData> &format,
2760        const sp<DataSource> &dataSource,
2761        int32_t timeScale,
2762        const sp<SampleTable> &sampleTable,
2763        Vector<SidxEntry> &sidx,
2764        off64_t firstMoofOffset)
2765    : mFormat(format),
2766      mDataSource(dataSource),
2767      mTimescale(timeScale),
2768      mSampleTable(sampleTable),
2769      mCurrentSampleIndex(0),
2770      mCurrentFragmentIndex(0),
2771      mSegments(sidx),
2772      mFirstMoofOffset(firstMoofOffset),
2773      mCurrentMoofOffset(firstMoofOffset),
2774      mCurrentTime(0),
2775      mCurrentSampleInfoAllocSize(0),
2776      mCurrentSampleInfoSizes(NULL),
2777      mCurrentSampleInfoOffsetsAllocSize(0),
2778      mCurrentSampleInfoOffsets(NULL),
2779      mIsAVC(false),
2780      mNALLengthSize(0),
2781      mStarted(false),
2782      mGroup(NULL),
2783      mBuffer(NULL),
2784      mWantsNALFragments(false),
2785      mSrcBuffer(NULL) {
2786
2787    mFormat->findInt32(kKeyCryptoMode, &mCryptoMode);
2788    mDefaultIVSize = 0;
2789    mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
2790    uint32_t keytype;
2791    const void *key;
2792    size_t keysize;
2793    if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
2794        CHECK(keysize <= 16);
2795        memset(mCryptoKey, 0, 16);
2796        memcpy(mCryptoKey, key, keysize);
2797    }
2798
2799    const char *mime;
2800    bool success = mFormat->findCString(kKeyMIMEType, &mime);
2801    CHECK(success);
2802
2803    mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
2804
2805    if (mIsAVC) {
2806        uint32_t type;
2807        const void *data;
2808        size_t size;
2809        CHECK(format->findData(kKeyAVCC, &type, &data, &size));
2810
2811        const uint8_t *ptr = (const uint8_t *)data;
2812
2813        CHECK(size >= 7);
2814        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
2815
2816        // The number of bytes used to encode the length of a NAL unit.
2817        mNALLengthSize = 1 + (ptr[4] & 3);
2818    }
2819
2820    CHECK(format->findInt32(kKeyTrackID, &mTrackId));
2821
2822    if (mFirstMoofOffset != 0) {
2823        off64_t offset = mFirstMoofOffset;
2824        parseChunk(&offset);
2825    }
2826}
2827
2828MPEG4Source::~MPEG4Source() {
2829    if (mStarted) {
2830        stop();
2831    }
2832    free(mCurrentSampleInfoSizes);
2833    free(mCurrentSampleInfoOffsets);
2834}
2835
2836status_t MPEG4Source::start(MetaData *params) {
2837    Mutex::Autolock autoLock(mLock);
2838
2839    CHECK(!mStarted);
2840
2841    int32_t val;
2842    if (params && params->findInt32(kKeyWantsNALFragments, &val)
2843        && val != 0) {
2844        mWantsNALFragments = true;
2845    } else {
2846        mWantsNALFragments = false;
2847    }
2848
2849    mGroup = new MediaBufferGroup;
2850
2851    int32_t max_size;
2852    CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size));
2853
2854    mGroup->add_buffer(new MediaBuffer(max_size));
2855
2856    mSrcBuffer = new uint8_t[max_size];
2857
2858    mStarted = true;
2859
2860    return OK;
2861}
2862
2863status_t MPEG4Source::stop() {
2864    Mutex::Autolock autoLock(mLock);
2865
2866    CHECK(mStarted);
2867
2868    if (mBuffer != NULL) {
2869        mBuffer->release();
2870        mBuffer = NULL;
2871    }
2872
2873    delete[] mSrcBuffer;
2874    mSrcBuffer = NULL;
2875
2876    delete mGroup;
2877    mGroup = NULL;
2878
2879    mStarted = false;
2880    mCurrentSampleIndex = 0;
2881
2882    return OK;
2883}
2884
2885status_t MPEG4Source::parseChunk(off64_t *offset) {
2886    uint32_t hdr[2];
2887    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
2888        return ERROR_IO;
2889    }
2890    uint64_t chunk_size = ntohl(hdr[0]);
2891    uint32_t chunk_type = ntohl(hdr[1]);
2892    off64_t data_offset = *offset + 8;
2893
2894    if (chunk_size == 1) {
2895        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
2896            return ERROR_IO;
2897        }
2898        chunk_size = ntoh64(chunk_size);
2899        data_offset += 8;
2900
2901        if (chunk_size < 16) {
2902            // The smallest valid chunk is 16 bytes long in this case.
2903            return ERROR_MALFORMED;
2904        }
2905    } else if (chunk_size < 8) {
2906        // The smallest valid chunk is 8 bytes long.
2907        return ERROR_MALFORMED;
2908    }
2909
2910    char chunk[5];
2911    MakeFourCCString(chunk_type, chunk);
2912    ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset);
2913
2914    off64_t chunk_data_size = *offset + chunk_size - data_offset;
2915
2916    switch(chunk_type) {
2917
2918        case FOURCC('t', 'r', 'a', 'f'):
2919        case FOURCC('m', 'o', 'o', 'f'): {
2920            off64_t stop_offset = *offset + chunk_size;
2921            *offset = data_offset;
2922            while (*offset < stop_offset) {
2923                status_t err = parseChunk(offset);
2924                if (err != OK) {
2925                    return err;
2926                }
2927            }
2928            if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
2929                // *offset points to the box following this moof. Find the next moof from there.
2930
2931                while (true) {
2932                    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
2933                        return ERROR_END_OF_STREAM;
2934                    }
2935                    chunk_size = ntohl(hdr[0]);
2936                    chunk_type = ntohl(hdr[1]);
2937                    if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
2938                        mNextMoofOffset = *offset;
2939                        break;
2940                    }
2941                    *offset += chunk_size;
2942                }
2943            }
2944            break;
2945        }
2946
2947        case FOURCC('t', 'f', 'h', 'd'): {
2948                status_t err;
2949                if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
2950                    return err;
2951                }
2952                *offset += chunk_size;
2953                break;
2954        }
2955
2956        case FOURCC('t', 'r', 'u', 'n'): {
2957                status_t err;
2958                if (mLastParsedTrackId == mTrackId) {
2959                    if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
2960                        return err;
2961                    }
2962                }
2963
2964                *offset += chunk_size;
2965                break;
2966        }
2967
2968        case FOURCC('s', 'a', 'i', 'z'): {
2969            status_t err;
2970            if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
2971                return err;
2972            }
2973            *offset += chunk_size;
2974            break;
2975        }
2976        case FOURCC('s', 'a', 'i', 'o'): {
2977            status_t err;
2978            if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
2979                return err;
2980            }
2981            *offset += chunk_size;
2982            break;
2983        }
2984
2985        case FOURCC('m', 'd', 'a', 't'): {
2986            // parse DRM info if present
2987            ALOGV("MPEG4Source::parseChunk mdat");
2988            // if saiz/saoi was previously observed, do something with the sampleinfos
2989            *offset += chunk_size;
2990            break;
2991        }
2992
2993        default: {
2994            *offset += chunk_size;
2995            break;
2996        }
2997    }
2998    return OK;
2999}
3000
3001status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
3002        off64_t offset, off64_t /* size */) {
3003    ALOGV("parseSampleAuxiliaryInformationSizes");
3004    // 14496-12 8.7.12
3005    uint8_t version;
3006    if (mDataSource->readAt(
3007            offset, &version, sizeof(version))
3008            < (ssize_t)sizeof(version)) {
3009        return ERROR_IO;
3010    }
3011
3012    if (version != 0) {
3013        return ERROR_UNSUPPORTED;
3014    }
3015    offset++;
3016
3017    uint32_t flags;
3018    if (!mDataSource->getUInt24(offset, &flags)) {
3019        return ERROR_IO;
3020    }
3021    offset += 3;
3022
3023    if (flags & 1) {
3024        uint32_t tmp;
3025        if (!mDataSource->getUInt32(offset, &tmp)) {
3026            return ERROR_MALFORMED;
3027        }
3028        mCurrentAuxInfoType = tmp;
3029        offset += 4;
3030        if (!mDataSource->getUInt32(offset, &tmp)) {
3031            return ERROR_MALFORMED;
3032        }
3033        mCurrentAuxInfoTypeParameter = tmp;
3034        offset += 4;
3035    }
3036
3037    uint8_t defsize;
3038    if (mDataSource->readAt(offset, &defsize, 1) != 1) {
3039        return ERROR_MALFORMED;
3040    }
3041    mCurrentDefaultSampleInfoSize = defsize;
3042    offset++;
3043
3044    uint32_t smplcnt;
3045    if (!mDataSource->getUInt32(offset, &smplcnt)) {
3046        return ERROR_MALFORMED;
3047    }
3048    mCurrentSampleInfoCount = smplcnt;
3049    offset += 4;
3050
3051    if (mCurrentDefaultSampleInfoSize != 0) {
3052        ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
3053        return OK;
3054    }
3055    if (smplcnt > mCurrentSampleInfoAllocSize) {
3056        mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
3057        mCurrentSampleInfoAllocSize = smplcnt;
3058    }
3059
3060    mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
3061    return OK;
3062}
3063
3064status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
3065        off64_t offset, off64_t /* size */) {
3066    ALOGV("parseSampleAuxiliaryInformationOffsets");
3067    // 14496-12 8.7.13
3068    uint8_t version;
3069    if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
3070        return ERROR_IO;
3071    }
3072    offset++;
3073
3074    uint32_t flags;
3075    if (!mDataSource->getUInt24(offset, &flags)) {
3076        return ERROR_IO;
3077    }
3078    offset += 3;
3079
3080    uint32_t entrycount;
3081    if (!mDataSource->getUInt32(offset, &entrycount)) {
3082        return ERROR_IO;
3083    }
3084    offset += 4;
3085
3086    if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
3087        mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8);
3088        mCurrentSampleInfoOffsetsAllocSize = entrycount;
3089    }
3090    mCurrentSampleInfoOffsetCount = entrycount;
3091
3092    for (size_t i = 0; i < entrycount; i++) {
3093        if (version == 0) {
3094            uint32_t tmp;
3095            if (!mDataSource->getUInt32(offset, &tmp)) {
3096                return ERROR_IO;
3097            }
3098            mCurrentSampleInfoOffsets[i] = tmp;
3099            offset += 4;
3100        } else {
3101            uint64_t tmp;
3102            if (!mDataSource->getUInt64(offset, &tmp)) {
3103                return ERROR_IO;
3104            }
3105            mCurrentSampleInfoOffsets[i] = tmp;
3106            offset += 8;
3107        }
3108    }
3109
3110    // parse clear/encrypted data
3111
3112    off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
3113
3114    drmoffset += mCurrentMoofOffset;
3115    int ivlength;
3116    CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength));
3117
3118    // read CencSampleAuxiliaryDataFormats
3119    for (size_t i = 0; i < mCurrentSampleInfoCount; i++) {
3120        Sample *smpl = &mCurrentSamples.editItemAt(i);
3121
3122        memset(smpl->iv, 0, 16);
3123        if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) {
3124            return ERROR_IO;
3125        }
3126
3127        drmoffset += ivlength;
3128
3129        int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
3130        if (smplinfosize == 0) {
3131            smplinfosize = mCurrentSampleInfoSizes[i];
3132        }
3133        if (smplinfosize > ivlength) {
3134            uint16_t numsubsamples;
3135            if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) {
3136                return ERROR_IO;
3137            }
3138            drmoffset += 2;
3139            for (size_t j = 0; j < numsubsamples; j++) {
3140                uint16_t numclear;
3141                uint32_t numencrypted;
3142                if (!mDataSource->getUInt16(drmoffset, &numclear)) {
3143                    return ERROR_IO;
3144                }
3145                drmoffset += 2;
3146                if (!mDataSource->getUInt32(drmoffset, &numencrypted)) {
3147                    return ERROR_IO;
3148                }
3149                drmoffset += 4;
3150                smpl->clearsizes.add(numclear);
3151                smpl->encryptedsizes.add(numencrypted);
3152            }
3153        } else {
3154            smpl->clearsizes.add(0);
3155            smpl->encryptedsizes.add(smpl->size);
3156        }
3157    }
3158
3159
3160    return OK;
3161}
3162
3163status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
3164
3165    if (size < 8) {
3166        return -EINVAL;
3167    }
3168
3169    uint32_t flags;
3170    if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
3171        return ERROR_MALFORMED;
3172    }
3173
3174    if (flags & 0xff000000) {
3175        return -EINVAL;
3176    }
3177
3178    if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
3179        return ERROR_MALFORMED;
3180    }
3181
3182    if (mLastParsedTrackId != mTrackId) {
3183        // this is not the right track, skip it
3184        return OK;
3185    }
3186
3187    mTrackFragmentHeaderInfo.mFlags = flags;
3188    mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
3189    offset += 8;
3190    size -= 8;
3191
3192    ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
3193
3194    if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
3195        if (size < 8) {
3196            return -EINVAL;
3197        }
3198
3199        if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
3200            return ERROR_MALFORMED;
3201        }
3202        offset += 8;
3203        size -= 8;
3204    }
3205
3206    if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
3207        if (size < 4) {
3208            return -EINVAL;
3209        }
3210
3211        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
3212            return ERROR_MALFORMED;
3213        }
3214        offset += 4;
3215        size -= 4;
3216    }
3217
3218    if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3219        if (size < 4) {
3220            return -EINVAL;
3221        }
3222
3223        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
3224            return ERROR_MALFORMED;
3225        }
3226        offset += 4;
3227        size -= 4;
3228    }
3229
3230    if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3231        if (size < 4) {
3232            return -EINVAL;
3233        }
3234
3235        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
3236            return ERROR_MALFORMED;
3237        }
3238        offset += 4;
3239        size -= 4;
3240    }
3241
3242    if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3243        if (size < 4) {
3244            return -EINVAL;
3245        }
3246
3247        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
3248            return ERROR_MALFORMED;
3249        }
3250        offset += 4;
3251        size -= 4;
3252    }
3253
3254    if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
3255        mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
3256    }
3257
3258    mTrackFragmentHeaderInfo.mDataOffset = 0;
3259    return OK;
3260}
3261
3262status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
3263
3264    ALOGV("MPEG4Extractor::parseTrackFragmentRun");
3265    if (size < 8) {
3266        return -EINVAL;
3267    }
3268
3269    enum {
3270        kDataOffsetPresent                  = 0x01,
3271        kFirstSampleFlagsPresent            = 0x04,
3272        kSampleDurationPresent              = 0x100,
3273        kSampleSizePresent                  = 0x200,
3274        kSampleFlagsPresent                 = 0x400,
3275        kSampleCompositionTimeOffsetPresent = 0x800,
3276    };
3277
3278    uint32_t flags;
3279    if (!mDataSource->getUInt32(offset, &flags)) {
3280        return ERROR_MALFORMED;
3281    }
3282    ALOGV("fragment run flags: %08x", flags);
3283
3284    if (flags & 0xff000000) {
3285        return -EINVAL;
3286    }
3287
3288    if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
3289        // These two shall not be used together.
3290        return -EINVAL;
3291    }
3292
3293    uint32_t sampleCount;
3294    if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
3295        return ERROR_MALFORMED;
3296    }
3297    offset += 8;
3298    size -= 8;
3299
3300    uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
3301
3302    uint32_t firstSampleFlags = 0;
3303
3304    if (flags & kDataOffsetPresent) {
3305        if (size < 4) {
3306            return -EINVAL;
3307        }
3308
3309        int32_t dataOffsetDelta;
3310        if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
3311            return ERROR_MALFORMED;
3312        }
3313
3314        dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
3315
3316        offset += 4;
3317        size -= 4;
3318    }
3319
3320    if (flags & kFirstSampleFlagsPresent) {
3321        if (size < 4) {
3322            return -EINVAL;
3323        }
3324
3325        if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
3326            return ERROR_MALFORMED;
3327        }
3328        offset += 4;
3329        size -= 4;
3330    }
3331
3332    uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
3333             sampleCtsOffset = 0;
3334
3335    size_t bytesPerSample = 0;
3336    if (flags & kSampleDurationPresent) {
3337        bytesPerSample += 4;
3338    } else if (mTrackFragmentHeaderInfo.mFlags
3339            & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3340        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
3341    } else {
3342        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
3343    }
3344
3345    if (flags & kSampleSizePresent) {
3346        bytesPerSample += 4;
3347    } else if (mTrackFragmentHeaderInfo.mFlags
3348            & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3349        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
3350    } else {
3351        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
3352    }
3353
3354    if (flags & kSampleFlagsPresent) {
3355        bytesPerSample += 4;
3356    } else if (mTrackFragmentHeaderInfo.mFlags
3357            & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3358        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
3359    } else {
3360        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
3361    }
3362
3363    if (flags & kSampleCompositionTimeOffsetPresent) {
3364        bytesPerSample += 4;
3365    } else {
3366        sampleCtsOffset = 0;
3367    }
3368
3369    if (size < sampleCount * bytesPerSample) {
3370        return -EINVAL;
3371    }
3372
3373    Sample tmp;
3374    for (uint32_t i = 0; i < sampleCount; ++i) {
3375        if (flags & kSampleDurationPresent) {
3376            if (!mDataSource->getUInt32(offset, &sampleDuration)) {
3377                return ERROR_MALFORMED;
3378            }
3379            offset += 4;
3380        }
3381
3382        if (flags & kSampleSizePresent) {
3383            if (!mDataSource->getUInt32(offset, &sampleSize)) {
3384                return ERROR_MALFORMED;
3385            }
3386            offset += 4;
3387        }
3388
3389        if (flags & kSampleFlagsPresent) {
3390            if (!mDataSource->getUInt32(offset, &sampleFlags)) {
3391                return ERROR_MALFORMED;
3392            }
3393            offset += 4;
3394        }
3395
3396        if (flags & kSampleCompositionTimeOffsetPresent) {
3397            if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
3398                return ERROR_MALFORMED;
3399            }
3400            offset += 4;
3401        }
3402
3403        ALOGV("adding sample %d at offset 0x%08llx, size %u, duration %u, "
3404              " flags 0x%08x", i + 1,
3405                dataOffset, sampleSize, sampleDuration,
3406                (flags & kFirstSampleFlagsPresent) && i == 0
3407                    ? firstSampleFlags : sampleFlags);
3408        tmp.offset = dataOffset;
3409        tmp.size = sampleSize;
3410        tmp.duration = sampleDuration;
3411        mCurrentSamples.add(tmp);
3412
3413        dataOffset += sampleSize;
3414    }
3415
3416    mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
3417
3418    return OK;
3419}
3420
3421sp<MetaData> MPEG4Source::getFormat() {
3422    Mutex::Autolock autoLock(mLock);
3423
3424    return mFormat;
3425}
3426
3427size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
3428    switch (mNALLengthSize) {
3429        case 1:
3430            return *data;
3431        case 2:
3432            return U16_AT(data);
3433        case 3:
3434            return ((size_t)data[0] << 16) | U16_AT(&data[1]);
3435        case 4:
3436            return U32_AT(data);
3437    }
3438
3439    // This cannot happen, mNALLengthSize springs to life by adding 1 to
3440    // a 2-bit integer.
3441    CHECK(!"Should not be here.");
3442
3443    return 0;
3444}
3445
3446status_t MPEG4Source::read(
3447        MediaBuffer **out, const ReadOptions *options) {
3448    Mutex::Autolock autoLock(mLock);
3449
3450    CHECK(mStarted);
3451
3452    if (mFirstMoofOffset > 0) {
3453        return fragmentedRead(out, options);
3454    }
3455
3456    *out = NULL;
3457
3458    int64_t targetSampleTimeUs = -1;
3459
3460    int64_t seekTimeUs;
3461    ReadOptions::SeekMode mode;
3462    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3463        uint32_t findFlags = 0;
3464        switch (mode) {
3465            case ReadOptions::SEEK_PREVIOUS_SYNC:
3466                findFlags = SampleTable::kFlagBefore;
3467                break;
3468            case ReadOptions::SEEK_NEXT_SYNC:
3469                findFlags = SampleTable::kFlagAfter;
3470                break;
3471            case ReadOptions::SEEK_CLOSEST_SYNC:
3472            case ReadOptions::SEEK_CLOSEST:
3473                findFlags = SampleTable::kFlagClosest;
3474                break;
3475            default:
3476                CHECK(!"Should not be here.");
3477                break;
3478        }
3479
3480        uint32_t sampleIndex;
3481        status_t err = mSampleTable->findSampleAtTime(
3482                seekTimeUs * mTimescale / 1000000,
3483                &sampleIndex, findFlags);
3484
3485        if (mode == ReadOptions::SEEK_CLOSEST) {
3486            // We found the closest sample already, now we want the sync
3487            // sample preceding it (or the sample itself of course), even
3488            // if the subsequent sync sample is closer.
3489            findFlags = SampleTable::kFlagBefore;
3490        }
3491
3492        uint32_t syncSampleIndex;
3493        if (err == OK) {
3494            err = mSampleTable->findSyncSampleNear(
3495                    sampleIndex, &syncSampleIndex, findFlags);
3496        }
3497
3498        uint32_t sampleTime;
3499        if (err == OK) {
3500            err = mSampleTable->getMetaDataForSample(
3501                    sampleIndex, NULL, NULL, &sampleTime);
3502        }
3503
3504        if (err != OK) {
3505            if (err == ERROR_OUT_OF_RANGE) {
3506                // An attempt to seek past the end of the stream would
3507                // normally cause this ERROR_OUT_OF_RANGE error. Propagating
3508                // this all the way to the MediaPlayer would cause abnormal
3509                // termination. Legacy behaviour appears to be to behave as if
3510                // we had seeked to the end of stream, ending normally.
3511                err = ERROR_END_OF_STREAM;
3512            }
3513            ALOGV("end of stream");
3514            return err;
3515        }
3516
3517        if (mode == ReadOptions::SEEK_CLOSEST) {
3518            targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
3519        }
3520
3521#if 0
3522        uint32_t syncSampleTime;
3523        CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
3524                    syncSampleIndex, NULL, NULL, &syncSampleTime));
3525
3526        ALOGI("seek to time %lld us => sample at time %lld us, "
3527             "sync sample at time %lld us",
3528             seekTimeUs,
3529             sampleTime * 1000000ll / mTimescale,
3530             syncSampleTime * 1000000ll / mTimescale);
3531#endif
3532
3533        mCurrentSampleIndex = syncSampleIndex;
3534        if (mBuffer != NULL) {
3535            mBuffer->release();
3536            mBuffer = NULL;
3537        }
3538
3539        // fall through
3540    }
3541
3542    off64_t offset;
3543    size_t size;
3544    uint32_t cts, stts;
3545    bool isSyncSample;
3546    bool newBuffer = false;
3547    if (mBuffer == NULL) {
3548        newBuffer = true;
3549
3550        status_t err =
3551            mSampleTable->getMetaDataForSample(
3552                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
3553
3554        if (err != OK) {
3555            return err;
3556        }
3557
3558        err = mGroup->acquire_buffer(&mBuffer);
3559
3560        if (err != OK) {
3561            CHECK(mBuffer == NULL);
3562            return err;
3563        }
3564    }
3565
3566    if (!mIsAVC || mWantsNALFragments) {
3567        if (newBuffer) {
3568            ssize_t num_bytes_read =
3569                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3570
3571            if (num_bytes_read < (ssize_t)size) {
3572                mBuffer->release();
3573                mBuffer = NULL;
3574
3575                return ERROR_IO;
3576            }
3577
3578            CHECK(mBuffer != NULL);
3579            mBuffer->set_range(0, size);
3580            mBuffer->meta_data()->clear();
3581            mBuffer->meta_data()->setInt64(
3582                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3583            mBuffer->meta_data()->setInt64(
3584                    kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
3585
3586            if (targetSampleTimeUs >= 0) {
3587                mBuffer->meta_data()->setInt64(
3588                        kKeyTargetTime, targetSampleTimeUs);
3589            }
3590
3591            if (isSyncSample) {
3592                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3593            }
3594
3595            ++mCurrentSampleIndex;
3596        }
3597
3598        if (!mIsAVC) {
3599            *out = mBuffer;
3600            mBuffer = NULL;
3601
3602            return OK;
3603        }
3604
3605        // Each NAL unit is split up into its constituent fragments and
3606        // each one of them returned in its own buffer.
3607
3608        CHECK(mBuffer->range_length() >= mNALLengthSize);
3609
3610        const uint8_t *src =
3611            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3612
3613        size_t nal_size = parseNALSize(src);
3614        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3615            ALOGE("incomplete NAL unit.");
3616
3617            mBuffer->release();
3618            mBuffer = NULL;
3619
3620            return ERROR_MALFORMED;
3621        }
3622
3623        MediaBuffer *clone = mBuffer->clone();
3624        CHECK(clone != NULL);
3625        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3626
3627        CHECK(mBuffer != NULL);
3628        mBuffer->set_range(
3629                mBuffer->range_offset() + mNALLengthSize + nal_size,
3630                mBuffer->range_length() - mNALLengthSize - nal_size);
3631
3632        if (mBuffer->range_length() == 0) {
3633            mBuffer->release();
3634            mBuffer = NULL;
3635        }
3636
3637        *out = clone;
3638
3639        return OK;
3640    } else {
3641        // Whole NAL units are returned but each fragment is prefixed by
3642        // the start code (0x00 00 00 01).
3643        ssize_t num_bytes_read = 0;
3644        int32_t drm = 0;
3645        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3646        if (usesDRM) {
3647            num_bytes_read =
3648                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3649        } else {
3650            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3651        }
3652
3653        if (num_bytes_read < (ssize_t)size) {
3654            mBuffer->release();
3655            mBuffer = NULL;
3656
3657            return ERROR_IO;
3658        }
3659
3660        if (usesDRM) {
3661            CHECK(mBuffer != NULL);
3662            mBuffer->set_range(0, size);
3663
3664        } else {
3665            uint8_t *dstData = (uint8_t *)mBuffer->data();
3666            size_t srcOffset = 0;
3667            size_t dstOffset = 0;
3668
3669            while (srcOffset < size) {
3670                bool isMalFormed = (srcOffset + mNALLengthSize > size);
3671                size_t nalLength = 0;
3672                if (!isMalFormed) {
3673                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
3674                    srcOffset += mNALLengthSize;
3675                    isMalFormed = srcOffset + nalLength > size;
3676                }
3677
3678                if (isMalFormed) {
3679                    ALOGE("Video is malformed");
3680                    mBuffer->release();
3681                    mBuffer = NULL;
3682                    return ERROR_MALFORMED;
3683                }
3684
3685                if (nalLength == 0) {
3686                    continue;
3687                }
3688
3689                CHECK(dstOffset + 4 <= mBuffer->size());
3690
3691                dstData[dstOffset++] = 0;
3692                dstData[dstOffset++] = 0;
3693                dstData[dstOffset++] = 0;
3694                dstData[dstOffset++] = 1;
3695                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
3696                srcOffset += nalLength;
3697                dstOffset += nalLength;
3698            }
3699            CHECK_EQ(srcOffset, size);
3700            CHECK(mBuffer != NULL);
3701            mBuffer->set_range(0, dstOffset);
3702        }
3703
3704        mBuffer->meta_data()->clear();
3705        mBuffer->meta_data()->setInt64(
3706                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3707        mBuffer->meta_data()->setInt64(
3708                kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
3709
3710        if (targetSampleTimeUs >= 0) {
3711            mBuffer->meta_data()->setInt64(
3712                    kKeyTargetTime, targetSampleTimeUs);
3713        }
3714
3715        if (isSyncSample) {
3716            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3717        }
3718
3719        ++mCurrentSampleIndex;
3720
3721        *out = mBuffer;
3722        mBuffer = NULL;
3723
3724        return OK;
3725    }
3726}
3727
3728status_t MPEG4Source::fragmentedRead(
3729        MediaBuffer **out, const ReadOptions *options) {
3730
3731    ALOGV("MPEG4Source::fragmentedRead");
3732
3733    CHECK(mStarted);
3734
3735    *out = NULL;
3736
3737    int64_t targetSampleTimeUs = -1;
3738
3739    int64_t seekTimeUs;
3740    ReadOptions::SeekMode mode;
3741    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
3742
3743        int numSidxEntries = mSegments.size();
3744        if (numSidxEntries != 0) {
3745            int64_t totalTime = 0;
3746            off64_t totalOffset = mFirstMoofOffset;
3747            for (int i = 0; i < numSidxEntries; i++) {
3748                const SidxEntry *se = &mSegments[i];
3749                if (totalTime + se->mDurationUs > seekTimeUs) {
3750                    // The requested time is somewhere in this segment
3751                    if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
3752                        (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
3753                        (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
3754                        // requested next sync, or closest sync and it was closer to the end of
3755                        // this segment
3756                        totalTime += se->mDurationUs;
3757                        totalOffset += se->mSize;
3758                    }
3759                    break;
3760                }
3761                totalTime += se->mDurationUs;
3762                totalOffset += se->mSize;
3763            }
3764            mCurrentMoofOffset = totalOffset;
3765            mCurrentSamples.clear();
3766            mCurrentSampleIndex = 0;
3767            parseChunk(&totalOffset);
3768            mCurrentTime = totalTime * mTimescale / 1000000ll;
3769        } else {
3770            // without sidx boxes, we can only seek to 0
3771            mCurrentMoofOffset = mFirstMoofOffset;
3772            mCurrentSamples.clear();
3773            mCurrentSampleIndex = 0;
3774            off64_t tmp = mCurrentMoofOffset;
3775            parseChunk(&tmp);
3776            mCurrentTime = 0;
3777        }
3778
3779        if (mBuffer != NULL) {
3780            mBuffer->release();
3781            mBuffer = NULL;
3782        }
3783
3784        // fall through
3785    }
3786
3787    off64_t offset = 0;
3788    size_t size = 0;
3789    uint32_t cts = 0;
3790    bool isSyncSample = false;
3791    bool newBuffer = false;
3792    if (mBuffer == NULL) {
3793        newBuffer = true;
3794
3795        if (mCurrentSampleIndex >= mCurrentSamples.size()) {
3796            // move to next fragment if there is one
3797            if (mNextMoofOffset <= mCurrentMoofOffset) {
3798                return ERROR_END_OF_STREAM;
3799            }
3800            off64_t nextMoof = mNextMoofOffset;
3801            mCurrentMoofOffset = nextMoof;
3802            mCurrentSamples.clear();
3803            mCurrentSampleIndex = 0;
3804            parseChunk(&nextMoof);
3805            if (mCurrentSampleIndex >= mCurrentSamples.size()) {
3806                return ERROR_END_OF_STREAM;
3807            }
3808        }
3809
3810        const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
3811        offset = smpl->offset;
3812        size = smpl->size;
3813        cts = mCurrentTime;
3814        mCurrentTime += smpl->duration;
3815        isSyncSample = (mCurrentSampleIndex == 0); // XXX
3816
3817        status_t err = mGroup->acquire_buffer(&mBuffer);
3818
3819        if (err != OK) {
3820            CHECK(mBuffer == NULL);
3821            ALOGV("acquire_buffer returned %d", err);
3822            return err;
3823        }
3824    }
3825
3826    const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
3827    const sp<MetaData> bufmeta = mBuffer->meta_data();
3828    bufmeta->clear();
3829    if (smpl->encryptedsizes.size()) {
3830        // store clear/encrypted lengths in metadata
3831        bufmeta->setData(kKeyPlainSizes, 0,
3832                smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
3833        bufmeta->setData(kKeyEncryptedSizes, 0,
3834                smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
3835        bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size?
3836        bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
3837        bufmeta->setInt32(kKeyCryptoMode, mCryptoMode);
3838        bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16);
3839    }
3840
3841    if (!mIsAVC || mWantsNALFragments) {
3842        if (newBuffer) {
3843            ssize_t num_bytes_read =
3844                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
3845
3846            if (num_bytes_read < (ssize_t)size) {
3847                mBuffer->release();
3848                mBuffer = NULL;
3849
3850                ALOGV("i/o error");
3851                return ERROR_IO;
3852            }
3853
3854            CHECK(mBuffer != NULL);
3855            mBuffer->set_range(0, size);
3856            mBuffer->meta_data()->setInt64(
3857                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3858            mBuffer->meta_data()->setInt64(
3859                    kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
3860
3861            if (targetSampleTimeUs >= 0) {
3862                mBuffer->meta_data()->setInt64(
3863                        kKeyTargetTime, targetSampleTimeUs);
3864            }
3865
3866            if (isSyncSample) {
3867                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3868            }
3869
3870            ++mCurrentSampleIndex;
3871        }
3872
3873        if (!mIsAVC) {
3874            *out = mBuffer;
3875            mBuffer = NULL;
3876
3877            return OK;
3878        }
3879
3880        // Each NAL unit is split up into its constituent fragments and
3881        // each one of them returned in its own buffer.
3882
3883        CHECK(mBuffer->range_length() >= mNALLengthSize);
3884
3885        const uint8_t *src =
3886            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
3887
3888        size_t nal_size = parseNALSize(src);
3889        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
3890            ALOGE("incomplete NAL unit.");
3891
3892            mBuffer->release();
3893            mBuffer = NULL;
3894
3895            return ERROR_MALFORMED;
3896        }
3897
3898        MediaBuffer *clone = mBuffer->clone();
3899        CHECK(clone != NULL);
3900        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
3901
3902        CHECK(mBuffer != NULL);
3903        mBuffer->set_range(
3904                mBuffer->range_offset() + mNALLengthSize + nal_size,
3905                mBuffer->range_length() - mNALLengthSize - nal_size);
3906
3907        if (mBuffer->range_length() == 0) {
3908            mBuffer->release();
3909            mBuffer = NULL;
3910        }
3911
3912        *out = clone;
3913
3914        return OK;
3915    } else {
3916        ALOGV("whole NAL");
3917        // Whole NAL units are returned but each fragment is prefixed by
3918        // the start code (0x00 00 00 01).
3919        ssize_t num_bytes_read = 0;
3920        int32_t drm = 0;
3921        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
3922        if (usesDRM) {
3923            num_bytes_read =
3924                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
3925        } else {
3926            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
3927        }
3928
3929        if (num_bytes_read < (ssize_t)size) {
3930            mBuffer->release();
3931            mBuffer = NULL;
3932
3933            ALOGV("i/o error");
3934            return ERROR_IO;
3935        }
3936
3937        if (usesDRM) {
3938            CHECK(mBuffer != NULL);
3939            mBuffer->set_range(0, size);
3940
3941        } else {
3942            uint8_t *dstData = (uint8_t *)mBuffer->data();
3943            size_t srcOffset = 0;
3944            size_t dstOffset = 0;
3945
3946            while (srcOffset < size) {
3947                bool isMalFormed = (srcOffset + mNALLengthSize > size);
3948                size_t nalLength = 0;
3949                if (!isMalFormed) {
3950                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
3951                    srcOffset += mNALLengthSize;
3952                    isMalFormed = srcOffset + nalLength > size;
3953                }
3954
3955                if (isMalFormed) {
3956                    ALOGE("Video is malformed");
3957                    mBuffer->release();
3958                    mBuffer = NULL;
3959                    return ERROR_MALFORMED;
3960                }
3961
3962                if (nalLength == 0) {
3963                    continue;
3964                }
3965
3966                CHECK(dstOffset + 4 <= mBuffer->size());
3967
3968                dstData[dstOffset++] = 0;
3969                dstData[dstOffset++] = 0;
3970                dstData[dstOffset++] = 0;
3971                dstData[dstOffset++] = 1;
3972                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
3973                srcOffset += nalLength;
3974                dstOffset += nalLength;
3975            }
3976            CHECK_EQ(srcOffset, size);
3977            CHECK(mBuffer != NULL);
3978            mBuffer->set_range(0, dstOffset);
3979        }
3980
3981        mBuffer->meta_data()->setInt64(
3982                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
3983        mBuffer->meta_data()->setInt64(
3984                kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
3985
3986        if (targetSampleTimeUs >= 0) {
3987            mBuffer->meta_data()->setInt64(
3988                    kKeyTargetTime, targetSampleTimeUs);
3989        }
3990
3991        if (isSyncSample) {
3992            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
3993        }
3994
3995        ++mCurrentSampleIndex;
3996
3997        *out = mBuffer;
3998        mBuffer = NULL;
3999
4000        return OK;
4001    }
4002}
4003
4004MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
4005        const char *mimePrefix) {
4006    for (Track *track = mFirstTrack; track != NULL; track = track->next) {
4007        const char *mime;
4008        if (track->meta != NULL
4009                && track->meta->findCString(kKeyMIMEType, &mime)
4010                && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
4011            return track;
4012        }
4013    }
4014
4015    return NULL;
4016}
4017
4018static bool LegacySniffMPEG4(
4019        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
4020    uint8_t header[8];
4021
4022    ssize_t n = source->readAt(4, header, sizeof(header));
4023    if (n < (ssize_t)sizeof(header)) {
4024        return false;
4025    }
4026
4027    if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
4028        || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
4029        || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
4030        || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
4031        || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
4032        || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) {
4033        *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4034        *confidence = 0.4;
4035
4036        return true;
4037    }
4038
4039    return false;
4040}
4041
4042static bool isCompatibleBrand(uint32_t fourcc) {
4043    static const uint32_t kCompatibleBrands[] = {
4044        FOURCC('i', 's', 'o', 'm'),
4045        FOURCC('i', 's', 'o', '2'),
4046        FOURCC('a', 'v', 'c', '1'),
4047        FOURCC('3', 'g', 'p', '4'),
4048        FOURCC('m', 'p', '4', '1'),
4049        FOURCC('m', 'p', '4', '2'),
4050
4051        // Won't promise that the following file types can be played.
4052        // Just give these file types a chance.
4053        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
4054        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
4055
4056        FOURCC('3', 'g', '2', 'a'),  // 3GPP2
4057        FOURCC('3', 'g', '2', 'b'),
4058    };
4059
4060    for (size_t i = 0;
4061         i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
4062         ++i) {
4063        if (kCompatibleBrands[i] == fourcc) {
4064            return true;
4065        }
4066    }
4067
4068    return false;
4069}
4070
4071// Attempt to actually parse the 'ftyp' atom and determine if a suitable
4072// compatible brand is present.
4073// Also try to identify where this file's metadata ends
4074// (end of the 'moov' atom) and report it to the caller as part of
4075// the metadata.
4076static bool BetterSniffMPEG4(
4077        const sp<DataSource> &source, String8 *mimeType, float *confidence,
4078        sp<AMessage> *meta) {
4079    // We scan up to 128 bytes to identify this file as an MP4.
4080    static const off64_t kMaxScanOffset = 128ll;
4081
4082    off64_t offset = 0ll;
4083    bool foundGoodFileType = false;
4084    off64_t moovAtomEndOffset = -1ll;
4085    bool done = false;
4086
4087    while (!done && offset < kMaxScanOffset) {
4088        uint32_t hdr[2];
4089        if (source->readAt(offset, hdr, 8) < 8) {
4090            return false;
4091        }
4092
4093        uint64_t chunkSize = ntohl(hdr[0]);
4094        uint32_t chunkType = ntohl(hdr[1]);
4095        off64_t chunkDataOffset = offset + 8;
4096
4097        if (chunkSize == 1) {
4098            if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
4099                return false;
4100            }
4101
4102            chunkSize = ntoh64(chunkSize);
4103            chunkDataOffset += 8;
4104
4105            if (chunkSize < 16) {
4106                // The smallest valid chunk is 16 bytes long in this case.
4107                return false;
4108            }
4109        } else if (chunkSize < 8) {
4110            // The smallest valid chunk is 8 bytes long.
4111            return false;
4112        }
4113
4114        off64_t chunkDataSize = offset + chunkSize - chunkDataOffset;
4115
4116        char chunkstring[5];
4117        MakeFourCCString(chunkType, chunkstring);
4118        ALOGV("saw chunk type %s, size %lld @ %lld", chunkstring, chunkSize, offset);
4119        switch (chunkType) {
4120            case FOURCC('f', 't', 'y', 'p'):
4121            {
4122                if (chunkDataSize < 8) {
4123                    return false;
4124                }
4125
4126                uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
4127                for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
4128                    if (i == 1) {
4129                        // Skip this index, it refers to the minorVersion,
4130                        // not a brand.
4131                        continue;
4132                    }
4133
4134                    uint32_t brand;
4135                    if (source->readAt(
4136                                chunkDataOffset + 4 * i, &brand, 4) < 4) {
4137                        return false;
4138                    }
4139
4140                    brand = ntohl(brand);
4141
4142                    if (isCompatibleBrand(brand)) {
4143                        foundGoodFileType = true;
4144                        break;
4145                    }
4146                }
4147
4148                if (!foundGoodFileType) {
4149                    return false;
4150                }
4151
4152                break;
4153            }
4154
4155            case FOURCC('m', 'o', 'o', 'v'):
4156            {
4157                moovAtomEndOffset = offset + chunkSize;
4158
4159                done = true;
4160                break;
4161            }
4162
4163            default:
4164                break;
4165        }
4166
4167        offset += chunkSize;
4168    }
4169
4170    if (!foundGoodFileType) {
4171        return false;
4172    }
4173
4174    *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4175    *confidence = 0.4f;
4176
4177    if (moovAtomEndOffset >= 0) {
4178        *meta = new AMessage;
4179        (*meta)->setInt64("meta-data-size", moovAtomEndOffset);
4180
4181        ALOGV("found metadata size: %lld", moovAtomEndOffset);
4182    }
4183
4184    return true;
4185}
4186
4187bool SniffMPEG4(
4188        const sp<DataSource> &source, String8 *mimeType, float *confidence,
4189        sp<AMessage> *meta) {
4190    if (BetterSniffMPEG4(source, mimeType, confidence, meta)) {
4191        return true;
4192    }
4193
4194    if (LegacySniffMPEG4(source, mimeType, confidence)) {
4195        ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
4196        return true;
4197    }
4198
4199    return false;
4200}
4201
4202}  // namespace android
4203