MPEG4Extractor.cpp revision 7a1e3e81264189e23a1db2b174e1b5a5d4c7d1c3
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "MPEG4Extractor"
18#include <utils/Log.h>
19
20#include "include/MPEG4Extractor.h"
21#include "include/SampleTable.h"
22#include "include/ESDS.h"
23#include "include/TimedTextPlayer.h"
24
25#include <arpa/inet.h>
26
27#include <ctype.h>
28#include <stdint.h>
29#include <stdlib.h>
30#include <string.h>
31
32#include <media/stagefright/foundation/ADebug.h>
33#include <media/stagefright/DataSource.h>
34#include <media/stagefright/MediaBuffer.h>
35#include <media/stagefright/MediaBufferGroup.h>
36#include <media/stagefright/MediaDefs.h>
37#include <media/stagefright/MediaSource.h>
38#include <media/stagefright/MetaData.h>
39#include <media/stagefright/Utils.h>
40#include <utils/String8.h>
41
42namespace android {
43
// MediaSource subclass built from a track format, a data source, a timescale
// and a sample table. Only the declaration is visible in this section; the
// member function definitions live elsewhere.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    // All three object arguments are passed as sp<> references.
    MPEG4Source(const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable);

    // "params" is optional and defaults to NULL.
    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    // Result is handed back through "*buffer"; "options" is optional.
    virtual status_t read(
            MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    // Protected destructor: instances cannot be deleted directly by
    // arbitrary callers.
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    // Values received through the constructor (presumably stored as-is;
    // confirm against the constructor definition).
    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;

    // NOTE(review): presumably true when the track carries AVC video, with
    // mNALLengthSize being the byte width of each NAL length prefix -- confirm.
    bool mIsAVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    // Raw byte buffer; NOTE(review): ownership/lifetime is decided by
    // start()/stop(), which are not visible here.
    uint8_t *mSrcBuffer;

    // Decodes a NAL unit size from the bytes at "data".
    size_t parseNALSize(const uint8_t *data) const;

    // Copy operations are declared private to keep the class non-copyable
    // from outside.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
90
91// This custom data source wraps an existing one and satisfies requests
92// falling entirely within a cached range from the cache while forwarding
93// all remaining requests to the wrapped datasource.
94// This is used to cache the full sampletable metadata for a single track,
95// possibly wrapping multiple times to cover all tracks, i.e.
96// Each MPEG4DataSource caches the sampletable metadata for a single track.
97
98struct MPEG4DataSource : public DataSource {
99    MPEG4DataSource(const sp<DataSource> &source);
100
101    virtual status_t initCheck() const;
102    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
103    virtual status_t getSize(off64_t *size);
104    virtual uint32_t flags();
105
106    status_t setCachedRange(off64_t offset, size_t size);
107
108protected:
109    virtual ~MPEG4DataSource();
110
111private:
112    Mutex mLock;
113
114    sp<DataSource> mSource;
115    off64_t mCachedOffset;
116    size_t mCachedSize;
117    uint8_t *mCache;
118
119    void clearCache();
120
121    MPEG4DataSource(const MPEG4DataSource &);
122    MPEG4DataSource &operator=(const MPEG4DataSource &);
123};
124
125MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
126    : mSource(source),
127      mCachedOffset(0),
128      mCachedSize(0),
129      mCache(NULL) {
130}
131
132MPEG4DataSource::~MPEG4DataSource() {
133    clearCache();
134}
135
136void MPEG4DataSource::clearCache() {
137    if (mCache) {
138        free(mCache);
139        mCache = NULL;
140    }
141
142    mCachedOffset = 0;
143    mCachedSize = 0;
144}
145
146status_t MPEG4DataSource::initCheck() const {
147    return mSource->initCheck();
148}
149
150ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
151    Mutex::Autolock autoLock(mLock);
152
153    if (offset >= mCachedOffset
154            && offset + size <= mCachedOffset + mCachedSize) {
155        memcpy(data, &mCache[offset - mCachedOffset], size);
156        return size;
157    }
158
159    return mSource->readAt(offset, data, size);
160}
161
162status_t MPEG4DataSource::getSize(off64_t *size) {
163    return mSource->getSize(size);
164}
165
166uint32_t MPEG4DataSource::flags() {
167    return mSource->flags();
168}
169
170status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
171    Mutex::Autolock autoLock(mLock);
172
173    clearCache();
174
175    mCache = (uint8_t *)malloc(size);
176
177    if (mCache == NULL) {
178        return -ENOMEM;
179    }
180
181    mCachedOffset = offset;
182    mCachedSize = size;
183
184    ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
185
186    if (err < (ssize_t)size) {
187        clearCache();
188
189        return ERROR_IO;
190    }
191
192    return OK;
193}
194
195////////////////////////////////////////////////////////////////////////////////
196
// Debug helper: prints "size" bytes starting at "_data" to stdout, 16 bytes
// per line. Each line consists of
//   - the offset of its first byte ("0x%04zx" followed by two spaces),
//   - sixteen hex columns ("%02x "), blank-padded past the end of the data,
//     with one extra space between the 8th and 9th column,
//   - one space and the bytes again as characters, non-printable ones as '.'.
// Nothing is printed when "size" is 0.
static void hexdump(const void *_data, size_t size) {
    static const size_t kBytesPerLine = 16;

    const uint8_t *data = (const uint8_t *)_data;

    for (size_t offset = 0; offset < size; offset += kBytesPerLine) {
        // "z" is the length modifier for size_t; plain %x expects an
        // unsigned int and mismatches the argument where size_t is wider.
        printf("0x%04zx  ", offset);

        // Number of real data bytes on this line.
        size_t n = size - offset;
        if (n > kBytesPerLine) {
            n = kBytesPerLine;
        }

        for (size_t i = 0; i < kBytesPerLine; ++i) {
            if (i == kBytesPerLine / 2) {
                printf(" ");
            }

            if (i < n) {
                printf("%02x ", data[offset + i]);
            } else {
                // Keep the character column aligned on a short last line.
                printf("   ");
            }
        }

        printf(" ");

        for (size_t i = 0; i < n; ++i) {
            const uint8_t byte = data[offset + i];
            printf("%c", isprint(byte) ? byte : '.');
        }

        printf("\n");
    }
}
235
236static const char *FourCC2MIME(uint32_t fourcc) {
237    switch (fourcc) {
238        case FOURCC('m', 'p', '4', 'a'):
239            return MEDIA_MIMETYPE_AUDIO_AAC;
240
241        case FOURCC('s', 'a', 'm', 'r'):
242            return MEDIA_MIMETYPE_AUDIO_AMR_NB;
243
244        case FOURCC('s', 'a', 'w', 'b'):
245            return MEDIA_MIMETYPE_AUDIO_AMR_WB;
246
247        case FOURCC('m', 'p', '4', 'v'):
248            return MEDIA_MIMETYPE_VIDEO_MPEG4;
249
250        case FOURCC('s', '2', '6', '3'):
251        case FOURCC('h', '2', '6', '3'):
252        case FOURCC('H', '2', '6', '3'):
253            return MEDIA_MIMETYPE_VIDEO_H263;
254
255        case FOURCC('a', 'v', 'c', '1'):
256            return MEDIA_MIMETYPE_VIDEO_AVC;
257
258        default:
259            CHECK(!"should not be here.");
260            return NULL;
261    }
262}
263
264MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
265    : mDataSource(source),
266      mInitCheck(NO_INIT),
267      mHasVideo(false),
268      mFirstTrack(NULL),
269      mLastTrack(NULL),
270      mFileMetaData(new MetaData),
271      mFirstSINF(NULL),
272      mIsDrm(false) {
273}
274
275MPEG4Extractor::~MPEG4Extractor() {
276    Track *track = mFirstTrack;
277    while (track) {
278        Track *next = track->next;
279
280        delete track;
281        track = next;
282    }
283    mFirstTrack = mLastTrack = NULL;
284
285    SINF *sinf = mFirstSINF;
286    while (sinf) {
287        SINF *next = sinf->next;
288        delete sinf->IPMPData;
289        delete sinf;
290        sinf = next;
291    }
292    mFirstSINF = NULL;
293}
294
295sp<MetaData> MPEG4Extractor::getMetaData() {
296    status_t err;
297    if ((err = readMetaData()) != OK) {
298        return new MetaData;
299    }
300
301    return mFileMetaData;
302}
303
304size_t MPEG4Extractor::countTracks() {
305    status_t err;
306    if ((err = readMetaData()) != OK) {
307        return 0;
308    }
309
310    size_t n = 0;
311    Track *track = mFirstTrack;
312    while (track) {
313        ++n;
314        track = track->next;
315    }
316
317    return n;
318}
319
320sp<MetaData> MPEG4Extractor::getTrackMetaData(
321        size_t index, uint32_t flags) {
322    status_t err;
323    if ((err = readMetaData()) != OK) {
324        return NULL;
325    }
326
327    Track *track = mFirstTrack;
328    while (index > 0) {
329        if (track == NULL) {
330            return NULL;
331        }
332
333        track = track->next;
334        --index;
335    }
336
337    if (track == NULL) {
338        return NULL;
339    }
340
341    if ((flags & kIncludeExtensiveMetaData)
342            && !track->includes_expensive_metadata) {
343        track->includes_expensive_metadata = true;
344
345        const char *mime;
346        CHECK(track->meta->findCString(kKeyMIMEType, &mime));
347        if (!strncasecmp("video/", mime, 6)) {
348            uint32_t sampleIndex;
349            uint32_t sampleTime;
350            if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK
351                    && track->sampleTable->getMetaDataForSample(
352                        sampleIndex, NULL /* offset */, NULL /* size */,
353                        &sampleTime) == OK) {
354                track->meta->setInt64(
355                        kKeyThumbnailTime,
356                        ((int64_t)sampleTime * 1000000) / track->timescale);
357            }
358        }
359    }
360
361    return track->meta;
362}
363
364status_t MPEG4Extractor::readMetaData() {
365    if (mInitCheck != NO_INIT) {
366        return mInitCheck;
367    }
368
369    off64_t offset = 0;
370    status_t err;
371    while ((err = parseChunk(&offset, 0)) == OK) {
372    }
373
374    if (mInitCheck == OK) {
375        if (mHasVideo) {
376            mFileMetaData->setCString(kKeyMIMEType, "video/mp4");
377        } else {
378            mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
379        }
380
381        mInitCheck = OK;
382    } else {
383        mInitCheck = err;
384    }
385
386    CHECK_NE(err, (status_t)NO_INIT);
387    return mInitCheck;
388}
389
390void MPEG4Extractor::setDrmFlag(bool flag) {
391    mIsDrm = flag;
392}
393
394char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
395    if (mFirstSINF == NULL) {
396        return NULL;
397    }
398
399    SINF *sinf = mFirstSINF;
400    while (sinf && (trackID != sinf->trackID)) {
401        sinf = sinf->next;
402    }
403
404    if (sinf == NULL) {
405        return NULL;
406    }
407
408    *len = sinf->len;
409    return sinf->IPMPData;
410}
411
412// Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
413int32_t readSize(off64_t offset,
414        const sp<DataSource> DataSource, uint8_t *numOfBytes) {
415    uint32_t size = 0;
416    uint8_t data;
417    bool moreData = true;
418    *numOfBytes = 0;
419
420    while (moreData) {
421        if (DataSource->readAt(offset, &data, 1) < 1) {
422            return -1;
423        }
424        offset ++;
425        moreData = (data >= 128) ? true : false;
426        size = (size << 7) | (data & 0x7f); // Take last 7 bits
427        (*numOfBytes) ++;
428    }
429
430    return size;
431}
432
433status_t MPEG4Extractor::parseDrmSINF(off64_t *offset, off64_t data_offset) {
434    uint8_t updateIdTag;
435    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
436        return ERROR_IO;
437    }
438    data_offset ++;
439
440    if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
441        return ERROR_MALFORMED;
442    }
443
444    uint8_t numOfBytes;
445    int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
446    if (size < 0) {
447        return ERROR_IO;
448    }
449    int32_t classSize = size;
450    data_offset += numOfBytes;
451
452    while(size >= 11 ) {
453        uint8_t descriptorTag;
454        if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
455            return ERROR_IO;
456        }
457        data_offset ++;
458
459        if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
460            return ERROR_MALFORMED;
461        }
462
463        uint8_t buffer[8];
464        //ObjectDescriptorID and ObjectDescriptor url flag
465        if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
466            return ERROR_IO;
467        }
468        data_offset += 2;
469
470        if ((buffer[1] >> 5) & 0x0001) { //url flag is set
471            return ERROR_MALFORMED;
472        }
473
474        if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
475            return ERROR_IO;
476        }
477        data_offset += 8;
478
479        if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
480                || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
481            return ERROR_MALFORMED;
482        }
483
484        SINF *sinf = new SINF;
485        sinf->trackID = U16_AT(&buffer[3]);
486        sinf->IPMPDescriptorID = buffer[7];
487        sinf->next = mFirstSINF;
488        mFirstSINF = sinf;
489
490        size -= (8 + 2 + 1);
491    }
492
493    if (size != 0) {
494        return ERROR_MALFORMED;
495    }
496
497    if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
498        return ERROR_IO;
499    }
500    data_offset ++;
501
502    if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
503        return ERROR_MALFORMED;
504    }
505
506    size = readSize(data_offset, mDataSource, &numOfBytes);
507    if (size < 0) {
508        return ERROR_IO;
509    }
510    classSize = size;
511    data_offset += numOfBytes;
512
513    while (size > 0) {
514        uint8_t tag;
515        int32_t dataLen;
516        if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
517            return ERROR_IO;
518        }
519        data_offset ++;
520
521        if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
522            uint8_t id;
523            dataLen = readSize(data_offset, mDataSource, &numOfBytes);
524            if (dataLen < 0) {
525                return ERROR_IO;
526            } else if (dataLen < 4) {
527                return ERROR_MALFORMED;
528            }
529            data_offset += numOfBytes;
530
531            if (mDataSource->readAt(data_offset, &id, 1) < 1) {
532                return ERROR_IO;
533            }
534            data_offset ++;
535
536            SINF *sinf = mFirstSINF;
537            while (sinf && (sinf->IPMPDescriptorID != id)) {
538                sinf = sinf->next;
539            }
540            if (sinf == NULL) {
541                return ERROR_MALFORMED;
542            }
543            sinf->len = dataLen - 3;
544            sinf->IPMPData = new char[sinf->len];
545
546            if (mDataSource->readAt(data_offset + 2, sinf->IPMPData, sinf->len) < sinf->len) {
547                return ERROR_IO;
548            }
549            data_offset += sinf->len;
550
551            size -= (dataLen + numOfBytes + 1);
552        }
553    }
554
555    if (size != 0) {
556        return ERROR_MALFORMED;
557    }
558
559    return UNKNOWN_ERROR;  // Return a dummy error.
560}
561
// Writes the four bytes of "x", most significant first, into s[0..3] and
// terminates the string at s[4]. "s" must provide room for 5 chars.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int i = 0; i < 4; ++i) {
        const int shift = 24 - 8 * i;
        s[i] = (x >> shift) & 0xff;
    }
    s[4] = '\0';
}
569
570struct PathAdder {
571    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
572        : mPath(path) {
573        mPath->push(chunkType);
574    }
575
576    ~PathAdder() {
577        mPath->pop();
578    }
579
580private:
581    Vector<uint32_t> *mPath;
582
583    PathAdder(const PathAdder &);
584    PathAdder &operator=(const PathAdder &);
585};
586
587static bool underMetaDataPath(const Vector<uint32_t> &path) {
588    return path.size() >= 5
589        && path[0] == FOURCC('m', 'o', 'o', 'v')
590        && path[1] == FOURCC('u', 'd', 't', 'a')
591        && path[2] == FOURCC('m', 'e', 't', 'a')
592        && path[3] == FOURCC('i', 'l', 's', 't');
593}
594
595// Given a time in seconds since Jan 1 1904, produce a human-readable string.
596static void convertTimeToDate(int64_t time_1904, String8 *s) {
597    time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600);
598
599    char tmp[32];
600    strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970));
601
602    s->setTo(tmp);
603}
604
605status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
606    uint32_t hdr[2];
607    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
608        return ERROR_IO;
609    }
610    uint64_t chunk_size = ntohl(hdr[0]);
611    uint32_t chunk_type = ntohl(hdr[1]);
612    off64_t data_offset = *offset + 8;
613
614    if (chunk_size == 1) {
615        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
616            return ERROR_IO;
617        }
618        chunk_size = ntoh64(chunk_size);
619        data_offset += 8;
620
621        if (chunk_size < 16) {
622            // The smallest valid chunk is 16 bytes long in this case.
623            return ERROR_MALFORMED;
624        }
625    } else if (chunk_size < 8) {
626        // The smallest valid chunk is 8 bytes long.
627        return ERROR_MALFORMED;
628    }
629
630    char chunk[5];
631    MakeFourCCString(chunk_type, chunk);
632
633#if 0
634    static const char kWhitespace[] = "                                        ";
635    const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
636    printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size);
637
638    char buffer[256];
639    size_t n = chunk_size;
640    if (n > sizeof(buffer)) {
641        n = sizeof(buffer);
642    }
643    if (mDataSource->readAt(*offset, buffer, n)
644            < (ssize_t)n) {
645        return ERROR_IO;
646    }
647
648    hexdump(buffer, n);
649#endif
650
651    PathAdder autoAdder(&mPath, chunk_type);
652
653    off64_t chunk_data_size = *offset + chunk_size - data_offset;
654
655    if (chunk_type != FOURCC('c', 'p', 'r', 't')
656            && mPath.size() == 5 && underMetaDataPath(mPath)) {
657        off64_t stop_offset = *offset + chunk_size;
658        *offset = data_offset;
659        while (*offset < stop_offset) {
660            status_t err = parseChunk(offset, depth + 1);
661            if (err != OK) {
662                return err;
663            }
664        }
665
666        if (*offset != stop_offset) {
667            return ERROR_MALFORMED;
668        }
669
670        return OK;
671    }
672
673    switch(chunk_type) {
674        case FOURCC('m', 'o', 'o', 'v'):
675        case FOURCC('t', 'r', 'a', 'k'):
676        case FOURCC('m', 'd', 'i', 'a'):
677        case FOURCC('m', 'i', 'n', 'f'):
678        case FOURCC('d', 'i', 'n', 'f'):
679        case FOURCC('s', 't', 'b', 'l'):
680        case FOURCC('m', 'v', 'e', 'x'):
681        case FOURCC('m', 'o', 'o', 'f'):
682        case FOURCC('t', 'r', 'a', 'f'):
683        case FOURCC('m', 'f', 'r', 'a'):
684        case FOURCC('u', 'd', 't', 'a'):
685        case FOURCC('i', 'l', 's', 't'):
686        {
687            if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
688                LOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size);
689
690                if (mDataSource->flags()
691                        & (DataSource::kWantsPrefetching
692                            | DataSource::kIsCachingDataSource)) {
693                    sp<MPEG4DataSource> cachedSource =
694                        new MPEG4DataSource(mDataSource);
695
696                    if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
697                        mDataSource = cachedSource;
698                    }
699                }
700
701                mLastTrack->sampleTable = new SampleTable(mDataSource);
702            }
703
704            bool isTrack = false;
705            if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
706                isTrack = true;
707
708                Track *track = new Track;
709                track->next = NULL;
710                if (mLastTrack) {
711                    mLastTrack->next = track;
712                } else {
713                    mFirstTrack = track;
714                }
715                mLastTrack = track;
716
717                track->meta = new MetaData;
718                track->includes_expensive_metadata = false;
719                track->skipTrack = false;
720                track->timescale = 0;
721                track->meta->setCString(kKeyMIMEType, "application/octet-stream");
722            }
723
724            off64_t stop_offset = *offset + chunk_size;
725            *offset = data_offset;
726            while (*offset < stop_offset) {
727                status_t err = parseChunk(offset, depth + 1);
728                if (err != OK) {
729                    return err;
730                }
731            }
732
733            if (*offset != stop_offset) {
734                return ERROR_MALFORMED;
735            }
736
737            if (isTrack) {
738                if (mLastTrack->skipTrack) {
739                    Track *cur = mFirstTrack;
740
741                    if (cur == mLastTrack) {
742                        delete cur;
743                        mFirstTrack = mLastTrack = NULL;
744                    } else {
745                        while (cur && cur->next != mLastTrack) {
746                            cur = cur->next;
747                        }
748                        cur->next = NULL;
749                        delete mLastTrack;
750                        mLastTrack = cur;
751                    }
752
753                    return OK;
754                }
755
756                status_t err = verifyTrack(mLastTrack);
757
758                if (err != OK) {
759                    return err;
760                }
761            } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
762                mInitCheck = OK;
763
764                if (!mIsDrm) {
765                    return UNKNOWN_ERROR;  // Return a dummy error.
766                } else {
767                    return OK;
768                }
769            }
770            break;
771        }
772
773        case FOURCC('t', 'k', 'h', 'd'):
774        {
775            status_t err;
776            if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
777                return err;
778            }
779
780            *offset += chunk_size;
781            break;
782        }
783
784        case FOURCC('m', 'd', 'h', 'd'):
785        {
786            if (chunk_data_size < 4) {
787                return ERROR_MALFORMED;
788            }
789
790            uint8_t version;
791            if (mDataSource->readAt(
792                        data_offset, &version, sizeof(version))
793                    < (ssize_t)sizeof(version)) {
794                return ERROR_IO;
795            }
796
797            off64_t timescale_offset;
798
799            if (version == 1) {
800                timescale_offset = data_offset + 4 + 16;
801            } else if (version == 0) {
802                timescale_offset = data_offset + 4 + 8;
803            } else {
804                return ERROR_IO;
805            }
806
807            uint32_t timescale;
808            if (mDataSource->readAt(
809                        timescale_offset, &timescale, sizeof(timescale))
810                    < (ssize_t)sizeof(timescale)) {
811                return ERROR_IO;
812            }
813
814            mLastTrack->timescale = ntohl(timescale);
815
816            int64_t duration;
817            if (version == 1) {
818                if (mDataSource->readAt(
819                            timescale_offset + 4, &duration, sizeof(duration))
820                        < (ssize_t)sizeof(duration)) {
821                    return ERROR_IO;
822                }
823                duration = ntoh64(duration);
824            } else {
825                int32_t duration32;
826                if (mDataSource->readAt(
827                            timescale_offset + 4, &duration32, sizeof(duration32))
828                        < (ssize_t)sizeof(duration32)) {
829                    return ERROR_IO;
830                }
831                duration = ntohl(duration32);
832            }
833            mLastTrack->meta->setInt64(
834                    kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
835
836            uint8_t lang[2];
837            off64_t lang_offset;
838            if (version == 1) {
839                lang_offset = timescale_offset + 4 + 8;
840            } else if (version == 0) {
841                lang_offset = timescale_offset + 4 + 4;
842            } else {
843                return ERROR_IO;
844            }
845
846            if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
847                    < (ssize_t)sizeof(lang)) {
848                return ERROR_IO;
849            }
850
851            // To get the ISO-639-2/T three character language code
852            // 1 bit pad followed by 3 5-bits characters. Each character
853            // is packed as the difference between its ASCII value and 0x60.
854            char lang_code[4];
855            lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
856            lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
857            lang_code[2] = (lang[1] & 0x1f) + 0x60;
858            lang_code[3] = '\0';
859
860            mLastTrack->meta->setCString(
861                    kKeyMediaLanguage, lang_code);
862
863            *offset += chunk_size;
864            break;
865        }
866
867        case FOURCC('s', 't', 's', 'd'):
868        {
869            if (chunk_data_size < 8) {
870                return ERROR_MALFORMED;
871            }
872
873            uint8_t buffer[8];
874            if (chunk_data_size < (off64_t)sizeof(buffer)) {
875                return ERROR_MALFORMED;
876            }
877
878            if (mDataSource->readAt(
879                        data_offset, buffer, 8) < 8) {
880                return ERROR_IO;
881            }
882
883            if (U32_AT(buffer) != 0) {
884                // Should be version 0, flags 0.
885                return ERROR_MALFORMED;
886            }
887
888            uint32_t entry_count = U32_AT(&buffer[4]);
889
890            if (entry_count > 1) {
891                // For now we only support a single type of media per track.
892
893                mLastTrack->skipTrack = true;
894                *offset += chunk_size;
895                break;
896            }
897
898            off64_t stop_offset = *offset + chunk_size;
899            *offset = data_offset + 8;
900            for (uint32_t i = 0; i < entry_count; ++i) {
901                status_t err = parseChunk(offset, depth + 1);
902                if (err != OK) {
903                    return err;
904                }
905            }
906
907            if (*offset != stop_offset) {
908                return ERROR_MALFORMED;
909            }
910            break;
911        }
912
913        case FOURCC('m', 'p', '4', 'a'):
914        case FOURCC('s', 'a', 'm', 'r'):
915        case FOURCC('s', 'a', 'w', 'b'):
916        {
917            uint8_t buffer[8 + 20];
918            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
919                // Basic AudioSampleEntry size.
920                return ERROR_MALFORMED;
921            }
922
923            if (mDataSource->readAt(
924                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
925                return ERROR_IO;
926            }
927
928            uint16_t data_ref_index = U16_AT(&buffer[6]);
929            uint16_t num_channels = U16_AT(&buffer[16]);
930
931            uint16_t sample_size = U16_AT(&buffer[18]);
932            uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
933
934            if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB,
935                            FourCC2MIME(chunk_type))) {
936                // AMR NB audio is always mono, 8kHz
937                num_channels = 1;
938                sample_rate = 8000;
939            } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB,
940                               FourCC2MIME(chunk_type))) {
941                // AMR WB audio is always mono, 16kHz
942                num_channels = 1;
943                sample_rate = 16000;
944            }
945
946#if 0
947            printf("*** coding='%s' %d channels, size %d, rate %d\n",
948                   chunk, num_channels, sample_size, sample_rate);
949#endif
950
951            mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
952            mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
953            mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
954
955            off64_t stop_offset = *offset + chunk_size;
956            *offset = data_offset + sizeof(buffer);
957            while (*offset < stop_offset) {
958                status_t err = parseChunk(offset, depth + 1);
959                if (err != OK) {
960                    return err;
961                }
962            }
963
964            if (*offset != stop_offset) {
965                return ERROR_MALFORMED;
966            }
967            break;
968        }
969
970        case FOURCC('m', 'p', '4', 'v'):
971        case FOURCC('s', '2', '6', '3'):
972        case FOURCC('H', '2', '6', '3'):
973        case FOURCC('h', '2', '6', '3'):
974        case FOURCC('a', 'v', 'c', '1'):
975        {
976            mHasVideo = true;
977
978            uint8_t buffer[78];
979            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
980                // Basic VideoSampleEntry size.
981                return ERROR_MALFORMED;
982            }
983
984            if (mDataSource->readAt(
985                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
986                return ERROR_IO;
987            }
988
989            uint16_t data_ref_index = U16_AT(&buffer[6]);
990            uint16_t width = U16_AT(&buffer[6 + 18]);
991            uint16_t height = U16_AT(&buffer[6 + 20]);
992
993            // The video sample is not stand-compliant if it has invalid dimension.
994            // Use some default width and height value, and
995            // let the decoder figure out the actual width and height (and thus
996            // be prepared for INFO_FOMRAT_CHANGED event).
997            if (width == 0)  width  = 352;
998            if (height == 0) height = 288;
999
1000            // printf("*** coding='%s' width=%d height=%d\n",
1001            //        chunk, width, height);
1002
1003            mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1004            mLastTrack->meta->setInt32(kKeyWidth, width);
1005            mLastTrack->meta->setInt32(kKeyHeight, height);
1006
1007            off64_t stop_offset = *offset + chunk_size;
1008            *offset = data_offset + sizeof(buffer);
1009            while (*offset < stop_offset) {
1010                status_t err = parseChunk(offset, depth + 1);
1011                if (err != OK) {
1012                    return err;
1013                }
1014            }
1015
1016            if (*offset != stop_offset) {
1017                return ERROR_MALFORMED;
1018            }
1019            break;
1020        }
1021
1022        case FOURCC('s', 't', 'c', 'o'):
1023        case FOURCC('c', 'o', '6', '4'):
1024        {
1025            status_t err =
1026                mLastTrack->sampleTable->setChunkOffsetParams(
1027                        chunk_type, data_offset, chunk_data_size);
1028
1029            if (err != OK) {
1030                return err;
1031            }
1032
1033            *offset += chunk_size;
1034            break;
1035        }
1036
1037        case FOURCC('s', 't', 's', 'c'):
1038        {
1039            status_t err =
1040                mLastTrack->sampleTable->setSampleToChunkParams(
1041                        data_offset, chunk_data_size);
1042
1043            if (err != OK) {
1044                return err;
1045            }
1046
1047            *offset += chunk_size;
1048            break;
1049        }
1050
1051        case FOURCC('s', 't', 's', 'z'):
1052        case FOURCC('s', 't', 'z', '2'):
1053        {
1054            status_t err =
1055                mLastTrack->sampleTable->setSampleSizeParams(
1056                        chunk_type, data_offset, chunk_data_size);
1057
1058            if (err != OK) {
1059                return err;
1060            }
1061
1062            size_t max_size;
1063            err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1064
1065            if (err != OK) {
1066                return err;
1067            }
1068
1069            // Assume that a given buffer only contains at most 10 fragments,
1070            // each fragment originally prefixed with a 2 byte length will
1071            // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1072            // and thus will grow by 2 bytes per fragment.
1073            mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1074            *offset += chunk_size;
1075
1076            // Calculate average frame rate.
1077            const char *mime;
1078            CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1079            if (!strncasecmp("video/", mime, 6)) {
1080                size_t nSamples = mLastTrack->sampleTable->countSamples();
1081                int64_t durationUs;
1082                if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1083                    if (durationUs > 0) {
1084                        int32_t frameRate = (nSamples * 1000000LL +
1085                                    (durationUs >> 1)) / durationUs;
1086                        mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1087                    }
1088                }
1089            }
1090
1091            break;
1092        }
1093
1094        case FOURCC('s', 't', 't', 's'):
1095        {
1096            status_t err =
1097                mLastTrack->sampleTable->setTimeToSampleParams(
1098                        data_offset, chunk_data_size);
1099
1100            if (err != OK) {
1101                return err;
1102            }
1103
1104            *offset += chunk_size;
1105            break;
1106        }
1107
1108        case FOURCC('c', 't', 't', 's'):
1109        {
1110            status_t err =
1111                mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1112                        data_offset, chunk_data_size);
1113
1114            if (err != OK) {
1115                return err;
1116            }
1117
1118            *offset += chunk_size;
1119            break;
1120        }
1121
1122        case FOURCC('s', 't', 's', 's'):
1123        {
1124            status_t err =
1125                mLastTrack->sampleTable->setSyncSampleParams(
1126                        data_offset, chunk_data_size);
1127
1128            if (err != OK) {
1129                return err;
1130            }
1131
1132            *offset += chunk_size;
1133            break;
1134        }
1135
1136        case FOURCC('e', 's', 'd', 's'):
1137        {
1138            if (chunk_data_size < 4) {
1139                return ERROR_MALFORMED;
1140            }
1141
1142            uint8_t buffer[256];
1143            if (chunk_data_size > (off64_t)sizeof(buffer)) {
1144                return ERROR_BUFFER_TOO_SMALL;
1145            }
1146
1147            if (mDataSource->readAt(
1148                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
1149                return ERROR_IO;
1150            }
1151
1152            if (U32_AT(buffer) != 0) {
1153                // Should be version 0, flags 0.
1154                return ERROR_MALFORMED;
1155            }
1156
1157            mLastTrack->meta->setData(
1158                    kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1159
1160            if (mPath.size() >= 2
1161                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1162                // Information from the ESDS must be relied on for proper
1163                // setup of sample rate and channel count for MPEG4 Audio.
1164                // The generic header appears to only contain generic
1165                // information...
1166
1167                status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1168                        &buffer[4], chunk_data_size - 4);
1169
1170                if (err != OK) {
1171                    return err;
1172                }
1173            }
1174
1175            *offset += chunk_size;
1176            break;
1177        }
1178
1179        case FOURCC('a', 'v', 'c', 'C'):
1180        {
1181            char buffer[256];
1182            if (chunk_data_size > (off64_t)sizeof(buffer)) {
1183                return ERROR_BUFFER_TOO_SMALL;
1184            }
1185
1186            if (mDataSource->readAt(
1187                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
1188                return ERROR_IO;
1189            }
1190
1191            mLastTrack->meta->setData(
1192                    kKeyAVCC, kTypeAVCC, buffer, chunk_data_size);
1193
1194            *offset += chunk_size;
1195            break;
1196        }
1197
1198        case FOURCC('d', '2', '6', '3'):
1199        {
1200            /*
1201             * d263 contains a fixed 7 bytes part:
1202             *   vendor - 4 bytes
1203             *   version - 1 byte
1204             *   level - 1 byte
1205             *   profile - 1 byte
1206             * optionally, "d263" box itself may contain a 16-byte
1207             * bit rate box (bitr)
1208             *   average bit rate - 4 bytes
1209             *   max bit rate - 4 bytes
1210             */
1211            char buffer[23];
1212            if (chunk_data_size != 7 &&
1213                chunk_data_size != 23) {
1214                LOGE("Incorrect D263 box size %lld", chunk_data_size);
1215                return ERROR_MALFORMED;
1216            }
1217
1218            if (mDataSource->readAt(
1219                    data_offset, buffer, chunk_data_size) < chunk_data_size) {
1220                return ERROR_IO;
1221            }
1222
1223            mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1224
1225            *offset += chunk_size;
1226            break;
1227        }
1228
1229        case FOURCC('m', 'e', 't', 'a'):
1230        {
1231            uint8_t buffer[4];
1232            if (chunk_data_size < (off64_t)sizeof(buffer)) {
1233                return ERROR_MALFORMED;
1234            }
1235
1236            if (mDataSource->readAt(
1237                        data_offset, buffer, 4) < 4) {
1238                return ERROR_IO;
1239            }
1240
1241            if (U32_AT(buffer) != 0) {
1242                // Should be version 0, flags 0.
1243
1244                // If it's not, let's assume this is one of those
1245                // apparently malformed chunks that don't have flags
1246                // and completely different semantics than what's
1247                // in the MPEG4 specs and skip it.
1248                *offset += chunk_size;
1249                return OK;
1250            }
1251
1252            off64_t stop_offset = *offset + chunk_size;
1253            *offset = data_offset + sizeof(buffer);
1254            while (*offset < stop_offset) {
1255                status_t err = parseChunk(offset, depth + 1);
1256                if (err != OK) {
1257                    return err;
1258                }
1259            }
1260
1261            if (*offset != stop_offset) {
1262                return ERROR_MALFORMED;
1263            }
1264            break;
1265        }
1266
1267        case FOURCC('d', 'a', 't', 'a'):
1268        {
1269            if (mPath.size() == 6 && underMetaDataPath(mPath)) {
1270                status_t err = parseMetaData(data_offset, chunk_data_size);
1271
1272                if (err != OK) {
1273                    return err;
1274                }
1275            }
1276
1277            *offset += chunk_size;
1278            break;
1279        }
1280
1281        case FOURCC('m', 'v', 'h', 'd'):
1282        {
1283            if (chunk_data_size < 12) {
1284                return ERROR_MALFORMED;
1285            }
1286
1287            uint8_t header[12];
1288            if (mDataSource->readAt(
1289                        data_offset, header, sizeof(header))
1290                    < (ssize_t)sizeof(header)) {
1291                return ERROR_IO;
1292            }
1293
1294            int64_t creationTime;
1295            if (header[0] == 1) {
1296                creationTime = U64_AT(&header[4]);
1297            } else if (header[0] != 0) {
1298                return ERROR_MALFORMED;
1299            } else {
1300                creationTime = U32_AT(&header[4]);
1301            }
1302
1303            String8 s;
1304            convertTimeToDate(creationTime, &s);
1305
1306            mFileMetaData->setCString(kKeyDate, s.string());
1307
1308            *offset += chunk_size;
1309            break;
1310        }
1311
1312        case FOURCC('m', 'd', 'a', 't'):
1313        {
1314            if (!mIsDrm) {
1315                *offset += chunk_size;
1316                break;
1317            }
1318
1319            if (chunk_size < 8) {
1320                return ERROR_MALFORMED;
1321            }
1322
1323            return parseDrmSINF(offset, data_offset);
1324        }
1325
1326        case FOURCC('t', 'x', '3', 'g'):
1327        {
1328            mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
1329
1330            *offset += chunk_size;
1331            break;
1332        }
1333
1334        default:
1335        {
1336            *offset += chunk_size;
1337            break;
1338        }
1339    }
1340
1341    return OK;
1342}
1343
1344status_t MPEG4Extractor::parseTrackHeader(
1345        off64_t data_offset, off64_t data_size) {
1346    if (data_size < 4) {
1347        return ERROR_MALFORMED;
1348    }
1349
1350    uint8_t version;
1351    if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1352        return ERROR_IO;
1353    }
1354
1355    size_t dynSize = (version == 1) ? 36 : 24;
1356
1357    uint8_t buffer[36 + 60];
1358
1359    if (data_size != (off64_t)dynSize + 60) {
1360        return ERROR_MALFORMED;
1361    }
1362
1363    if (mDataSource->readAt(
1364                data_offset, buffer, data_size) < (ssize_t)data_size) {
1365        return ERROR_IO;
1366    }
1367
1368    uint64_t ctime, mtime, duration;
1369    int32_t id;
1370
1371    if (version == 1) {
1372        ctime = U64_AT(&buffer[4]);
1373        mtime = U64_AT(&buffer[12]);
1374        id = U32_AT(&buffer[20]);
1375        duration = U64_AT(&buffer[28]);
1376    } else {
1377        CHECK_EQ((unsigned)version, 0u);
1378
1379        ctime = U32_AT(&buffer[4]);
1380        mtime = U32_AT(&buffer[8]);
1381        id = U32_AT(&buffer[12]);
1382        duration = U32_AT(&buffer[20]);
1383    }
1384
1385    mLastTrack->meta->setInt32(kKeyTrackID, id);
1386
1387    size_t matrixOffset = dynSize + 16;
1388    int32_t a00 = U32_AT(&buffer[matrixOffset]);
1389    int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
1390    int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
1391    int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
1392    int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
1393    int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
1394
1395#if 0
1396    LOGI("x' = %.2f * x + %.2f * y + %.2f",
1397         a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
1398    LOGI("y' = %.2f * x + %.2f * y + %.2f",
1399         a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
1400#endif
1401
1402    uint32_t rotationDegrees;
1403
1404    static const int32_t kFixedOne = 0x10000;
1405    if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
1406        // Identity, no rotation
1407        rotationDegrees = 0;
1408    } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
1409        rotationDegrees = 90;
1410    } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
1411        rotationDegrees = 270;
1412    } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
1413        rotationDegrees = 180;
1414    } else {
1415        LOGW("We only support 0,90,180,270 degree rotation matrices");
1416        rotationDegrees = 0;
1417    }
1418
1419    if (rotationDegrees != 0) {
1420        mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
1421    }
1422
1423    // Handle presentation display size, which could be different
1424    // from the image size indicated by kKeyWidth and kKeyHeight.
1425    uint32_t width = U32_AT(&buffer[dynSize + 52]);
1426    uint32_t height = U32_AT(&buffer[dynSize + 56]);
1427    mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
1428    mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
1429
1430    return OK;
1431}
1432
1433status_t MPEG4Extractor::parseMetaData(off64_t offset, size_t size) {
1434    if (size < 4) {
1435        return ERROR_MALFORMED;
1436    }
1437
1438    uint8_t *buffer = new uint8_t[size + 1];
1439    if (mDataSource->readAt(
1440                offset, buffer, size) != (ssize_t)size) {
1441        delete[] buffer;
1442        buffer = NULL;
1443
1444        return ERROR_IO;
1445    }
1446
1447    uint32_t flags = U32_AT(buffer);
1448
1449    uint32_t metadataKey = 0;
1450    switch (mPath[4]) {
1451        case FOURCC(0xa9, 'a', 'l', 'b'):
1452        {
1453            metadataKey = kKeyAlbum;
1454            break;
1455        }
1456        case FOURCC(0xa9, 'A', 'R', 'T'):
1457        {
1458            metadataKey = kKeyArtist;
1459            break;
1460        }
1461        case FOURCC('a', 'A', 'R', 'T'):
1462        {
1463            metadataKey = kKeyAlbumArtist;
1464            break;
1465        }
1466        case FOURCC(0xa9, 'd', 'a', 'y'):
1467        {
1468            metadataKey = kKeyYear;
1469            break;
1470        }
1471        case FOURCC(0xa9, 'n', 'a', 'm'):
1472        {
1473            metadataKey = kKeyTitle;
1474            break;
1475        }
1476        case FOURCC(0xa9, 'w', 'r', 't'):
1477        {
1478            metadataKey = kKeyWriter;
1479            break;
1480        }
1481        case FOURCC('c', 'o', 'v', 'r'):
1482        {
1483            metadataKey = kKeyAlbumArt;
1484            break;
1485        }
1486        case FOURCC('g', 'n', 'r', 'e'):
1487        {
1488            metadataKey = kKeyGenre;
1489            break;
1490        }
1491        case FOURCC(0xa9, 'g', 'e', 'n'):
1492        {
1493            metadataKey = kKeyGenre;
1494            break;
1495        }
1496        case FOURCC('c', 'p', 'i', 'l'):
1497        {
1498            if (size == 9 && flags == 21) {
1499                char tmp[16];
1500                sprintf(tmp, "%d",
1501                        (int)buffer[size - 1]);
1502
1503                mFileMetaData->setCString(kKeyCompilation, tmp);
1504            }
1505            break;
1506        }
1507        case FOURCC('t', 'r', 'k', 'n'):
1508        {
1509            if (size == 16 && flags == 0) {
1510                char tmp[16];
1511                sprintf(tmp, "%d/%d",
1512                        (int)buffer[size - 5], (int)buffer[size - 3]);
1513
1514                mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
1515            }
1516            break;
1517        }
1518        case FOURCC('d', 'i', 's', 'k'):
1519        {
1520            if (size == 14 && flags == 0) {
1521                char tmp[16];
1522                sprintf(tmp, "%d/%d",
1523                        (int)buffer[size - 3], (int)buffer[size - 1]);
1524
1525                mFileMetaData->setCString(kKeyDiscNumber, tmp);
1526            }
1527            break;
1528        }
1529
1530        default:
1531            break;
1532    }
1533
1534    if (size >= 8 && metadataKey) {
1535        if (metadataKey == kKeyAlbumArt) {
1536            mFileMetaData->setData(
1537                    kKeyAlbumArt, MetaData::TYPE_NONE,
1538                    buffer + 8, size - 8);
1539        } else if (metadataKey == kKeyGenre) {
1540            if (flags == 0) {
1541                // uint8_t genre code, iTunes genre codes are
1542                // the standard id3 codes, except they start
1543                // at 1 instead of 0 (e.g. Pop is 14, not 13)
1544                // We use standard id3 numbering, so subtract 1.
1545                int genrecode = (int)buffer[size - 1];
1546                genrecode--;
1547                if (genrecode < 0) {
1548                    genrecode = 255; // reserved for 'unknown genre'
1549                }
1550                char genre[10];
1551                sprintf(genre, "%d", genrecode);
1552
1553                mFileMetaData->setCString(metadataKey, genre);
1554            } else if (flags == 1) {
1555                // custom genre string
1556                buffer[size] = '\0';
1557
1558                mFileMetaData->setCString(
1559                        metadataKey, (const char *)buffer + 8);
1560            }
1561        } else {
1562            buffer[size] = '\0';
1563
1564            mFileMetaData->setCString(
1565                    metadataKey, (const char *)buffer + 8);
1566        }
1567    }
1568
1569    delete[] buffer;
1570    buffer = NULL;
1571
1572    return OK;
1573}
1574
1575sp<MediaSource> MPEG4Extractor::getTrack(size_t index) {
1576    status_t err;
1577    if ((err = readMetaData()) != OK) {
1578        return NULL;
1579    }
1580
1581    Track *track = mFirstTrack;
1582    while (index > 0) {
1583        if (track == NULL) {
1584            return NULL;
1585        }
1586
1587        track = track->next;
1588        --index;
1589    }
1590
1591    if (track == NULL) {
1592        return NULL;
1593    }
1594
1595    return new MPEG4Source(
1596            track->meta, mDataSource, track->timescale, track->sampleTable);
1597}
1598
1599// static
1600status_t MPEG4Extractor::verifyTrack(Track *track) {
1601    const char *mime;
1602    CHECK(track->meta->findCString(kKeyMIMEType, &mime));
1603
1604    uint32_t type;
1605    const void *data;
1606    size_t size;
1607    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
1608        if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
1609                || type != kTypeAVCC) {
1610            return ERROR_MALFORMED;
1611        }
1612    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
1613            || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
1614        if (!track->meta->findData(kKeyESDS, &type, &data, &size)
1615                || type != kTypeESDS) {
1616            return ERROR_MALFORMED;
1617        }
1618    }
1619
1620    return OK;
1621}
1622
1623status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
1624        const void *esds_data, size_t esds_size) {
1625    ESDS esds(esds_data, esds_size);
1626
1627    uint8_t objectTypeIndication;
1628    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
1629        return ERROR_MALFORMED;
1630    }
1631
1632    if (objectTypeIndication == 0xe1) {
1633        // This isn't MPEG4 audio at all, it's QCELP 14k...
1634        mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
1635        return OK;
1636    }
1637
1638    if (objectTypeIndication  == 0x6b) {
1639        // The media subtype is MP3 audio
1640        // Our software MP3 audio decoder may not be able to handle
1641        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
1642        LOGE("MP3 track in MP4/3GPP file is not supported");
1643        return ERROR_UNSUPPORTED;
1644    }
1645
1646    const uint8_t *csd;
1647    size_t csd_size;
1648    if (esds.getCodecSpecificInfo(
1649                (const void **)&csd, &csd_size) != OK) {
1650        return ERROR_MALFORMED;
1651    }
1652
1653#if 0
1654    printf("ESD of size %d\n", csd_size);
1655    hexdump(csd, csd_size);
1656#endif
1657
1658    if (csd_size == 0) {
1659        // There's no further information, i.e. no codec specific data
1660        // Let's assume that the information provided in the mpeg4 headers
1661        // is accurate and hope for the best.
1662
1663        return OK;
1664    }
1665
1666    if (csd_size < 2) {
1667        return ERROR_MALFORMED;
1668    }
1669
1670    uint32_t objectType = csd[0] >> 3;
1671
1672    if (objectType == 31) {
1673        return ERROR_UNSUPPORTED;
1674    }
1675
1676    uint32_t freqIndex = (csd[0] & 7) << 1 | (csd[1] >> 7);
1677    int32_t sampleRate = 0;
1678    int32_t numChannels = 0;
1679    if (freqIndex == 15) {
1680        if (csd_size < 5) {
1681            return ERROR_MALFORMED;
1682        }
1683
1684        sampleRate = (csd[1] & 0x7f) << 17
1685                        | csd[2] << 9
1686                        | csd[3] << 1
1687                        | (csd[4] >> 7);
1688
1689        numChannels = (csd[4] >> 3) & 15;
1690    } else {
1691        static uint32_t kSamplingRate[] = {
1692            96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
1693            16000, 12000, 11025, 8000, 7350
1694        };
1695
1696        if (freqIndex == 13 || freqIndex == 14) {
1697            return ERROR_MALFORMED;
1698        }
1699
1700        sampleRate = kSamplingRate[freqIndex];
1701        numChannels = (csd[1] >> 3) & 15;
1702    }
1703
1704    if (numChannels == 0) {
1705        return ERROR_UNSUPPORTED;
1706    }
1707
1708    int32_t prevSampleRate;
1709    CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));
1710
1711    if (prevSampleRate != sampleRate) {
1712        LOGV("mpeg4 audio sample rate different from previous setting. "
1713             "was: %d, now: %d", prevSampleRate, sampleRate);
1714    }
1715
1716    mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);
1717
1718    int32_t prevChannelCount;
1719    CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));
1720
1721    if (prevChannelCount != numChannels) {
1722        LOGV("mpeg4 audio channel count different from previous setting. "
1723             "was: %d, now: %d", prevChannelCount, numChannels);
1724    }
1725
1726    mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);
1727
1728    return OK;
1729}
1730
1731////////////////////////////////////////////////////////////////////////////////
1732
1733MPEG4Source::MPEG4Source(
1734        const sp<MetaData> &format,
1735        const sp<DataSource> &dataSource,
1736        int32_t timeScale,
1737        const sp<SampleTable> &sampleTable)
1738    : mFormat(format),
1739      mDataSource(dataSource),
1740      mTimescale(timeScale),
1741      mSampleTable(sampleTable),
1742      mCurrentSampleIndex(0),
1743      mIsAVC(false),
1744      mNALLengthSize(0),
1745      mStarted(false),
1746      mGroup(NULL),
1747      mBuffer(NULL),
1748      mWantsNALFragments(false),
1749      mSrcBuffer(NULL) {
1750    const char *mime;
1751    bool success = mFormat->findCString(kKeyMIMEType, &mime);
1752    CHECK(success);
1753
1754    mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
1755
1756    if (mIsAVC) {
1757        uint32_t type;
1758        const void *data;
1759        size_t size;
1760        CHECK(format->findData(kKeyAVCC, &type, &data, &size));
1761
1762        const uint8_t *ptr = (const uint8_t *)data;
1763
1764        CHECK(size >= 7);
1765        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
1766
1767        // The number of bytes used to encode the length of a NAL unit.
1768        mNALLengthSize = 1 + (ptr[4] & 3);
1769    }
1770}
1771
1772MPEG4Source::~MPEG4Source() {
1773    if (mStarted) {
1774        stop();
1775    }
1776}
1777
1778status_t MPEG4Source::start(MetaData *params) {
1779    Mutex::Autolock autoLock(mLock);
1780
1781    CHECK(!mStarted);
1782
1783    int32_t val;
1784    if (params && params->findInt32(kKeyWantsNALFragments, &val)
1785        && val != 0) {
1786        mWantsNALFragments = true;
1787    } else {
1788        mWantsNALFragments = false;
1789    }
1790
1791    mGroup = new MediaBufferGroup;
1792
1793    int32_t max_size;
1794    CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size));
1795
1796    mGroup->add_buffer(new MediaBuffer(max_size));
1797
1798    mSrcBuffer = new uint8_t[max_size];
1799
1800    mStarted = true;
1801
1802    return OK;
1803}
1804
1805status_t MPEG4Source::stop() {
1806    Mutex::Autolock autoLock(mLock);
1807
1808    CHECK(mStarted);
1809
1810    if (mBuffer != NULL) {
1811        mBuffer->release();
1812        mBuffer = NULL;
1813    }
1814
1815    delete[] mSrcBuffer;
1816    mSrcBuffer = NULL;
1817
1818    delete mGroup;
1819    mGroup = NULL;
1820
1821    mStarted = false;
1822    mCurrentSampleIndex = 0;
1823
1824    return OK;
1825}
1826
1827sp<MetaData> MPEG4Source::getFormat() {
1828    Mutex::Autolock autoLock(mLock);
1829
1830    return mFormat;
1831}
1832
1833size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
1834    switch (mNALLengthSize) {
1835        case 1:
1836            return *data;
1837        case 2:
1838            return U16_AT(data);
1839        case 3:
1840            return ((size_t)data[0] << 16) | U16_AT(&data[1]);
1841        case 4:
1842            return U32_AT(data);
1843    }
1844
1845    // This cannot happen, mNALLengthSize springs to life by adding 1 to
1846    // a 2-bit integer.
1847    CHECK(!"Should not be here.");
1848
1849    return 0;
1850}
1851
1852status_t MPEG4Source::read(
1853        MediaBuffer **out, const ReadOptions *options) {
1854    Mutex::Autolock autoLock(mLock);
1855
1856    CHECK(mStarted);
1857
1858    *out = NULL;
1859
1860    int64_t targetSampleTimeUs = -1;
1861
1862    int64_t seekTimeUs;
1863    ReadOptions::SeekMode mode;
1864    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
1865        uint32_t findFlags = 0;
1866        switch (mode) {
1867            case ReadOptions::SEEK_PREVIOUS_SYNC:
1868                findFlags = SampleTable::kFlagBefore;
1869                break;
1870            case ReadOptions::SEEK_NEXT_SYNC:
1871                findFlags = SampleTable::kFlagAfter;
1872                break;
1873            case ReadOptions::SEEK_CLOSEST_SYNC:
1874            case ReadOptions::SEEK_CLOSEST:
1875                findFlags = SampleTable::kFlagClosest;
1876                break;
1877            default:
1878                CHECK(!"Should not be here.");
1879                break;
1880        }
1881
1882        uint32_t sampleIndex;
1883        status_t err = mSampleTable->findSampleAtTime(
1884                seekTimeUs * mTimescale / 1000000,
1885                &sampleIndex, findFlags);
1886
1887        if (mode == ReadOptions::SEEK_CLOSEST) {
1888            // We found the closest sample already, now we want the sync
1889            // sample preceding it (or the sample itself of course), even
1890            // if the subsequent sync sample is closer.
1891            findFlags = SampleTable::kFlagBefore;
1892        }
1893
1894        uint32_t syncSampleIndex;
1895        if (err == OK) {
1896            err = mSampleTable->findSyncSampleNear(
1897                    sampleIndex, &syncSampleIndex, findFlags);
1898        }
1899
1900        if (err != OK) {
1901            if (err == ERROR_OUT_OF_RANGE) {
1902                // An attempt to seek past the end of the stream would
1903                // normally cause this ERROR_OUT_OF_RANGE error. Propagating
1904                // this all the way to the MediaPlayer would cause abnormal
1905                // termination. Legacy behaviour appears to be to behave as if
1906                // we had seeked to the end of stream, ending normally.
1907                err = ERROR_END_OF_STREAM;
1908            }
1909            return err;
1910        }
1911
1912        uint32_t sampleTime;
1913        CHECK_EQ((status_t)OK, mSampleTable->getMetaDataForSample(
1914                    sampleIndex, NULL, NULL, &sampleTime));
1915
1916        if (mode == ReadOptions::SEEK_CLOSEST) {
1917            targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
1918        }
1919
1920#if 0
1921        uint32_t syncSampleTime;
1922        CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
1923                    syncSampleIndex, NULL, NULL, &syncSampleTime));
1924
1925        LOGI("seek to time %lld us => sample at time %lld us, "
1926             "sync sample at time %lld us",
1927             seekTimeUs,
1928             sampleTime * 1000000ll / mTimescale,
1929             syncSampleTime * 1000000ll / mTimescale);
1930#endif
1931
1932        mCurrentSampleIndex = syncSampleIndex;
1933        if (mBuffer != NULL) {
1934            mBuffer->release();
1935            mBuffer = NULL;
1936        }
1937
1938        // fall through
1939    }
1940
1941    off64_t offset;
1942    size_t size;
1943    uint32_t cts;
1944    bool isSyncSample;
1945    bool newBuffer = false;
1946    if (mBuffer == NULL) {
1947        newBuffer = true;
1948
1949        status_t err =
1950            mSampleTable->getMetaDataForSample(
1951                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample);
1952
1953        if (err != OK) {
1954            return err;
1955        }
1956
1957        err = mGroup->acquire_buffer(&mBuffer);
1958
1959        if (err != OK) {
1960            CHECK(mBuffer == NULL);
1961            return err;
1962        }
1963    }
1964
1965    if (!mIsAVC || mWantsNALFragments) {
1966        if (newBuffer) {
1967            ssize_t num_bytes_read =
1968                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
1969
1970            if (num_bytes_read < (ssize_t)size) {
1971                mBuffer->release();
1972                mBuffer = NULL;
1973
1974                return ERROR_IO;
1975            }
1976
1977            CHECK(mBuffer != NULL);
1978            mBuffer->set_range(0, size);
1979            mBuffer->meta_data()->clear();
1980            mBuffer->meta_data()->setInt64(
1981                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
1982
1983            if (targetSampleTimeUs >= 0) {
1984                mBuffer->meta_data()->setInt64(
1985                        kKeyTargetTime, targetSampleTimeUs);
1986            }
1987
1988            if (isSyncSample) {
1989                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
1990            }
1991
1992            ++mCurrentSampleIndex;
1993        }
1994
1995        if (!mIsAVC) {
1996            *out = mBuffer;
1997            mBuffer = NULL;
1998
1999            return OK;
2000        }
2001
2002        // Each NAL unit is split up into its constituent fragments and
2003        // each one of them returned in its own buffer.
2004
2005        CHECK(mBuffer->range_length() >= mNALLengthSize);
2006
2007        const uint8_t *src =
2008            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
2009
2010        size_t nal_size = parseNALSize(src);
2011        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
2012            LOGE("incomplete NAL unit.");
2013
2014            mBuffer->release();
2015            mBuffer = NULL;
2016
2017            return ERROR_MALFORMED;
2018        }
2019
2020        MediaBuffer *clone = mBuffer->clone();
2021        CHECK(clone != NULL);
2022        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
2023
2024        CHECK(mBuffer != NULL);
2025        mBuffer->set_range(
2026                mBuffer->range_offset() + mNALLengthSize + nal_size,
2027                mBuffer->range_length() - mNALLengthSize - nal_size);
2028
2029        if (mBuffer->range_length() == 0) {
2030            mBuffer->release();
2031            mBuffer = NULL;
2032        }
2033
2034        *out = clone;
2035
2036        return OK;
2037    } else {
2038        // Whole NAL units are returned but each fragment is prefixed by
2039        // the start code (0x00 00 00 01).
2040        ssize_t num_bytes_read = 0;
2041        int32_t drm = 0;
2042        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
2043        if (usesDRM) {
2044            num_bytes_read =
2045                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
2046        } else {
2047            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
2048        }
2049
2050        if (num_bytes_read < (ssize_t)size) {
2051            mBuffer->release();
2052            mBuffer = NULL;
2053
2054            return ERROR_IO;
2055        }
2056
2057        if (usesDRM) {
2058            CHECK(mBuffer != NULL);
2059            mBuffer->set_range(0, size);
2060
2061        } else {
2062            uint8_t *dstData = (uint8_t *)mBuffer->data();
2063            size_t srcOffset = 0;
2064            size_t dstOffset = 0;
2065
2066            while (srcOffset < size) {
2067                bool isMalFormed = (srcOffset + mNALLengthSize > size);
2068                size_t nalLength = 0;
2069                if (!isMalFormed) {
2070                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
2071                    srcOffset += mNALLengthSize;
2072                    isMalFormed = srcOffset + nalLength > size;
2073                }
2074
2075                if (isMalFormed) {
2076                    LOGE("Video is malformed");
2077                    mBuffer->release();
2078                    mBuffer = NULL;
2079                    return ERROR_MALFORMED;
2080                }
2081
2082                if (nalLength == 0) {
2083                    continue;
2084                }
2085
2086                CHECK(dstOffset + 4 <= mBuffer->size());
2087
2088                dstData[dstOffset++] = 0;
2089                dstData[dstOffset++] = 0;
2090                dstData[dstOffset++] = 0;
2091                dstData[dstOffset++] = 1;
2092                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
2093                srcOffset += nalLength;
2094                dstOffset += nalLength;
2095            }
2096            CHECK_EQ(srcOffset, size);
2097            CHECK(mBuffer != NULL);
2098            mBuffer->set_range(0, dstOffset);
2099        }
2100
2101        mBuffer->meta_data()->clear();
2102        mBuffer->meta_data()->setInt64(
2103                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
2104
2105        if (targetSampleTimeUs >= 0) {
2106            mBuffer->meta_data()->setInt64(
2107                    kKeyTargetTime, targetSampleTimeUs);
2108        }
2109
2110        if (isSyncSample) {
2111            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
2112        }
2113
2114        ++mCurrentSampleIndex;
2115
2116        *out = mBuffer;
2117        mBuffer = NULL;
2118
2119        return OK;
2120    }
2121}
2122
2123MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
2124        const char *mimePrefix) {
2125    for (Track *track = mFirstTrack; track != NULL; track = track->next) {
2126        const char *mime;
2127        if (track->meta != NULL
2128                && track->meta->findCString(kKeyMIMEType, &mime)
2129                && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
2130            return track;
2131        }
2132    }
2133
2134    return NULL;
2135}
2136
2137static bool LegacySniffMPEG4(
2138        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
2139    uint8_t header[8];
2140
2141    ssize_t n = source->readAt(4, header, sizeof(header));
2142    if (n < (ssize_t)sizeof(header)) {
2143        return false;
2144    }
2145
2146    if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
2147        || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
2148        || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
2149        || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
2150        || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
2151        || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) {
2152        *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
2153        *confidence = 0.4;
2154
2155        return true;
2156    }
2157
2158    return false;
2159}
2160
2161static bool isCompatibleBrand(uint32_t fourcc) {
2162    static const uint32_t kCompatibleBrands[] = {
2163        FOURCC('i', 's', 'o', 'm'),
2164        FOURCC('i', 's', 'o', '2'),
2165        FOURCC('a', 'v', 'c', '1'),
2166        FOURCC('3', 'g', 'p', '4'),
2167        FOURCC('m', 'p', '4', '1'),
2168        FOURCC('m', 'p', '4', '2'),
2169
2170        // Won't promise that the following file types can be played.
2171        // Just give these file types a chance.
2172        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
2173        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
2174    };
2175
2176    for (size_t i = 0;
2177         i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
2178         ++i) {
2179        if (kCompatibleBrands[i] == fourcc) {
2180            return true;
2181        }
2182    }
2183
2184    return false;
2185}
2186
2187// Attempt to actually parse the 'ftyp' atom and determine if a suitable
2188// compatible brand is present.
2189static bool BetterSniffMPEG4(
2190        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
2191    uint8_t header[12];
2192    if (source->readAt(0, header, 12) != 12
2193            || memcmp("ftyp", &header[4], 4)) {
2194        return false;
2195    }
2196
2197    size_t atomSize = U32_AT(&header[0]);
2198    if (atomSize < 16 || (atomSize % 4) != 0) {
2199        return false;
2200    }
2201
2202    bool success = false;
2203    if (isCompatibleBrand(U32_AT(&header[8]))) {
2204        success = true;
2205    } else {
2206        size_t numCompatibleBrands = (atomSize - 16) / 4;
2207        for (size_t i = 0; i < numCompatibleBrands; ++i) {
2208            uint8_t tmp[4];
2209            if (source->readAt(16 + i * 4, tmp, 4) != 4) {
2210                return false;
2211            }
2212
2213            if (isCompatibleBrand(U32_AT(&tmp[0]))) {
2214                success = true;
2215                break;
2216            }
2217        }
2218    }
2219
2220    if (!success) {
2221        return false;
2222    }
2223
2224    *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
2225    *confidence = 0.4f;
2226
2227    return true;
2228}
2229
2230bool SniffMPEG4(
2231        const sp<DataSource> &source, String8 *mimeType, float *confidence,
2232        sp<AMessage> *) {
2233    if (BetterSniffMPEG4(source, mimeType, confidence)) {
2234        return true;
2235    }
2236
2237    if (LegacySniffMPEG4(source, mimeType, confidence)) {
2238        LOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
2239        return true;
2240    }
2241
2242    return false;
2243}
2244
2245}  // namespace android
2246
2247