MPEG4Extractor.cpp revision 66ac4df65516ebfd0e500bfca75dc4b5ef8d674e
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "MPEG4Extractor"
18#include <utils/Log.h>
19
20#include "include/MPEG4Extractor.h"
21#include "include/SampleTable.h"
22
23#include <arpa/inet.h>
24
25#include <ctype.h>
26#include <stdint.h>
27#include <stdlib.h>
28#include <string.h>
29
30#include <media/stagefright/DataSource.h>
31#include "include/ESDS.h"
32#include <media/stagefright/MediaBuffer.h>
33#include <media/stagefright/MediaBufferGroup.h>
34#include <media/stagefright/MediaDebug.h>
35#include <media/stagefright/MediaDefs.h>
36#include <media/stagefright/MediaSource.h>
37#include <media/stagefright/MetaData.h>
38#include <media/stagefright/Utils.h>
39#include <utils/String8.h>
40
41namespace android {
42
// MediaSource subclass constructed from a track format, a data source, a
// timescale and a sample table.
// NOTE(review): only the declaration is visible in this part of the file; the
// notes on individual members below are inferred from their names and types
// and should be confirmed against the member function definitions.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable);

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    virtual status_t read(
            MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    // Protected destructor: instances cannot be deleted directly by outside
    // code.
    virtual ~MPEG4Source();

private:
    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;

    // presumably AVC specific state: whether the track is AVC and the width
    // in bytes of the NAL length prefix -- TODO confirm in the constructor.
    bool mIsAVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    size_t parseNALSize(const uint8_t *data) const;

    // Copy constructor and copy assignment are declared private; no
    // definition is visible in this part of the file.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
87
// This custom data source wraps an existing one. Requests that fall entirely
// within the cached range are satisfied from the cache; all remaining
// requests are forwarded to the wrapped data source.
// It is used to cache the full sample table metadata of a single track.
// Data sources may be wrapped repeatedly to cover all tracks, i.e. each
// MPEG4DataSource caches the sample table metadata for exactly one track.

struct MPEG4DataSource : public DataSource {
    MPEG4DataSource(const sp<DataSource> &source);

    // DataSource interface.
    virtual status_t initCheck() const;
    virtual ssize_t readAt(off_t offset, void *data, size_t size);
    virtual status_t getSize(off_t *size);
    virtual uint32_t flags();

    // Replaces the cached range with "size" bytes starting at "offset".
    status_t setCachedRange(off_t offset, size_t size);

protected:
    virtual ~MPEG4DataSource();

private:
    // Taken by readAt() and setCachedRange().
    Mutex mLock;

    // The wrapped data source.
    sp<DataSource> mSource;
    // File offset of the first cached byte and number of cached bytes.
    off_t mCachedOffset;
    size_t mCachedSize;
    // malloc()ed buffer holding the cached bytes, or NULL if nothing is
    // cached.
    uint8_t *mCache;

    // Frees the buffer and resets offset and size to 0.
    void clearCache();

    // Copy constructor and copy assignment are declared private; no
    // definition is visible in this part of the file.
    MPEG4DataSource(const MPEG4DataSource &);
    MPEG4DataSource &operator=(const MPEG4DataSource &);
};
121
// Wraps "source". The cache starts out empty: no buffer, offset 0, size 0.
MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
    : mSource(source),
      mCachedOffset(0),
      mCachedSize(0),
      mCache(NULL) {
}
128
// Releases the cache buffer, if one is allocated.
MPEG4DataSource::~MPEG4DataSource() {
    clearCache();
}
132
133void MPEG4DataSource::clearCache() {
134    if (mCache) {
135        free(mCache);
136        mCache = NULL;
137    }
138
139    mCachedOffset = 0;
140    mCachedSize = 0;
141}
142
// Forwards to the wrapped data source.
status_t MPEG4DataSource::initCheck() const {
    return mSource->initCheck();
}
146
147ssize_t MPEG4DataSource::readAt(off_t offset, void *data, size_t size) {
148    Mutex::Autolock autoLock(mLock);
149
150    if (offset >= mCachedOffset
151            && offset + size <= mCachedOffset + mCachedSize) {
152        memcpy(data, &mCache[offset - mCachedOffset], size);
153        return size;
154    }
155
156    return mSource->readAt(offset, data, size);
157}
158
// Forwards to the wrapped data source; the cache does not affect the size.
status_t MPEG4DataSource::getSize(off_t *size) {
    return mSource->getSize(size);
}
162
// Forwards to the wrapped data source.
uint32_t MPEG4DataSource::flags() {
    return mSource->flags();
}
166
167status_t MPEG4DataSource::setCachedRange(off_t offset, size_t size) {
168    Mutex::Autolock autoLock(mLock);
169
170    clearCache();
171
172    mCache = (uint8_t *)malloc(size);
173
174    if (mCache == NULL) {
175        return -ENOMEM;
176    }
177
178    mCachedOffset = offset;
179    mCachedSize = size;
180
181    ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
182
183    if (err < (ssize_t)size) {
184        clearCache();
185
186        return ERROR_IO;
187    }
188
189    return OK;
190}
191
192////////////////////////////////////////////////////////////////////////////////
193
// Debugging aid: prints "size" bytes starting at "_data" to stdout, 16 bytes
// per line. Each line consists of the offset of its first byte, the bytes in
// hex (with an extra space after the eighth byte) and the same bytes as
// characters, where non-printable bytes are shown as '.'.
static void hexdump(const void *_data, size_t size) {
    const uint8_t *data = (const uint8_t *)_data;

    for (size_t offset = 0; offset < size; offset += 16) {
        // "offset" is a size_t, so it needs the 'z' length modifier; a plain
        // "%x" expects an unsigned int.
        printf("0x%04zx  ", offset);

        // Number of valid bytes on this line.
        size_t n = size - offset;
        if (n > 16) {
            n = 16;
        }

        // Hex column; always 16 slots wide so that the character column
        // lines up on a short last line.
        for (size_t i = 0; i < 16; ++i) {
            if (i == 8) {
                printf(" ");
            }

            if (i < n) {
                printf("%02x ", data[offset + i]);
            } else {
                printf("   ");
            }
        }

        printf(" ");

        // Character column.
        for (size_t i = 0; i < n; ++i) {
            if (isprint(data[offset + i])) {
                printf("%c", data[offset + i]);
            } else {
                printf(".");
            }
        }

        printf("\n");
    }
}
232
233static const char *FourCC2MIME(uint32_t fourcc) {
234    switch (fourcc) {
235        case FOURCC('m', 'p', '4', 'a'):
236            return MEDIA_MIMETYPE_AUDIO_AAC;
237
238        case FOURCC('s', 'a', 'm', 'r'):
239            return MEDIA_MIMETYPE_AUDIO_AMR_NB;
240
241        case FOURCC('s', 'a', 'w', 'b'):
242            return MEDIA_MIMETYPE_AUDIO_AMR_WB;
243
244        case FOURCC('m', 'p', '4', 'v'):
245            return MEDIA_MIMETYPE_VIDEO_MPEG4;
246
247        case FOURCC('s', '2', '6', '3'):
248            return MEDIA_MIMETYPE_VIDEO_H263;
249
250        case FOURCC('a', 'v', 'c', '1'):
251            return MEDIA_MIMETYPE_VIDEO_AVC;
252
253        default:
254            CHECK(!"should not be here.");
255            return NULL;
256    }
257}
258
// Stores "source" and sets up an empty extractor: no metadata read yet, no
// video seen, an empty track list and a fresh, empty file level MetaData.
// Nothing is read from the data source here.
MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
    : mDataSource(source),
      mHaveMetadata(false),
      mHasVideo(false),
      mFirstTrack(NULL),
      mLastTrack(NULL),
      mFileMetaData(new MetaData) {
}
267
268MPEG4Extractor::~MPEG4Extractor() {
269    Track *track = mFirstTrack;
270    while (track) {
271        Track *next = track->next;
272
273        delete track;
274        track = next;
275    }
276    mFirstTrack = mLastTrack = NULL;
277}
278
279sp<MetaData> MPEG4Extractor::getMetaData() {
280    status_t err;
281    if ((err = readMetaData()) != OK) {
282        return new MetaData;
283    }
284
285    return mFileMetaData;
286}
287
288size_t MPEG4Extractor::countTracks() {
289    status_t err;
290    if ((err = readMetaData()) != OK) {
291        return 0;
292    }
293
294    size_t n = 0;
295    Track *track = mFirstTrack;
296    while (track) {
297        ++n;
298        track = track->next;
299    }
300
301    return n;
302}
303
304sp<MetaData> MPEG4Extractor::getTrackMetaData(
305        size_t index, uint32_t flags) {
306    status_t err;
307    if ((err = readMetaData()) != OK) {
308        return NULL;
309    }
310
311    Track *track = mFirstTrack;
312    while (index > 0) {
313        if (track == NULL) {
314            return NULL;
315        }
316
317        track = track->next;
318        --index;
319    }
320
321    if (track == NULL) {
322        return NULL;
323    }
324
325    if ((flags & kIncludeExtensiveMetaData)
326            && !track->includes_expensive_metadata) {
327        track->includes_expensive_metadata = true;
328
329        const char *mime;
330        CHECK(track->meta->findCString(kKeyMIMEType, &mime));
331        if (!strncasecmp("video/", mime, 6)) {
332            uint32_t sampleIndex;
333            uint32_t sampleTime;
334            if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK
335                    && track->sampleTable->getMetaDataForSample(
336                        sampleIndex, NULL /* offset */, NULL /* size */,
337                        &sampleTime) == OK) {
338                track->meta->setInt64(
339                        kKeyThumbnailTime,
340                        ((int64_t)sampleTime * 1000000) / track->timescale);
341            }
342        }
343    }
344
345    return track->meta;
346}
347
348status_t MPEG4Extractor::readMetaData() {
349    if (mHaveMetadata) {
350        return OK;
351    }
352
353    off_t offset = 0;
354    status_t err;
355    while ((err = parseChunk(&offset, 0)) == OK) {
356    }
357
358    if (mHaveMetadata) {
359        if (mHasVideo) {
360            mFileMetaData->setCString(kKeyMIMEType, "video/mp4");
361        } else {
362            mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
363        }
364
365        return OK;
366    }
367
368    return err;
369}
370
// Writes the four bytes of "x", most significant byte first, into "s" and
// terminates the string. "s" must have room for at least 5 chars.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int i = 0; i < 4; ++i) {
        const int shift = 8 * (3 - i);
        s[i] = (x >> shift) & 0xff;
    }

    s[4] = '\0';
}
378
// Helper that pushes "chunkType" onto "path" when constructed and pops the
// last element off the same vector when destroyed, so an automatic instance
// keeps the entry on the path for exactly as long as it is in scope.
struct PathAdder {
    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
        : mPath(path) {
        mPath->push(chunkType);
    }

    ~PathAdder() {
        mPath->pop();
    }

private:
    // Not owned; only the pointer is stored.
    Vector<uint32_t> *mPath;

    // Copy constructor and copy assignment are declared private and are not
    // defined here.
    PathAdder(const PathAdder &);
    PathAdder &operator=(const PathAdder &);
};
395
396static bool underMetaDataPath(const Vector<uint32_t> &path) {
397    return path.size() >= 5
398        && path[0] == FOURCC('m', 'o', 'o', 'v')
399        && path[1] == FOURCC('u', 'd', 't', 'a')
400        && path[2] == FOURCC('m', 'e', 't', 'a')
401        && path[3] == FOURCC('i', 'l', 's', 't');
402}
403
404// Given a time in seconds since Jan 1 1904, produce a human-readable string.
405static void convertTimeToDate(int64_t time_1904, String8 *s) {
406    time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600);
407
408    char tmp[32];
409    strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970));
410
411    s->setTo(tmp);
412}
413
414status_t MPEG4Extractor::parseChunk(off_t *offset, int depth) {
415    uint32_t hdr[2];
416    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
417        return ERROR_IO;
418    }
419    uint64_t chunk_size = ntohl(hdr[0]);
420    uint32_t chunk_type = ntohl(hdr[1]);
421    off_t data_offset = *offset + 8;
422
423    if (chunk_size == 1) {
424        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
425            return ERROR_IO;
426        }
427        chunk_size = ntoh64(chunk_size);
428        data_offset += 8;
429    }
430
431    char chunk[5];
432    MakeFourCCString(chunk_type, chunk);
433
434#if 0
435    static const char kWhitespace[] = "                                        ";
436    const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
437    printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size);
438
439    char buffer[256];
440    if (chunk_size <= sizeof(buffer)) {
441        if (mDataSource->readAt(*offset, buffer, chunk_size)
442                < (ssize_t)chunk_size) {
443            return ERROR_IO;
444        }
445
446        hexdump(buffer, chunk_size);
447    }
448#endif
449
450    PathAdder autoAdder(&mPath, chunk_type);
451
452    off_t chunk_data_size = *offset + chunk_size - data_offset;
453
454    if (chunk_type != FOURCC('c', 'p', 'r', 't')
455            && mPath.size() == 5 && underMetaDataPath(mPath)) {
456        off_t stop_offset = *offset + chunk_size;
457        *offset = data_offset;
458        while (*offset < stop_offset) {
459            status_t err = parseChunk(offset, depth + 1);
460            if (err != OK) {
461                return err;
462            }
463        }
464        CHECK_EQ(*offset, stop_offset);
465
466        return OK;
467    }
468
469    switch(chunk_type) {
470        case FOURCC('m', 'o', 'o', 'v'):
471        case FOURCC('t', 'r', 'a', 'k'):
472        case FOURCC('m', 'd', 'i', 'a'):
473        case FOURCC('m', 'i', 'n', 'f'):
474        case FOURCC('d', 'i', 'n', 'f'):
475        case FOURCC('s', 't', 'b', 'l'):
476        case FOURCC('m', 'v', 'e', 'x'):
477        case FOURCC('m', 'o', 'o', 'f'):
478        case FOURCC('t', 'r', 'a', 'f'):
479        case FOURCC('m', 'f', 'r', 'a'):
480        case FOURCC('s', 'k', 'i' ,'p'):
481        case FOURCC('u', 'd', 't', 'a'):
482        case FOURCC('i', 'l', 's', 't'):
483        {
484            if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
485                LOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size);
486
487                if (mDataSource->flags() & DataSource::kWantsPrefetching) {
488                    sp<MPEG4DataSource> cachedSource =
489                        new MPEG4DataSource(mDataSource);
490
491                    if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
492                        mDataSource = cachedSource;
493                    }
494                }
495            }
496
497            off_t stop_offset = *offset + chunk_size;
498            *offset = data_offset;
499            while (*offset < stop_offset) {
500                status_t err = parseChunk(offset, depth + 1);
501                if (err != OK) {
502                    return err;
503                }
504            }
505            CHECK_EQ(*offset, stop_offset);
506
507            if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
508                mHaveMetadata = true;
509
510                return UNKNOWN_ERROR;  // Return a dummy error.
511            }
512            break;
513        }
514
515        case FOURCC('t', 'k', 'h', 'd'):
516        {
517            CHECK(chunk_data_size >= 4);
518
519            uint8_t version;
520            if (mDataSource->readAt(data_offset, &version, 1) < 1) {
521                return ERROR_IO;
522            }
523
524            uint64_t ctime, mtime, duration;
525            int32_t id;
526            uint32_t width, height;
527
528            if (version == 1) {
529                if (chunk_data_size != 36 + 60) {
530                    return ERROR_MALFORMED;
531                }
532
533                uint8_t buffer[36 + 60];
534                if (mDataSource->readAt(
535                            data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
536                    return ERROR_IO;
537                }
538
539                ctime = U64_AT(&buffer[4]);
540                mtime = U64_AT(&buffer[12]);
541                id = U32_AT(&buffer[20]);
542                duration = U64_AT(&buffer[28]);
543                width = U32_AT(&buffer[88]);
544                height = U32_AT(&buffer[92]);
545            } else if (version == 0) {
546                if (chunk_data_size != 24 + 60) {
547                    return ERROR_MALFORMED;
548                }
549
550                uint8_t buffer[24 + 60];
551                if (mDataSource->readAt(
552                            data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
553                    return ERROR_IO;
554                }
555                ctime = U32_AT(&buffer[4]);
556                mtime = U32_AT(&buffer[8]);
557                id = U32_AT(&buffer[12]);
558                duration = U32_AT(&buffer[20]);
559                width = U32_AT(&buffer[76]);
560                height = U32_AT(&buffer[80]);
561            }
562
563            Track *track = new Track;
564            track->next = NULL;
565            if (mLastTrack) {
566                mLastTrack->next = track;
567            } else {
568                mFirstTrack = track;
569            }
570            mLastTrack = track;
571
572            track->meta = new MetaData;
573            track->includes_expensive_metadata = false;
574            track->timescale = 0;
575            track->sampleTable = new SampleTable(mDataSource);
576            track->meta->setCString(kKeyMIMEType, "application/octet-stream");
577
578            *offset += chunk_size;
579            break;
580        }
581
582        case FOURCC('m', 'd', 'h', 'd'):
583        {
584            if (chunk_data_size < 4) {
585                return ERROR_MALFORMED;
586            }
587
588            uint8_t version;
589            if (mDataSource->readAt(
590                        data_offset, &version, sizeof(version))
591                    < (ssize_t)sizeof(version)) {
592                return ERROR_IO;
593            }
594
595            off_t timescale_offset;
596
597            if (version == 1) {
598                timescale_offset = data_offset + 4 + 16;
599            } else if (version == 0) {
600                timescale_offset = data_offset + 4 + 8;
601            } else {
602                return ERROR_IO;
603            }
604
605            uint32_t timescale;
606            if (mDataSource->readAt(
607                        timescale_offset, &timescale, sizeof(timescale))
608                    < (ssize_t)sizeof(timescale)) {
609                return ERROR_IO;
610            }
611
612            mLastTrack->timescale = ntohl(timescale);
613
614            int64_t duration;
615            if (version == 1) {
616                if (mDataSource->readAt(
617                            timescale_offset + 4, &duration, sizeof(duration))
618                        < (ssize_t)sizeof(duration)) {
619                    return ERROR_IO;
620                }
621                duration = ntoh64(duration);
622            } else {
623                int32_t duration32;
624                if (mDataSource->readAt(
625                            timescale_offset + 4, &duration32, sizeof(duration32))
626                        < (ssize_t)sizeof(duration32)) {
627                    return ERROR_IO;
628                }
629                duration = ntohl(duration32);
630            }
631            mLastTrack->meta->setInt64(
632                    kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
633
634            *offset += chunk_size;
635            break;
636        }
637
638        case FOURCC('h', 'd', 'l', 'r'):
639        {
640            if (chunk_data_size < 25) {
641                return ERROR_MALFORMED;
642            }
643
644            uint8_t buffer[24];
645            if (mDataSource->readAt(data_offset, buffer, 24) < 24) {
646                return ERROR_IO;
647            }
648
649            if (U32_AT(buffer) != 0) {
650                // Should be version 0, flags 0.
651                return ERROR_MALFORMED;
652            }
653
654            if (U32_AT(&buffer[4]) != 0) {
655                // pre_defined should be 0.
656                return ERROR_MALFORMED;
657            }
658
659            mHandlerType = U32_AT(&buffer[8]);
660            *offset += chunk_size;
661            break;
662        }
663
664        case FOURCC('s', 't', 's', 'd'):
665        {
666            if (chunk_data_size < 8) {
667                return ERROR_MALFORMED;
668            }
669
670            uint8_t buffer[8];
671            CHECK(chunk_data_size >= (off_t)sizeof(buffer));
672            if (mDataSource->readAt(
673                        data_offset, buffer, 8) < 8) {
674                return ERROR_IO;
675            }
676
677            if (U32_AT(buffer) != 0) {
678                // Should be version 0, flags 0.
679                return ERROR_MALFORMED;
680            }
681
682            uint32_t entry_count = U32_AT(&buffer[4]);
683
684            if (entry_count > 1) {
685                // For now we only support a single type of media per track.
686                return ERROR_UNSUPPORTED;
687            }
688
689            off_t stop_offset = *offset + chunk_size;
690            *offset = data_offset + 8;
691            for (uint32_t i = 0; i < entry_count; ++i) {
692                status_t err = parseChunk(offset, depth + 1);
693                if (err != OK) {
694                    return err;
695                }
696            }
697            CHECK_EQ(*offset, stop_offset);
698            break;
699        }
700
701        case FOURCC('m', 'p', '4', 'a'):
702        case FOURCC('s', 'a', 'm', 'r'):
703        case FOURCC('s', 'a', 'w', 'b'):
704        {
705            if (mHandlerType != FOURCC('s', 'o', 'u', 'n')) {
706                return ERROR_MALFORMED;
707            }
708
709            uint8_t buffer[8 + 20];
710            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
711                // Basic AudioSampleEntry size.
712                return ERROR_MALFORMED;
713            }
714
715            if (mDataSource->readAt(
716                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
717                return ERROR_IO;
718            }
719
720            uint16_t data_ref_index = U16_AT(&buffer[6]);
721            uint16_t num_channels = U16_AT(&buffer[16]);
722
723            if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB,
724                            FourCC2MIME(chunk_type))
725                || !strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB,
726                               FourCC2MIME(chunk_type))) {
727                // AMR audio is always mono.
728                num_channels = 1;
729            }
730
731            uint16_t sample_size = U16_AT(&buffer[18]);
732            uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
733
734            // printf("*** coding='%s' %d channels, size %d, rate %d\n",
735            //        chunk, num_channels, sample_size, sample_rate);
736
737            mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
738            mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
739            mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
740
741            off_t stop_offset = *offset + chunk_size;
742            *offset = data_offset + sizeof(buffer);
743            while (*offset < stop_offset) {
744                status_t err = parseChunk(offset, depth + 1);
745                if (err != OK) {
746                    return err;
747                }
748            }
749            CHECK_EQ(*offset, stop_offset);
750            break;
751        }
752
753        case FOURCC('m', 'p', '4', 'v'):
754        case FOURCC('s', '2', '6', '3'):
755        case FOURCC('a', 'v', 'c', '1'):
756        {
757            mHasVideo = true;
758
759            if (mHandlerType != FOURCC('v', 'i', 'd', 'e')) {
760                return ERROR_MALFORMED;
761            }
762
763            uint8_t buffer[78];
764            if (chunk_data_size < (ssize_t)sizeof(buffer)) {
765                // Basic VideoSampleEntry size.
766                return ERROR_MALFORMED;
767            }
768
769            if (mDataSource->readAt(
770                        data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
771                return ERROR_IO;
772            }
773
774            uint16_t data_ref_index = U16_AT(&buffer[6]);
775            uint16_t width = U16_AT(&buffer[6 + 18]);
776            uint16_t height = U16_AT(&buffer[6 + 20]);
777
778            // printf("*** coding='%s' width=%d height=%d\n",
779            //        chunk, width, height);
780
781            mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
782            mLastTrack->meta->setInt32(kKeyWidth, width);
783            mLastTrack->meta->setInt32(kKeyHeight, height);
784
785            off_t stop_offset = *offset + chunk_size;
786            *offset = data_offset + sizeof(buffer);
787            while (*offset < stop_offset) {
788                status_t err = parseChunk(offset, depth + 1);
789                if (err != OK) {
790                    return err;
791                }
792            }
793            CHECK_EQ(*offset, stop_offset);
794            break;
795        }
796
797        case FOURCC('s', 't', 'c', 'o'):
798        case FOURCC('c', 'o', '6', '4'):
799        {
800            status_t err =
801                mLastTrack->sampleTable->setChunkOffsetParams(
802                        chunk_type, data_offset, chunk_data_size);
803
804            if (err != OK) {
805                return err;
806            }
807
808            *offset += chunk_size;
809            break;
810        }
811
812        case FOURCC('s', 't', 's', 'c'):
813        {
814            status_t err =
815                mLastTrack->sampleTable->setSampleToChunkParams(
816                        data_offset, chunk_data_size);
817
818            if (err != OK) {
819                return err;
820            }
821
822            *offset += chunk_size;
823            break;
824        }
825
826        case FOURCC('s', 't', 's', 'z'):
827        case FOURCC('s', 't', 'z', '2'):
828        {
829            status_t err =
830                mLastTrack->sampleTable->setSampleSizeParams(
831                        chunk_type, data_offset, chunk_data_size);
832
833            if (err != OK) {
834                return err;
835            }
836
837            size_t max_size;
838            CHECK_EQ(mLastTrack->sampleTable->getMaxSampleSize(&max_size), OK);
839
840            // Assume that a given buffer only contains at most 10 fragments,
841            // each fragment originally prefixed with a 2 byte length will
842            // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
843            // and thus will grow by 2 bytes per fragment.
844            mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
845
846            *offset += chunk_size;
847            break;
848        }
849
850        case FOURCC('s', 't', 't', 's'):
851        {
852            status_t err =
853                mLastTrack->sampleTable->setTimeToSampleParams(
854                        data_offset, chunk_data_size);
855
856            if (err != OK) {
857                return err;
858            }
859
860            *offset += chunk_size;
861            break;
862        }
863
864        case FOURCC('s', 't', 's', 's'):
865        {
866            status_t err =
867                mLastTrack->sampleTable->setSyncSampleParams(
868                        data_offset, chunk_data_size);
869
870            if (err != OK) {
871                return err;
872            }
873
874            *offset += chunk_size;
875            break;
876        }
877
878        case FOURCC('e', 's', 'd', 's'):
879        {
880            if (chunk_data_size < 4) {
881                return ERROR_MALFORMED;
882            }
883
884            uint8_t buffer[256];
885            if (chunk_data_size > (off_t)sizeof(buffer)) {
886                return ERROR_BUFFER_TOO_SMALL;
887            }
888
889            if (mDataSource->readAt(
890                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
891                return ERROR_IO;
892            }
893
894            if (U32_AT(buffer) != 0) {
895                // Should be version 0, flags 0.
896                return ERROR_MALFORMED;
897            }
898
899            mLastTrack->meta->setData(
900                    kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
901
902            if (mPath.size() >= 2
903                    && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
904                // Information from the ESDS must be relied on for proper
905                // setup of sample rate and channel count for MPEG4 Audio.
906                // The generic header appears to only contain generic
907                // information...
908
909                status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
910                        &buffer[4], chunk_data_size - 4);
911
912                if (err != OK) {
913                    return err;
914                }
915            }
916
917            *offset += chunk_size;
918            break;
919        }
920
921        case FOURCC('a', 'v', 'c', 'C'):
922        {
923            char buffer[256];
924            if (chunk_data_size > (off_t)sizeof(buffer)) {
925                return ERROR_BUFFER_TOO_SMALL;
926            }
927
928            if (mDataSource->readAt(
929                        data_offset, buffer, chunk_data_size) < chunk_data_size) {
930                return ERROR_IO;
931            }
932
933            mLastTrack->meta->setData(
934                    kKeyAVCC, kTypeAVCC, buffer, chunk_data_size);
935
936            *offset += chunk_size;
937            break;
938        }
939
940        case FOURCC('m', 'e', 't', 'a'):
941        {
942            uint8_t buffer[4];
943            CHECK(chunk_data_size >= (off_t)sizeof(buffer));
944            if (mDataSource->readAt(
945                        data_offset, buffer, 4) < 4) {
946                return ERROR_IO;
947            }
948
949            if (U32_AT(buffer) != 0) {
950                // Should be version 0, flags 0.
951                return ERROR_MALFORMED;
952            }
953
954            off_t stop_offset = *offset + chunk_size;
955            *offset = data_offset + sizeof(buffer);
956            while (*offset < stop_offset) {
957                status_t err = parseChunk(offset, depth + 1);
958                if (err != OK) {
959                    return err;
960                }
961            }
962            CHECK_EQ(*offset, stop_offset);
963            break;
964        }
965
966        case FOURCC('d', 'a', 't', 'a'):
967        {
968            if (mPath.size() == 6 && underMetaDataPath(mPath)) {
969                status_t err = parseMetaData(data_offset, chunk_data_size);
970
971                if (err != OK) {
972                    return err;
973                }
974            }
975
976            *offset += chunk_size;
977            break;
978        }
979
980        case FOURCC('m', 'v', 'h', 'd'):
981        {
982            if (chunk_data_size < 12) {
983                return ERROR_MALFORMED;
984            }
985
986            uint8_t header[12];
987            if (mDataSource->readAt(
988                        data_offset, header, sizeof(header))
989                    < (ssize_t)sizeof(header)) {
990                return ERROR_IO;
991            }
992
993            int64_t creationTime;
994            if (header[0] == 1) {
995                creationTime = U64_AT(&header[4]);
996            } else {
997                CHECK_EQ(header[0], 0);
998                creationTime = U32_AT(&header[4]);
999            }
1000
1001            String8 s;
1002            convertTimeToDate(creationTime, &s);
1003
1004            mFileMetaData->setCString(kKeyDate, s.string());
1005
1006            *offset += chunk_size;
1007            break;
1008        }
1009
1010        default:
1011        {
1012            *offset += chunk_size;
1013            break;
1014        }
1015    }
1016
1017    return OK;
1018}
1019
1020status_t MPEG4Extractor::parseMetaData(off_t offset, size_t size) {
1021    if (size < 4) {
1022        return ERROR_MALFORMED;
1023    }
1024
1025    uint8_t *buffer = new uint8_t[size + 1];
1026    if (mDataSource->readAt(
1027                offset, buffer, size) != (ssize_t)size) {
1028        delete[] buffer;
1029        buffer = NULL;
1030
1031        return ERROR_IO;
1032    }
1033
1034    uint32_t flags = U32_AT(buffer);
1035
1036    uint32_t metadataKey = 0;
1037    switch (mPath[4]) {
1038        case FOURCC(0xa9, 'a', 'l', 'b'):
1039        {
1040            metadataKey = kKeyAlbum;
1041            break;
1042        }
1043        case FOURCC(0xa9, 'A', 'R', 'T'):
1044        {
1045            metadataKey = kKeyArtist;
1046            break;
1047        }
1048        case FOURCC('a', 'A', 'R', 'T'):
1049        {
1050            metadataKey = kKeyAlbumArtist;
1051            break;
1052        }
1053        case FOURCC(0xa9, 'd', 'a', 'y'):
1054        {
1055            metadataKey = kKeyYear;
1056            break;
1057        }
1058        case FOURCC(0xa9, 'n', 'a', 'm'):
1059        {
1060            metadataKey = kKeyTitle;
1061            break;
1062        }
1063        case FOURCC(0xa9, 'w', 'r', 't'):
1064        {
1065            metadataKey = kKeyWriter;
1066            break;
1067        }
1068        case FOURCC('c', 'o', 'v', 'r'):
1069        {
1070            metadataKey = kKeyAlbumArt;
1071            break;
1072        }
1073        case FOURCC('g', 'n', 'r', 'e'):
1074        {
1075            metadataKey = kKeyGenre;
1076            break;
1077        }
1078        case FOURCC(0xa9, 'g', 'e', 'n'):
1079        {
1080            metadataKey = kKeyGenre;
1081            break;
1082        }
1083        case FOURCC('t', 'r', 'k', 'n'):
1084        {
1085            if (size == 16 && flags == 0) {
1086                char tmp[16];
1087                sprintf(tmp, "%d/%d",
1088                        (int)buffer[size - 5], (int)buffer[size - 3]);
1089
1090                mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
1091            }
1092            break;
1093        }
1094        case FOURCC('d', 'i', 's', 'k'):
1095        {
1096            if (size == 14 && flags == 0) {
1097                char tmp[16];
1098                sprintf(tmp, "%d/%d",
1099                        (int)buffer[size - 3], (int)buffer[size - 1]);
1100
1101                mFileMetaData->setCString(kKeyDiscNumber, tmp);
1102            }
1103            break;
1104        }
1105
1106        default:
1107            break;
1108    }
1109
1110    if (size >= 8 && metadataKey) {
1111        if (metadataKey == kKeyAlbumArt) {
1112            mFileMetaData->setData(
1113                    kKeyAlbumArt, MetaData::TYPE_NONE,
1114                    buffer + 8, size - 8);
1115        } else if (metadataKey == kKeyGenre) {
1116            if (flags == 0) {
1117                // uint8_t genre code, iTunes genre codes are
1118                // the standard id3 codes, except they start
1119                // at 1 instead of 0 (e.g. Pop is 14, not 13)
1120                // We use standard id3 numbering, so subtract 1.
1121                int genrecode = (int)buffer[size - 1];
1122                genrecode--;
1123                if (genrecode < 0) {
1124                    genrecode = 255; // reserved for 'unknown genre'
1125                }
1126                char genre[10];
1127                sprintf(genre, "%d", genrecode);
1128
1129                mFileMetaData->setCString(metadataKey, genre);
1130            } else if (flags == 1) {
1131                // custom genre string
1132                buffer[size] = '\0';
1133
1134                mFileMetaData->setCString(
1135                        metadataKey, (const char *)buffer + 8);
1136            }
1137        } else {
1138            buffer[size] = '\0';
1139
1140            mFileMetaData->setCString(
1141                    metadataKey, (const char *)buffer + 8);
1142        }
1143    }
1144
1145    delete[] buffer;
1146    buffer = NULL;
1147
1148    return OK;
1149}
1150
1151sp<MediaSource> MPEG4Extractor::getTrack(size_t index) {
1152    status_t err;
1153    if ((err = readMetaData()) != OK) {
1154        return NULL;
1155    }
1156
1157    Track *track = mFirstTrack;
1158    while (index > 0) {
1159        if (track == NULL) {
1160            return NULL;
1161        }
1162
1163        track = track->next;
1164        --index;
1165    }
1166
1167    if (track == NULL) {
1168        return NULL;
1169    }
1170
1171    return new MPEG4Source(
1172            track->meta, mDataSource, track->timescale, track->sampleTable);
1173}
1174
1175status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
1176        const void *esds_data, size_t esds_size) {
1177    ESDS esds(esds_data, esds_size);
1178    const uint8_t *csd;
1179    size_t csd_size;
1180    if (esds.getCodecSpecificInfo(
1181                (const void **)&csd, &csd_size) != OK) {
1182        return ERROR_MALFORMED;
1183    }
1184
1185#if 0
1186    printf("ESD of size %d\n", csd_size);
1187    hexdump(csd, csd_size);
1188#endif
1189
1190    if (csd_size < 2) {
1191        return ERROR_MALFORMED;
1192    }
1193
1194    uint32_t objectType = csd[0] >> 3;
1195
1196    if (objectType == 31) {
1197        return ERROR_UNSUPPORTED;
1198    }
1199
1200    uint32_t freqIndex = (csd[0] & 7) << 1 | (csd[1] >> 7);
1201    int32_t sampleRate = 0;
1202    int32_t numChannels = 0;
1203    if (freqIndex == 15) {
1204        if (csd_size < 5) {
1205            return ERROR_MALFORMED;
1206        }
1207
1208        sampleRate = (csd[1] & 0x7f) << 17
1209                        | csd[2] << 9
1210                        | csd[3] << 1
1211                        | (csd[4] >> 7);
1212
1213        numChannels = (csd[4] >> 3) & 15;
1214    } else {
1215        static uint32_t kSamplingRate[] = {
1216            96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
1217            16000, 12000, 11025, 8000, 7350
1218        };
1219
1220        if (freqIndex == 13 || freqIndex == 14) {
1221            return ERROR_MALFORMED;
1222        }
1223
1224        sampleRate = kSamplingRate[freqIndex];
1225        numChannels = (csd[1] >> 3) & 15;
1226    }
1227
1228    if (numChannels == 0) {
1229        return ERROR_UNSUPPORTED;
1230    }
1231
1232    int32_t prevSampleRate;
1233    CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));
1234
1235    if (prevSampleRate != sampleRate) {
1236        LOGV("mpeg4 audio sample rate different from previous setting. "
1237             "was: %d, now: %d", prevSampleRate, sampleRate);
1238    }
1239
1240    mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);
1241
1242    int32_t prevChannelCount;
1243    CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));
1244
1245    if (prevChannelCount != numChannels) {
1246        LOGV("mpeg4 audio channel count different from previous setting. "
1247             "was: %d, now: %d", prevChannelCount, numChannels);
1248    }
1249
1250    mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);
1251
1252    return OK;
1253}
1254
1255////////////////////////////////////////////////////////////////////////////////
1256
1257MPEG4Source::MPEG4Source(
1258        const sp<MetaData> &format,
1259        const sp<DataSource> &dataSource,
1260        int32_t timeScale,
1261        const sp<SampleTable> &sampleTable)
1262    : mFormat(format),
1263      mDataSource(dataSource),
1264      mTimescale(timeScale),
1265      mSampleTable(sampleTable),
1266      mCurrentSampleIndex(0),
1267      mIsAVC(false),
1268      mNALLengthSize(0),
1269      mStarted(false),
1270      mGroup(NULL),
1271      mBuffer(NULL),
1272      mWantsNALFragments(false),
1273      mSrcBuffer(NULL) {
1274    const char *mime;
1275    bool success = mFormat->findCString(kKeyMIMEType, &mime);
1276    CHECK(success);
1277
1278    mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
1279
1280    if (mIsAVC) {
1281        uint32_t type;
1282        const void *data;
1283        size_t size;
1284        CHECK(format->findData(kKeyAVCC, &type, &data, &size));
1285
1286        const uint8_t *ptr = (const uint8_t *)data;
1287
1288        CHECK(size >= 7);
1289        CHECK_EQ(ptr[0], 1);  // configurationVersion == 1
1290
1291        // The number of bytes used to encode the length of a NAL unit.
1292        mNALLengthSize = 1 + (ptr[4] & 3);
1293    }
1294}
1295
1296MPEG4Source::~MPEG4Source() {
1297    if (mStarted) {
1298        stop();
1299    }
1300}
1301
1302status_t MPEG4Source::start(MetaData *params) {
1303    CHECK(!mStarted);
1304
1305    int32_t val;
1306    if (params && params->findInt32(kKeyWantsNALFragments, &val)
1307        && val != 0) {
1308        mWantsNALFragments = true;
1309    } else {
1310        mWantsNALFragments = false;
1311    }
1312
1313    mGroup = new MediaBufferGroup;
1314
1315    int32_t max_size;
1316    CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size));
1317
1318    mGroup->add_buffer(new MediaBuffer(max_size));
1319
1320    mSrcBuffer = new uint8_t[max_size];
1321
1322    mStarted = true;
1323
1324    return OK;
1325}
1326
1327status_t MPEG4Source::stop() {
1328    CHECK(mStarted);
1329
1330    if (mBuffer != NULL) {
1331        mBuffer->release();
1332        mBuffer = NULL;
1333    }
1334
1335    delete[] mSrcBuffer;
1336    mSrcBuffer = NULL;
1337
1338    delete mGroup;
1339    mGroup = NULL;
1340
1341    mStarted = false;
1342    mCurrentSampleIndex = 0;
1343
1344    return OK;
1345}
1346
1347sp<MetaData> MPEG4Source::getFormat() {
1348    return mFormat;
1349}
1350
1351size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
1352    switch (mNALLengthSize) {
1353        case 1:
1354            return *data;
1355        case 2:
1356            return U16_AT(data);
1357        case 3:
1358            return ((size_t)data[0] << 16) | U16_AT(&data[1]);
1359        case 4:
1360            return U32_AT(data);
1361    }
1362
1363    // This cannot happen, mNALLengthSize springs to life by adding 1 to
1364    // a 2-bit integer.
1365    CHECK(!"Should not be here.");
1366
1367    return 0;
1368}
1369
1370status_t MPEG4Source::read(
1371        MediaBuffer **out, const ReadOptions *options) {
1372    CHECK(mStarted);
1373
1374    *out = NULL;
1375
1376    int64_t seekTimeUs;
1377    if (options && options->getSeekTo(&seekTimeUs)) {
1378        uint32_t sampleIndex;
1379        status_t err = mSampleTable->findClosestSample(
1380                seekTimeUs * mTimescale / 1000000,
1381                &sampleIndex, SampleTable::kSyncSample_Flag);
1382
1383        if (err != OK) {
1384            return err;
1385        }
1386
1387        mCurrentSampleIndex = sampleIndex;
1388        if (mBuffer != NULL) {
1389            mBuffer->release();
1390            mBuffer = NULL;
1391        }
1392
1393        // fall through
1394    }
1395
1396    off_t offset;
1397    size_t size;
1398    uint32_t dts;
1399    bool newBuffer = false;
1400    if (mBuffer == NULL) {
1401        newBuffer = true;
1402
1403        status_t err =
1404            mSampleTable->getMetaDataForSample(
1405                    mCurrentSampleIndex, &offset, &size, &dts);
1406
1407        if (err != OK) {
1408            return err;
1409        }
1410
1411        err = mGroup->acquire_buffer(&mBuffer);
1412
1413        if (err != OK) {
1414            CHECK_EQ(mBuffer, NULL);
1415            return err;
1416        }
1417    }
1418
1419    if (!mIsAVC || mWantsNALFragments) {
1420        if (newBuffer) {
1421            ssize_t num_bytes_read =
1422                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
1423
1424            if (num_bytes_read < (ssize_t)size) {
1425                mBuffer->release();
1426                mBuffer = NULL;
1427
1428                return ERROR_IO;
1429            }
1430
1431            mBuffer->set_range(0, size);
1432            mBuffer->meta_data()->clear();
1433            mBuffer->meta_data()->setInt64(
1434                    kKeyTime, ((int64_t)dts * 1000000) / mTimescale);
1435            ++mCurrentSampleIndex;
1436        }
1437
1438        if (!mIsAVC) {
1439            *out = mBuffer;
1440            mBuffer = NULL;
1441
1442            return OK;
1443        }
1444
1445        // Each NAL unit is split up into its constituent fragments and
1446        // each one of them returned in its own buffer.
1447
1448        CHECK(mBuffer->range_length() >= mNALLengthSize);
1449
1450        const uint8_t *src =
1451            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
1452
1453        size_t nal_size = parseNALSize(src);
1454        if (mBuffer->range_length() < mNALLengthSize + nal_size) {
1455            LOGE("incomplete NAL unit.");
1456
1457            mBuffer->release();
1458            mBuffer = NULL;
1459
1460            return ERROR_MALFORMED;
1461        }
1462
1463        MediaBuffer *clone = mBuffer->clone();
1464        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
1465
1466        mBuffer->set_range(
1467                mBuffer->range_offset() + mNALLengthSize + nal_size,
1468                mBuffer->range_length() - mNALLengthSize - nal_size);
1469
1470        if (mBuffer->range_length() == 0) {
1471            mBuffer->release();
1472            mBuffer = NULL;
1473        }
1474
1475        *out = clone;
1476
1477        return OK;
1478    } else {
1479        // Whole NAL units are returned but each fragment is prefixed by
1480        // the start code (0x00 00 00 01).
1481
1482        ssize_t num_bytes_read =
1483            mDataSource->readAt(offset, mSrcBuffer, size);
1484
1485        if (num_bytes_read < (ssize_t)size) {
1486            mBuffer->release();
1487            mBuffer = NULL;
1488
1489            return ERROR_IO;
1490        }
1491
1492        uint8_t *dstData = (uint8_t *)mBuffer->data();
1493        size_t srcOffset = 0;
1494        size_t dstOffset = 0;
1495
1496        while (srcOffset < size) {
1497            CHECK(srcOffset + mNALLengthSize <= size);
1498            size_t nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
1499            srcOffset += mNALLengthSize;
1500
1501            if (srcOffset + nalLength > size) {
1502                mBuffer->release();
1503                mBuffer = NULL;
1504
1505                return ERROR_MALFORMED;
1506            }
1507
1508            if (nalLength == 0) {
1509                continue;
1510            }
1511
1512            CHECK(dstOffset + 4 <= mBuffer->size());
1513
1514            dstData[dstOffset++] = 0;
1515            dstData[dstOffset++] = 0;
1516            dstData[dstOffset++] = 0;
1517            dstData[dstOffset++] = 1;
1518            memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
1519            srcOffset += nalLength;
1520            dstOffset += nalLength;
1521        }
1522        CHECK_EQ(srcOffset, size);
1523
1524        mBuffer->set_range(0, dstOffset);
1525        mBuffer->meta_data()->clear();
1526        mBuffer->meta_data()->setInt64(
1527                kKeyTime, ((int64_t)dts * 1000000) / mTimescale);
1528        ++mCurrentSampleIndex;
1529
1530        *out = mBuffer;
1531        mBuffer = NULL;
1532
1533        return OK;
1534    }
1535}
1536
1537bool SniffMPEG4(
1538        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
1539    uint8_t header[8];
1540
1541    ssize_t n = source->readAt(4, header, sizeof(header));
1542    if (n < (ssize_t)sizeof(header)) {
1543        return false;
1544    }
1545
1546    if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
1547        || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
1548        || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)) {
1549        *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
1550        *confidence = 0.1;
1551
1552        return true;
1553    }
1554
1555    return false;
1556}
1557
1558}  // namespace android
1559
1560