1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "FragmentedMP4Extractor"
19#include <utils/Log.h>
20
21#include "include/FragmentedMP4Extractor.h"
22#include "include/SampleTable.h"
23#include "include/ESDS.h"
24
25#include <arpa/inet.h>
26
27#include <ctype.h>
28#include <stdint.h>
29#include <stdlib.h>
30#include <string.h>
31
32#include <cutils/properties.h> // for property_get
33
34#include <media/stagefright/foundation/ABitReader.h>
35#include <media/stagefright/foundation/ABuffer.h>
36#include <media/stagefright/foundation/ADebug.h>
37#include <media/stagefright/foundation/AMessage.h>
38#include <media/stagefright/DataSource.h>
39#include <media/stagefright/MediaBuffer.h>
40#include <media/stagefright/MediaBufferGroup.h>
41#include <media/stagefright/MediaDefs.h>
42#include <media/stagefright/MediaSource.h>
43#include <media/stagefright/MetaData.h>
44#include <media/stagefright/Utils.h>
45#include <utils/String8.h>
46
47namespace android {
48
49class FragmentedMPEG4Source : public MediaSource {
50public:
51    // Caller retains ownership of the Parser
52    FragmentedMPEG4Source(bool audio,
53                const sp<MetaData> &format,
54                const sp<FragmentedMP4Parser> &parser,
55                const sp<FragmentedMP4Extractor> &extractor);
56
57    virtual status_t start(MetaData *params = NULL);
58    virtual status_t stop();
59
60    virtual sp<MetaData> getFormat();
61
62    virtual status_t read(
63            MediaBuffer **buffer, const ReadOptions *options = NULL);
64
65protected:
66    virtual ~FragmentedMPEG4Source();
67
68private:
69    Mutex mLock;
70
71    sp<MetaData> mFormat;
72    sp<FragmentedMP4Parser> mParser;
73    sp<FragmentedMP4Extractor> mExtractor;
74    bool mIsAudioTrack;
75    uint32_t mCurrentSampleIndex;
76
77    bool mIsAVC;
78    size_t mNALLengthSize;
79
80    bool mStarted;
81
82    MediaBufferGroup *mGroup;
83
84    bool mWantsNALFragments;
85
86    uint8_t *mSrcBuffer;
87
88    FragmentedMPEG4Source(const FragmentedMPEG4Source &);
89    FragmentedMPEG4Source &operator=(const FragmentedMPEG4Source &);
90};
91
92
93FragmentedMP4Extractor::FragmentedMP4Extractor(const sp<DataSource> &source)
94    : mLooper(new ALooper),
95      mParser(new FragmentedMP4Parser()),
96      mDataSource(source),
97      mInitCheck(NO_INIT),
98      mFileMetaData(new MetaData) {
99    ALOGV("FragmentedMP4Extractor");
100    mLooper->registerHandler(mParser);
101    mLooper->start(false /* runOnCallingThread */);
102    mParser->start(mDataSource);
103
104    bool hasVideo = mParser->getFormat(false /* audio */, true /* synchronous */) != NULL;
105    bool hasAudio = mParser->getFormat(true /* audio */, true /* synchronous */) != NULL;
106
107    ALOGV("number of tracks: %d", countTracks());
108
109    if (hasVideo) {
110        mFileMetaData->setCString(
111                kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
112    } else if (hasAudio) {
113        mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
114    } else {
115        ALOGE("no audio and no video, no idea what file type this is");
116    }
117    // tracks are numbered such that video track is first, audio track is second
118    if (hasAudio && hasVideo) {
119        mTrackCount = 2;
120        mAudioTrackIndex = 1;
121    } else if (hasAudio) {
122        mTrackCount = 1;
123        mAudioTrackIndex = 0;
124    } else if (hasVideo) {
125        mTrackCount = 1;
126        mAudioTrackIndex = -1;
127    } else {
128        mTrackCount = 0;
129        mAudioTrackIndex = -1;
130    }
131}
132
133FragmentedMP4Extractor::~FragmentedMP4Extractor() {
134    ALOGV("~FragmentedMP4Extractor");
135    mLooper->stop();
136}
137
138uint32_t FragmentedMP4Extractor::flags() const {
139    return CAN_PAUSE |
140            (mParser->isSeekable() ? (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
141}
142
143sp<MetaData> FragmentedMP4Extractor::getMetaData() {
144    return mFileMetaData;
145}
146
147size_t FragmentedMP4Extractor::countTracks() {
148    return mTrackCount;
149}
150
151
152sp<MetaData> FragmentedMP4Extractor::getTrackMetaData(
153        size_t index, uint32_t flags) {
154    if (index >= countTracks()) {
155        return NULL;
156    }
157
158    sp<AMessage> msg = mParser->getFormat(index == mAudioTrackIndex, true /* synchronous */);
159
160    if (msg == NULL) {
161        ALOGV("got null format for track %d", index);
162        return NULL;
163    }
164
165    sp<MetaData> meta = new MetaData();
166    convertMessageToMetaData(msg, meta);
167    return meta;
168}
169
// Writes the four bytes of |x|, most significant byte first, into |s| and
// NUL-terminates the result. |s| must have room for at least 5 chars.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int i = 0; i < 4; ++i) {
        const int shift = 24 - 8 * i;
        s[i] = (x >> shift) & 0xff;
    }
    s[4] = '\0';
}
177
178sp<MediaSource> FragmentedMP4Extractor::getTrack(size_t index) {
179    if (index >= countTracks()) {
180        return NULL;
181    }
182    return new FragmentedMPEG4Source(index == mAudioTrackIndex, getTrackMetaData(index, 0), mParser, this);
183}
184
185
186////////////////////////////////////////////////////////////////////////////////
187
188FragmentedMPEG4Source::FragmentedMPEG4Source(
189        bool audio,
190        const sp<MetaData> &format,
191        const sp<FragmentedMP4Parser> &parser,
192        const sp<FragmentedMP4Extractor> &extractor)
193    : mFormat(format),
194      mParser(parser),
195      mExtractor(extractor),
196      mIsAudioTrack(audio),
197      mStarted(false),
198      mGroup(NULL),
199      mWantsNALFragments(false),
200      mSrcBuffer(NULL) {
201}
202
203FragmentedMPEG4Source::~FragmentedMPEG4Source() {
204    if (mStarted) {
205        stop();
206    }
207}
208
209status_t FragmentedMPEG4Source::start(MetaData *params) {
210    Mutex::Autolock autoLock(mLock);
211
212    CHECK(!mStarted);
213
214    int32_t val;
215    if (params && params->findInt32(kKeyWantsNALFragments, &val)
216        && val != 0) {
217        mWantsNALFragments = true;
218    } else {
219        mWantsNALFragments = false;
220    }
221    ALOGV("caller wants NAL fragments: %s", mWantsNALFragments ? "yes" : "no");
222
223    mGroup = new MediaBufferGroup;
224
225    int32_t max_size = 65536;
226    // XXX CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size));
227
228    mGroup->add_buffer(new MediaBuffer(max_size));
229
230    mSrcBuffer = new uint8_t[max_size];
231
232    mStarted = true;
233
234    return OK;
235}
236
237status_t FragmentedMPEG4Source::stop() {
238    Mutex::Autolock autoLock(mLock);
239
240    CHECK(mStarted);
241
242    delete[] mSrcBuffer;
243    mSrcBuffer = NULL;
244
245    delete mGroup;
246    mGroup = NULL;
247
248    mStarted = false;
249    mCurrentSampleIndex = 0;
250
251    return OK;
252}
253
254sp<MetaData> FragmentedMPEG4Source::getFormat() {
255    Mutex::Autolock autoLock(mLock);
256
257    return mFormat;
258}
259
260
261status_t FragmentedMPEG4Source::read(
262        MediaBuffer **out, const ReadOptions *options) {
263    int64_t seekTimeUs;
264    ReadOptions::SeekMode mode;
265    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
266        mParser->seekTo(mIsAudioTrack, seekTimeUs);
267    }
268    MediaBuffer *buffer = NULL;
269    mGroup->acquire_buffer(&buffer);
270    sp<ABuffer> parseBuffer;
271
272    status_t ret = mParser->dequeueAccessUnit(mIsAudioTrack, &parseBuffer, true /* synchronous */);
273    if (ret != OK) {
274        buffer->release();
275        ALOGV("returning %d", ret);
276        return ret;
277    }
278    sp<AMessage> meta = parseBuffer->meta();
279    int64_t timeUs;
280    CHECK(meta->findInt64("timeUs", &timeUs));
281    buffer->meta_data()->setInt64(kKeyTime, timeUs);
282    buffer->set_range(0, parseBuffer->size());
283    memcpy(buffer->data(), parseBuffer->data(), parseBuffer->size());
284    *out = buffer;
285    return OK;
286}
287
288
289static bool isCompatibleBrand(uint32_t fourcc) {
290    static const uint32_t kCompatibleBrands[] = {
291        FOURCC('i', 's', 'o', 'm'),
292        FOURCC('i', 's', 'o', '2'),
293        FOURCC('a', 'v', 'c', '1'),
294        FOURCC('3', 'g', 'p', '4'),
295        FOURCC('m', 'p', '4', '1'),
296        FOURCC('m', 'p', '4', '2'),
297
298        // Won't promise that the following file types can be played.
299        // Just give these file types a chance.
300        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
301        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
302
303        FOURCC('3', 'g', '2', 'a'),  // 3GPP2
304        FOURCC('3', 'g', '2', 'b'),
305    };
306
307    for (size_t i = 0;
308         i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
309         ++i) {
310        if (kCompatibleBrands[i] == fourcc) {
311            return true;
312        }
313    }
314
315    return false;
316}
317
318// Attempt to actually parse the 'ftyp' atom and determine if a suitable
319// compatible brand is present.
320// Also try to identify where this file's metadata ends
321// (end of the 'moov' atom) and report it to the caller as part of
322// the metadata.
323static bool Sniff(
324        const sp<DataSource> &source, String8 *mimeType, float *confidence,
325        sp<AMessage> *meta) {
326    // We scan up to 128k bytes to identify this file as an MP4.
327    static const off64_t kMaxScanOffset = 128ll * 1024ll;
328
329    off64_t offset = 0ll;
330    bool foundGoodFileType = false;
331    bool isFragmented = false;
332    off64_t moovAtomEndOffset = -1ll;
333    bool done = false;
334
335    while (!done && offset < kMaxScanOffset) {
336        uint32_t hdr[2];
337        if (source->readAt(offset, hdr, 8) < 8) {
338            return false;
339        }
340
341        uint64_t chunkSize = ntohl(hdr[0]);
342        uint32_t chunkType = ntohl(hdr[1]);
343        off64_t chunkDataOffset = offset + 8;
344
345        if (chunkSize == 1) {
346            if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
347                return false;
348            }
349
350            chunkSize = ntoh64(chunkSize);
351            chunkDataOffset += 8;
352
353            if (chunkSize < 16) {
354                // The smallest valid chunk is 16 bytes long in this case.
355                return false;
356            }
357        } else if (chunkSize < 8) {
358            // The smallest valid chunk is 8 bytes long.
359            return false;
360        }
361
362        off64_t chunkDataSize = offset + chunkSize - chunkDataOffset;
363
364        char chunkstring[5];
365        MakeFourCCString(chunkType, chunkstring);
366        ALOGV("saw chunk type %s, size %lld @ %lld", chunkstring, chunkSize, offset);
367        switch (chunkType) {
368            case FOURCC('f', 't', 'y', 'p'):
369            {
370                if (chunkDataSize < 8) {
371                    return false;
372                }
373
374                uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
375                for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
376                    if (i == 1) {
377                        // Skip this index, it refers to the minorVersion,
378                        // not a brand.
379                        continue;
380                    }
381
382                    uint32_t brand;
383                    if (source->readAt(
384                                chunkDataOffset + 4 * i, &brand, 4) < 4) {
385                        return false;
386                    }
387
388                    brand = ntohl(brand);
389                    char brandstring[5];
390                    MakeFourCCString(brand, brandstring);
391                    ALOGV("Brand: %s", brandstring);
392
393                    if (isCompatibleBrand(brand)) {
394                        foundGoodFileType = true;
395                        break;
396                    }
397                }
398
399                if (!foundGoodFileType) {
400                    return false;
401                }
402
403                break;
404            }
405
406            case FOURCC('m', 'o', 'o', 'v'):
407            {
408                moovAtomEndOffset = offset + chunkSize;
409                break;
410            }
411
412            case FOURCC('m', 'o', 'o', 'f'):
413            {
414                // this is kind of broken, since we might not actually find a
415                // moof box in the first 128k.
416                isFragmented = true;
417                done = true;
418                break;
419            }
420
421            default:
422                break;
423        }
424
425        offset += chunkSize;
426    }
427
428    if (!foundGoodFileType || !isFragmented) {
429        return false;
430    }
431
432    *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
433    *confidence = 0.5f; // slightly more than MPEG4Extractor
434
435    if (moovAtomEndOffset >= 0) {
436        *meta = new AMessage;
437        (*meta)->setInt64("meta-data-size", moovAtomEndOffset);
438        (*meta)->setInt32("fragmented", 1); // tell MediaExtractor what to instantiate
439
440        ALOGV("found metadata size: %lld", moovAtomEndOffset);
441    }
442
443    return true;
444}
445
446// used by DataSource::RegisterDefaultSniffers
447bool SniffFragmentedMP4(
448        const sp<DataSource> &source, String8 *mimeType, float *confidence,
449        sp<AMessage> *meta) {
450    ALOGV("SniffFragmentedMP4");
451    char prop[PROPERTY_VALUE_MAX];
452    if (property_get("media.stagefright.use-fragmp4", prop, NULL)
453            && (!strcmp(prop, "1") || !strcasecmp(prop, "true"))) {
454        return Sniff(source, mimeType, confidence, meta);
455    }
456
457    return false;
458}
459
460}  // namespace android
461