1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "FragmentedMP4Parser"
19#include <utils/Log.h>
20
21#include "include/avc_utils.h"
22#include "include/ESDS.h"
23#include "include/FragmentedMP4Parser.h"
24#include "TrackFragment.h"
25
26
27#include <media/stagefright/foundation/ABuffer.h>
28#include <media/stagefright/foundation/ADebug.h>
29#include <media/stagefright/foundation/AMessage.h>
30#include <media/stagefright/foundation/hexdump.h>
31#include <media/stagefright/MediaDefs.h>
32#include <media/stagefright/MediaErrors.h>
33#include <media/stagefright/Utils.h>
34
35
36namespace android {
37
// Renders a FOURCC code as a NUL-terminated four character string, most
// significant byte first.
// The text lives in a function-local static buffer, so every call overwrites
// the result of the previous one.
static const char *Fourcc2String(uint32_t fourcc) {
    static char text[5];

    for (int idx = 0; idx < 4; ++idx) {
        const int shift = 8 * (3 - idx);
        text[idx] = (fourcc >> shift) & 0xff;
    }
    text[4] = '\0';

    return text;
}
48
// Returns a string of 2 * |n| spaces used to indent log output by nesting
// depth. The result points into a static run of spaces, so the depth is
// clamped to what that run can represent; without the clamp a deeply nested
// (possibly malicious) file would make the pointer land before the array.
static const char *IndentString(size_t n) {
    static const char kSpace[] = "                              ";
    static const size_t kNumSpaces = sizeof(kSpace) - 1;  // exclude the NUL
    static const size_t kMaxDepth = kNumSpaces / 2;

    if (n > kMaxDepth) {
        n = kMaxDepth;
    }

    return kSpace + kNumSpaces - 2 * n;
}
53
54// static
55const FragmentedMP4Parser::DispatchEntry FragmentedMP4Parser::kDispatchTable[] = {
56    { FOURCC('m', 'o', 'o', 'v'), 0, NULL },
57    { FOURCC('t', 'r', 'a', 'k'), FOURCC('m', 'o', 'o', 'v'), NULL },
58    { FOURCC('u', 'd', 't', 'a'), FOURCC('t', 'r', 'a', 'k'), NULL },
59    { FOURCC('u', 'd', 't', 'a'), FOURCC('m', 'o', 'o', 'v'), NULL },
60    { FOURCC('m', 'e', 't', 'a'), FOURCC('u', 'd', 't', 'a'), NULL },
61    { FOURCC('i', 'l', 's', 't'), FOURCC('m', 'e', 't', 'a'), NULL },
62
63    { FOURCC('t', 'k', 'h', 'd'), FOURCC('t', 'r', 'a', 'k'),
64        &FragmentedMP4Parser::parseTrackHeader
65    },
66
67    { FOURCC('m', 'v', 'e', 'x'), FOURCC('m', 'o', 'o', 'v'), NULL },
68
69    { FOURCC('t', 'r', 'e', 'x'), FOURCC('m', 'v', 'e', 'x'),
70        &FragmentedMP4Parser::parseTrackExtends
71    },
72
73    { FOURCC('e', 'd', 't', 's'), FOURCC('t', 'r', 'a', 'k'), NULL },
74    { FOURCC('m', 'd', 'i', 'a'), FOURCC('t', 'r', 'a', 'k'), NULL },
75
76    { FOURCC('m', 'd', 'h', 'd'), FOURCC('m', 'd', 'i', 'a'),
77        &FragmentedMP4Parser::parseMediaHeader
78    },
79
80    { FOURCC('h', 'd', 'l', 'r'), FOURCC('m', 'd', 'i', 'a'),
81        &FragmentedMP4Parser::parseMediaHandler
82    },
83
84    { FOURCC('m', 'i', 'n', 'f'), FOURCC('m', 'd', 'i', 'a'), NULL },
85    { FOURCC('d', 'i', 'n', 'f'), FOURCC('m', 'i', 'n', 'f'), NULL },
86    { FOURCC('s', 't', 'b', 'l'), FOURCC('m', 'i', 'n', 'f'), NULL },
87    { FOURCC('s', 't', 's', 'd'), FOURCC('s', 't', 'b', 'l'), NULL },
88
89    { FOURCC('s', 't', 's', 'z'), FOURCC('s', 't', 'b', 'l'),
90        &FragmentedMP4Parser::parseSampleSizes },
91
92    { FOURCC('s', 't', 'z', '2'), FOURCC('s', 't', 'b', 'l'),
93        &FragmentedMP4Parser::parseCompactSampleSizes },
94
95    { FOURCC('s', 't', 's', 'c'), FOURCC('s', 't', 'b', 'l'),
96        &FragmentedMP4Parser::parseSampleToChunk },
97
98    { FOURCC('s', 't', 'c', 'o'), FOURCC('s', 't', 'b', 'l'),
99        &FragmentedMP4Parser::parseChunkOffsets },
100
101    { FOURCC('c', 'o', '6', '4'), FOURCC('s', 't', 'b', 'l'),
102        &FragmentedMP4Parser::parseChunkOffsets64 },
103
104    { FOURCC('a', 'v', 'c', 'C'), FOURCC('a', 'v', 'c', '1'),
105        &FragmentedMP4Parser::parseAVCCodecSpecificData },
106
107    { FOURCC('e', 's', 'd', 's'), FOURCC('m', 'p', '4', 'a'),
108        &FragmentedMP4Parser::parseESDSCodecSpecificData },
109
110    { FOURCC('e', 's', 'd', 's'), FOURCC('m', 'p', '4', 'v'),
111        &FragmentedMP4Parser::parseESDSCodecSpecificData },
112
113    { FOURCC('m', 'd', 'a', 't'), 0, &FragmentedMP4Parser::parseMediaData },
114
115    { FOURCC('m', 'o', 'o', 'f'), 0, NULL },
116    { FOURCC('t', 'r', 'a', 'f'), FOURCC('m', 'o', 'o', 'f'), NULL },
117
118    { FOURCC('t', 'f', 'h', 'd'), FOURCC('t', 'r', 'a', 'f'),
119        &FragmentedMP4Parser::parseTrackFragmentHeader
120    },
121    { FOURCC('t', 'r', 'u', 'n'), FOURCC('t', 'r', 'a', 'f'),
122        &FragmentedMP4Parser::parseTrackFragmentRun
123    },
124
125    { FOURCC('m', 'f', 'r', 'a'), 0, NULL },
126
127    { FOURCC('s', 'i', 'd', 'x'), 0, &FragmentedMP4Parser::parseSegmentIndex },
128};
129
130struct FileSource : public FragmentedMP4Parser::Source {
131    FileSource(const char *filename)
132        : mFile(fopen(filename, "rb")) {
133            CHECK(mFile != NULL);
134        }
135
136    virtual ~FileSource() {
137        fclose(mFile);
138    }
139
140    virtual ssize_t readAt(off64_t offset, void *data, size_t size) {
141        fseek(mFile, offset, SEEK_SET);
142        return fread(data, 1, size, mFile);
143    }
144
145    virtual bool isSeekable() {
146        return true;
147    }
148
149    private:
150    FILE *mFile;
151
152    DISALLOW_EVIL_CONSTRUCTORS(FileSource);
153};
154
155struct ReadTracker : public RefBase {
156    ReadTracker(off64_t size) {
157        allocSize = 1 + size / 8192; // 1 bit per kilobyte
158        bitmap = (char*) calloc(1, allocSize);
159    }
160    virtual ~ReadTracker() {
161        dumpToLog();
162        free(bitmap);
163    }
164    void mark(off64_t offset, size_t size) {
165        int firstbit = offset / 1024;
166        int lastbit = (offset + size - 1) / 1024;
167        for (int i = firstbit; i <= lastbit; i++) {
168            bitmap[i/8] |= (0x80 >> (i & 7));
169        }
170    }
171
172 private:
173    void dumpToLog() {
174        // 96 chars per line, each char represents one kilobyte, 1 kb per bit
175        int numlines = allocSize / 12;
176        char buf[97];
177        char *cur = bitmap;
178        for (int i = 0; i < numlines; i++ && cur) {
179            for (int j = 0; j < 12; j++) {
180                for (int k = 0; k < 8; k++) {
181                    buf[(j * 8) + k] = (*cur & (0x80 >> k)) ? 'X' : '.';
182                }
183                cur++;
184            }
185            buf[96] = '\0';
186            ALOGI("%5dk: %s", i * 96, buf);
187        }
188    }
189
190    size_t allocSize;
191    char *bitmap;
192};
193
194struct DataSourceSource : public FragmentedMP4Parser::Source {
195    DataSourceSource(sp<DataSource> &source)
196        : mDataSource(source) {
197            CHECK(mDataSource != NULL);
198#if 0
199            off64_t size;
200            if (source->getSize(&size) == OK) {
201                mReadTracker = new ReadTracker(size);
202            } else {
203                ALOGE("couldn't get data source size");
204            }
205#endif
206        }
207
208    virtual ssize_t readAt(off64_t offset, void *data, size_t size) {
209        if (mReadTracker != NULL) {
210            mReadTracker->mark(offset, size);
211        }
212        return mDataSource->readAt(offset, data, size);
213    }
214
215    virtual bool isSeekable() {
216        return true;
217    }
218
219    private:
220    sp<DataSource> mDataSource;
221    sp<ReadTracker> mReadTracker;
222
223    DISALLOW_EVIL_CONSTRUCTORS(DataSourceSource);
224};
225
226FragmentedMP4Parser::FragmentedMP4Parser()
227    : mBufferPos(0),
228      mSuspended(false),
229      mDoneWithMoov(false),
230      mFirstMoofOffset(0),
231      mFinalResult(OK) {
232}
233
234FragmentedMP4Parser::~FragmentedMP4Parser() {
235}
236
237void FragmentedMP4Parser::start(const char *filename) {
238    sp<AMessage> msg = new AMessage(kWhatStart, id());
239    msg->setObject("source", new FileSource(filename));
240    msg->post();
241    ALOGV("Parser::start(%s)", filename);
242}
243
244void FragmentedMP4Parser::start(const sp<Source> &source) {
245    sp<AMessage> msg = new AMessage(kWhatStart, id());
246    msg->setObject("source", source);
247    msg->post();
248    ALOGV("Parser::start(Source)");
249}
250
251void FragmentedMP4Parser::start(sp<DataSource> &source) {
252    sp<AMessage> msg = new AMessage(kWhatStart, id());
253    msg->setObject("source", new DataSourceSource(source));
254    msg->post();
255    ALOGV("Parser::start(DataSource)");
256}
257
258sp<AMessage> FragmentedMP4Parser::getFormat(bool audio, bool synchronous) {
259
260    while (true) {
261        bool moovDone = mDoneWithMoov;
262        sp<AMessage> msg = new AMessage(kWhatGetFormat, id());
263        msg->setInt32("audio", audio);
264
265        sp<AMessage> response;
266        status_t err = msg->postAndAwaitResponse(&response);
267
268        if (err != OK) {
269            ALOGV("getFormat post failed: %d", err);
270            return NULL;
271        }
272
273        if (response->findInt32("err", &err) && err != OK) {
274            if (synchronous && err == -EWOULDBLOCK && !moovDone) {
275                resumeIfNecessary();
276                ALOGV("@getFormat parser not ready yet, retrying");
277                usleep(10000);
278                continue;
279            }
280            ALOGV("getFormat failed: %d", err);
281            return NULL;
282        }
283
284        sp<AMessage> format;
285        CHECK(response->findMessage("format", &format));
286
287        ALOGV("returning format %s", format->debugString().c_str());
288        return format;
289    }
290}
291
292status_t FragmentedMP4Parser::seekTo(bool wantAudio, int64_t timeUs) {
293    sp<AMessage> msg = new AMessage(kWhatSeekTo, id());
294    msg->setInt32("audio", wantAudio);
295    msg->setInt64("position", timeUs);
296
297    sp<AMessage> response;
298    status_t err = msg->postAndAwaitResponse(&response);
299    return err;
300}
301
302bool FragmentedMP4Parser::isSeekable() const {
303    while (mFirstMoofOffset == 0 && mFinalResult == OK) {
304        usleep(10000);
305    }
306    bool seekable = mSource->isSeekable();
307    for (size_t i = 0; seekable && i < mTracks.size(); i++) {
308        const TrackInfo *info = &mTracks.valueAt(i);
309        seekable &= !info->mSidx.empty();
310    }
311    return seekable;
312}
313
314status_t FragmentedMP4Parser::onSeekTo(bool wantAudio, int64_t position) {
315    status_t err = -EINVAL;
316    ssize_t trackIndex = findTrack(wantAudio);
317    if (trackIndex < 0) {
318        err = trackIndex;
319    } else {
320        TrackInfo *info = &mTracks.editValueAt(trackIndex);
321
322        int numSidxEntries = info->mSidx.size();
323        int64_t totalTime = 0;
324        off_t totalOffset = mFirstMoofOffset;
325        for (int i = 0; i < numSidxEntries; i++) {
326            const SidxEntry *se = &info->mSidx[i];
327            if (totalTime + se->mDurationUs > position) {
328                mBuffer->setRange(0,0);
329                mBufferPos = totalOffset;
330                if (mFinalResult == ERROR_END_OF_STREAM) {
331                    mFinalResult = OK;
332                    mSuspended = true; // force resume
333                    resumeIfNecessary();
334                }
335                info->mFragments.clear();
336                info->mDecodingTime = totalTime * info->mMediaTimeScale / 1000000ll;
337                return OK;
338            }
339            totalTime += se->mDurationUs;
340            totalOffset += se->mSize;
341        }
342    }
343    ALOGV("seekTo out of range");
344    return err;
345}
346
347status_t FragmentedMP4Parser::dequeueAccessUnit(bool audio, sp<ABuffer> *accessUnit,
348                                                bool synchronous) {
349
350    while (true) {
351        sp<AMessage> msg = new AMessage(kWhatDequeueAccessUnit, id());
352        msg->setInt32("audio", audio);
353
354        sp<AMessage> response;
355        status_t err = msg->postAndAwaitResponse(&response);
356
357        if (err != OK) {
358            ALOGV("dequeue fail 1: %d", err);
359            return err;
360        }
361
362        if (response->findInt32("err", &err) && err != OK) {
363            if (synchronous && err == -EWOULDBLOCK) {
364                resumeIfNecessary();
365                ALOGV("Parser not ready yet, retrying");
366                usleep(10000);
367                continue;
368            }
369            ALOGV("dequeue fail 2: %d, %d", err, synchronous);
370            return err;
371        }
372
373        CHECK(response->findBuffer("accessUnit", accessUnit));
374
375        return OK;
376    }
377}
378
379ssize_t FragmentedMP4Parser::findTrack(bool wantAudio) const {
380    for (size_t i = 0; i < mTracks.size(); ++i) {
381        const TrackInfo *info = &mTracks.valueAt(i);
382
383        bool isAudio =
384            info->mMediaHandlerType == FOURCC('s', 'o', 'u', 'n');
385
386        bool isVideo =
387            info->mMediaHandlerType == FOURCC('v', 'i', 'd', 'e');
388
389        if ((wantAudio && isAudio) || (!wantAudio && !isAudio)) {
390            if (info->mSampleDescs.empty()) {
391                break;
392            }
393
394            return i;
395        }
396    }
397
398    return -EWOULDBLOCK;
399}
400
401void FragmentedMP4Parser::onMessageReceived(const sp<AMessage> &msg) {
402    switch (msg->what()) {
403        case kWhatStart:
404        {
405            sp<RefBase> obj;
406            CHECK(msg->findObject("source", &obj));
407
408            mSource = static_cast<Source *>(obj.get());
409
410            mBuffer = new ABuffer(512 * 1024);
411            mBuffer->setRange(0, 0);
412
413            enter(0ll, 0, 0);
414
415            (new AMessage(kWhatProceed, id()))->post();
416            break;
417        }
418
419        case kWhatProceed:
420        {
421            CHECK(!mSuspended);
422
423            status_t err = onProceed();
424
425            if (err == OK) {
426                if (!mSuspended) {
427                    msg->post();
428                }
429            } else if (err != -EAGAIN) {
430                ALOGE("onProceed returned error %d", err);
431            }
432
433            break;
434        }
435
436        case kWhatReadMore:
437        {
438            size_t needed;
439            CHECK(msg->findSize("needed", &needed));
440
441            memmove(mBuffer->base(), mBuffer->data(), mBuffer->size());
442            mBufferPos += mBuffer->offset();
443            mBuffer->setRange(0, mBuffer->size());
444
445            size_t maxBytesToRead = mBuffer->capacity() - mBuffer->size();
446
447            if (maxBytesToRead < needed) {
448                ALOGV("resizing buffer.");
449
450                sp<ABuffer> newBuffer =
451                    new ABuffer((mBuffer->size() + needed + 1023) & ~1023);
452                memcpy(newBuffer->data(), mBuffer->data(), mBuffer->size());
453                newBuffer->setRange(0, mBuffer->size());
454
455                mBuffer = newBuffer;
456                maxBytesToRead = mBuffer->capacity() - mBuffer->size();
457            }
458
459            CHECK_GE(maxBytesToRead, needed);
460
461            ssize_t n = mSource->readAt(
462                    mBufferPos + mBuffer->size(),
463                    mBuffer->data() + mBuffer->size(), needed);
464
465            if (n < (ssize_t)needed) {
466                ALOGV("Reached EOF when reading %d @ %d + %d", needed, mBufferPos, mBuffer->size());
467                if (n < 0) {
468                    mFinalResult = n;
469                } else if (n == 0) {
470                    mFinalResult = ERROR_END_OF_STREAM;
471                } else {
472                    mFinalResult = ERROR_IO;
473                }
474            } else {
475                mBuffer->setRange(0, mBuffer->size() + n);
476                (new AMessage(kWhatProceed, id()))->post();
477            }
478
479            break;
480        }
481
482        case kWhatGetFormat:
483        {
484            int32_t wantAudio;
485            CHECK(msg->findInt32("audio", &wantAudio));
486
487            status_t err = -EWOULDBLOCK;
488            sp<AMessage> response = new AMessage;
489
490            ssize_t trackIndex = findTrack(wantAudio);
491
492            if (trackIndex < 0) {
493                err = trackIndex;
494            } else {
495                TrackInfo *info = &mTracks.editValueAt(trackIndex);
496
497                sp<AMessage> format = info->mSampleDescs.itemAt(0).mFormat;
498                if (info->mSidxDuration) {
499                    format->setInt64("durationUs", info->mSidxDuration);
500                } else {
501                    // this is probably going to be zero. Oh well...
502                    format->setInt64("durationUs",
503                                     1000000ll * info->mDuration / info->mMediaTimeScale);
504                }
505                response->setMessage(
506                        "format", format);
507
508                err = OK;
509            }
510
511            response->setInt32("err", err);
512
513            uint32_t replyID;
514            CHECK(msg->senderAwaitsResponse(&replyID));
515
516            response->postReply(replyID);
517            break;
518        }
519
520        case kWhatDequeueAccessUnit:
521        {
522            int32_t wantAudio;
523            CHECK(msg->findInt32("audio", &wantAudio));
524
525            status_t err = -EWOULDBLOCK;
526            sp<AMessage> response = new AMessage;
527
528            ssize_t trackIndex = findTrack(wantAudio);
529
530            if (trackIndex < 0) {
531                err = trackIndex;
532            } else {
533                sp<ABuffer> accessUnit;
534                err = onDequeueAccessUnit(trackIndex, &accessUnit);
535
536                if (err == OK) {
537                    response->setBuffer("accessUnit", accessUnit);
538                }
539            }
540
541            response->setInt32("err", err);
542
543            uint32_t replyID;
544            CHECK(msg->senderAwaitsResponse(&replyID));
545
546            response->postReply(replyID);
547            break;
548        }
549
550        case kWhatSeekTo:
551        {
552            ALOGV("kWhatSeekTo");
553            int32_t wantAudio;
554            CHECK(msg->findInt32("audio", &wantAudio));
555            int64_t position;
556            CHECK(msg->findInt64("position", &position));
557
558            status_t err = -EWOULDBLOCK;
559            sp<AMessage> response = new AMessage;
560
561            ssize_t trackIndex = findTrack(wantAudio);
562
563            if (trackIndex < 0) {
564                err = trackIndex;
565            } else {
566                err = onSeekTo(wantAudio, position);
567            }
568            response->setInt32("err", err);
569            uint32_t replyID;
570            CHECK(msg->senderAwaitsResponse(&replyID));
571            response->postReply(replyID);
572            break;
573        }
574        default:
575            TRESPASS();
576    }
577}
578
579status_t FragmentedMP4Parser::onProceed() {
580    status_t err;
581
582    if ((err = need(8)) != OK) {
583        return err;
584    }
585
586    uint64_t size = readU32(0);
587    uint32_t type = readU32(4);
588
589    size_t offset = 8;
590
591    if (size == 1) {
592        if ((err = need(16)) != OK) {
593            return err;
594        }
595
596        size = readU64(offset);
597        offset += 8;
598    }
599
600    uint8_t userType[16];
601
602    if (type == FOURCC('u', 'u', 'i', 'd')) {
603        if ((err = need(offset + 16)) != OK) {
604            return err;
605        }
606
607        memcpy(userType, mBuffer->data() + offset, 16);
608        offset += 16;
609    }
610
611    CHECK(!mStack.isEmpty());
612    uint32_t ptype = mStack.itemAt(mStack.size() - 1).mType;
613
614    static const size_t kNumDispatchers =
615        sizeof(kDispatchTable) / sizeof(kDispatchTable[0]);
616
617    size_t i;
618    for (i = 0; i < kNumDispatchers; ++i) {
619        if (kDispatchTable[i].mType == type
620                && kDispatchTable[i].mParentType == ptype) {
621            break;
622        }
623    }
624
625    // SampleEntry boxes are container boxes that start with a variable
626    // amount of data depending on the media handler type.
627    // We don't look inside 'hint' type SampleEntry boxes.
628
629    bool isSampleEntryBox =
630        (ptype == FOURCC('s', 't', 's', 'd'))
631        && editTrack(mCurrentTrackID)->mMediaHandlerType
632        != FOURCC('h', 'i', 'n', 't');
633
634    if ((i < kNumDispatchers && kDispatchTable[i].mHandler == 0)
635            || isSampleEntryBox || ptype == FOURCC('i', 'l', 's', 't')) {
636        // This is a container box.
637        if (type == FOURCC('m', 'o', 'o', 'f')) {
638            if (mFirstMoofOffset == 0) {
639                ALOGV("first moof @ %08x", mBufferPos + offset);
640                mFirstMoofOffset = mBufferPos + offset - 8; // point at the size
641            }
642        }
643        if (type == FOURCC('m', 'e', 't', 'a')) {
644            if ((err = need(offset + 4)) < OK) {
645                return err;
646            }
647
648            if (readU32(offset) != 0) {
649                return -EINVAL;
650            }
651
652            offset += 4;
653        } else if (type == FOURCC('s', 't', 's', 'd')) {
654            if ((err = need(offset + 8)) < OK) {
655                return err;
656            }
657
658            if (readU32(offset) != 0) {
659                return -EINVAL;
660            }
661
662            if (readU32(offset + 4) == 0) {
663                // We need at least some entries.
664                return -EINVAL;
665            }
666
667            offset += 8;
668        } else if (isSampleEntryBox) {
669            size_t headerSize;
670
671            switch (editTrack(mCurrentTrackID)->mMediaHandlerType) {
672                case FOURCC('v', 'i', 'd', 'e'):
673                {
674                    // 8 bytes SampleEntry + 70 bytes VisualSampleEntry
675                    headerSize = 78;
676                    break;
677                }
678
679                case FOURCC('s', 'o', 'u', 'n'):
680                {
681                    // 8 bytes SampleEntry + 20 bytes AudioSampleEntry
682                    headerSize = 28;
683                    break;
684                }
685
686                case FOURCC('m', 'e', 't', 'a'):
687                {
688                    headerSize = 8;  // 8 bytes SampleEntry
689                    break;
690                }
691
692                default:
693                    TRESPASS();
694            }
695
696            if (offset + headerSize > size) {
697                return -EINVAL;
698            }
699
700            if ((err = need(offset + headerSize)) != OK) {
701                return err;
702            }
703
704            switch (editTrack(mCurrentTrackID)->mMediaHandlerType) {
705                case FOURCC('v', 'i', 'd', 'e'):
706                {
707                    err = parseVisualSampleEntry(
708                            type, offset, offset + headerSize);
709                    break;
710                }
711
712                case FOURCC('s', 'o', 'u', 'n'):
713                {
714                    err = parseAudioSampleEntry(
715                            type, offset, offset + headerSize);
716                    break;
717                }
718
719                case FOURCC('m', 'e', 't', 'a'):
720                {
721                    err = OK;
722                    break;
723                }
724
725                default:
726                    TRESPASS();
727            }
728
729            if (err != OK) {
730                return err;
731            }
732
733            offset += headerSize;
734        }
735
736        skip(offset);
737
738        ALOGV("%sentering box of type '%s'",
739                IndentString(mStack.size()), Fourcc2String(type));
740
741        enter(mBufferPos - offset, type, size - offset);
742    } else {
743        if (!fitsContainer(size)) {
744            return -EINVAL;
745        }
746
747        if (i < kNumDispatchers && kDispatchTable[i].mHandler != 0) {
748            // We have a handler for this box type.
749
750            if ((err = need(size)) != OK) {
751                return err;
752            }
753
754            ALOGV("%sparsing box of type '%s'",
755                    IndentString(mStack.size()), Fourcc2String(type));
756
757            if ((err = (this->*kDispatchTable[i].mHandler)(
758                            type, offset, size)) != OK) {
759                return err;
760            }
761        } else {
762            // Unknown box type
763
764            ALOGV("%sskipping box of type '%s', size %llu",
765                    IndentString(mStack.size()),
766                    Fourcc2String(type), size);
767
768        }
769
770        skip(size);
771    }
772
773    return OK;
774}
775
776// static
777int FragmentedMP4Parser::CompareSampleLocation(
778        const SampleInfo &sample, const MediaDataInfo &mdatInfo) {
779    if (sample.mOffset + sample.mSize < mdatInfo.mOffset) {
780        return -1;
781    }
782
783    if (sample.mOffset >= mdatInfo.mOffset + mdatInfo.mBuffer->size()) {
784        return 1;
785    }
786
787    // Otherwise make sure the sample is completely contained within this
788    // media data block.
789
790    CHECK_GE(sample.mOffset, mdatInfo.mOffset);
791
792    CHECK_LE(sample.mOffset + sample.mSize,
793             mdatInfo.mOffset + mdatInfo.mBuffer->size());
794
795    return 0;
796}
797
798void FragmentedMP4Parser::resumeIfNecessary() {
799    if (!mSuspended) {
800        return;
801    }
802
803    ALOGV("resuming.");
804
805    mSuspended = false;
806    (new AMessage(kWhatProceed, id()))->post();
807}
808
809status_t FragmentedMP4Parser::getSample(
810        TrackInfo *info, sp<TrackFragment> *fragment, SampleInfo *sampleInfo) {
811    for (;;) {
812        if (info->mFragments.empty()) {
813            if (mFinalResult != OK) {
814                return mFinalResult;
815            }
816
817            resumeIfNecessary();
818            return -EWOULDBLOCK;
819        }
820
821        *fragment = *info->mFragments.begin();
822
823        status_t err = (*fragment)->getSample(sampleInfo);
824
825        if (err == OK) {
826            return OK;
827        } else if (err != ERROR_END_OF_STREAM) {
828            return err;
829        }
830
831        // Really, end of this fragment...
832
833        info->mFragments.erase(info->mFragments.begin());
834    }
835}
836
837status_t FragmentedMP4Parser::onDequeueAccessUnit(
838        size_t trackIndex, sp<ABuffer> *accessUnit) {
839    TrackInfo *info = &mTracks.editValueAt(trackIndex);
840
841    sp<TrackFragment> fragment;
842    SampleInfo sampleInfo;
843    status_t err = getSample(info, &fragment, &sampleInfo);
844
845    if (err == -EWOULDBLOCK) {
846        resumeIfNecessary();
847        return err;
848    } else if (err != OK) {
849        return err;
850    }
851
852    err = -EWOULDBLOCK;
853
854    bool checkDroppable = false;
855
856    for (size_t i = 0; i < mMediaData.size(); ++i) {
857        const MediaDataInfo &mdatInfo = mMediaData.itemAt(i);
858
859        int cmp = CompareSampleLocation(sampleInfo, mdatInfo);
860
861        if (cmp < 0 && !mSource->isSeekable()) {
862            return -EPIPE;
863        } else if (cmp == 0) {
864            if (i > 0) {
865                checkDroppable = true;
866            }
867
868            err = makeAccessUnit(info, sampleInfo, mdatInfo, accessUnit);
869            break;
870        }
871    }
872
873    if (err != OK) {
874        return err;
875    }
876
877    fragment->advance();
878
879    if (!mMediaData.empty() && checkDroppable) {
880        size_t numDroppable = 0;
881        bool done = false;
882
883        // XXX FIXME: if one of the tracks is not advanced (e.g. if you play an audio+video
884        // file with sf2), then mMediaData will not be pruned and keeps growing
885        for (size_t i = 0; !done && i < mMediaData.size(); ++i) {
886            const MediaDataInfo &mdatInfo = mMediaData.itemAt(i);
887
888            for (size_t j = 0; j < mTracks.size(); ++j) {
889                TrackInfo *info = &mTracks.editValueAt(j);
890
891                sp<TrackFragment> fragment;
892                SampleInfo sampleInfo;
893                err = getSample(info, &fragment, &sampleInfo);
894
895                if (err != OK) {
896                    done = true;
897                    break;
898                }
899
900                int cmp = CompareSampleLocation(sampleInfo, mdatInfo);
901
902                if (cmp <= 0) {
903                    done = true;
904                    break;
905                }
906            }
907
908            if (!done) {
909                ++numDroppable;
910            }
911        }
912
913        if (numDroppable > 0) {
914            mMediaData.removeItemsAt(0, numDroppable);
915
916            if (mMediaData.size() < 5) {
917                resumeIfNecessary();
918            }
919        }
920    }
921
922    return err;
923}
924
925static size_t parseNALSize(size_t nalLengthSize, const uint8_t *data) {
926    switch (nalLengthSize) {
927        case 1:
928            return *data;
929        case 2:
930            return U16_AT(data);
931        case 3:
932            return ((size_t)data[0] << 16) | U16_AT(&data[1]);
933        case 4:
934            return U32_AT(data);
935    }
936
937    // This cannot happen, mNALLengthSize springs to life by adding 1 to
938    // a 2-bit integer.
939    TRESPASS();
940
941    return 0;
942}
943
944status_t FragmentedMP4Parser::makeAccessUnit(
945        TrackInfo *info,
946        const SampleInfo &sample,
947        const MediaDataInfo &mdatInfo,
948        sp<ABuffer> *accessUnit) {
949    if (sample.mSampleDescIndex < 1
950            || sample.mSampleDescIndex > info->mSampleDescs.size()) {
951        return ERROR_MALFORMED;
952    }
953
954    int64_t presentationTimeUs =
955        1000000ll * sample.mPresentationTime / info->mMediaTimeScale;
956
957    const SampleDescription &sampleDesc =
958        info->mSampleDescs.itemAt(sample.mSampleDescIndex - 1);
959
960    size_t nalLengthSize;
961    if (!sampleDesc.mFormat->findSize("nal-length-size", &nalLengthSize)) {
962        *accessUnit = new ABuffer(sample.mSize);
963
964        memcpy((*accessUnit)->data(),
965               mdatInfo.mBuffer->data() + (sample.mOffset - mdatInfo.mOffset),
966               sample.mSize);
967
968        (*accessUnit)->meta()->setInt64("timeUs", presentationTimeUs);
969        if (IsIDR(*accessUnit)) {
970            (*accessUnit)->meta()->setInt32("is-sync-frame", 1);
971        }
972
973        return OK;
974    }
975
976    const uint8_t *srcPtr =
977        mdatInfo.mBuffer->data() + (sample.mOffset - mdatInfo.mOffset);
978
979    for (int i = 0; i < 2 ; ++i) {
980        size_t srcOffset = 0;
981        size_t dstOffset = 0;
982
983        while (srcOffset < sample.mSize) {
984            if (srcOffset + nalLengthSize > sample.mSize) {
985                return ERROR_MALFORMED;
986            }
987
988            size_t nalSize = parseNALSize(nalLengthSize, &srcPtr[srcOffset]);
989            srcOffset += nalLengthSize;
990
991            if (srcOffset + nalSize > sample.mSize) {
992                return ERROR_MALFORMED;
993            }
994
995            if (i == 1) {
996                memcpy((*accessUnit)->data() + dstOffset,
997                       "\x00\x00\x00\x01",
998                       4);
999
1000                memcpy((*accessUnit)->data() + dstOffset + 4,
1001                       srcPtr + srcOffset,
1002                       nalSize);
1003            }
1004
1005            srcOffset += nalSize;
1006            dstOffset += nalSize + 4;
1007        }
1008
1009        if (i == 0) {
1010            (*accessUnit) = new ABuffer(dstOffset);
1011            (*accessUnit)->meta()->setInt64(
1012                    "timeUs", presentationTimeUs);
1013        }
1014    }
1015    if (IsIDR(*accessUnit)) {
1016        (*accessUnit)->meta()->setInt32("is-sync-frame", 1);
1017    }
1018
1019    return OK;
1020}
1021
1022status_t FragmentedMP4Parser::need(size_t size) {
1023    if (!fitsContainer(size)) {
1024        return -EINVAL;
1025    }
1026
1027    if (size <= mBuffer->size()) {
1028        return OK;
1029    }
1030
1031    sp<AMessage> msg = new AMessage(kWhatReadMore, id());
1032    msg->setSize("needed", size - mBuffer->size());
1033    msg->post();
1034
1035    // ALOGV("need(%d) returning -EAGAIN, only have %d", size, mBuffer->size());
1036
1037    return -EAGAIN;
1038}
1039
1040void FragmentedMP4Parser::enter(off64_t offset, uint32_t type, uint64_t size) {
1041    Container container;
1042    container.mOffset = offset;
1043    container.mType = type;
1044    container.mExtendsToEOF = (size == 0);
1045    container.mBytesRemaining = size;
1046
1047    mStack.push(container);
1048}
1049
1050bool FragmentedMP4Parser::fitsContainer(uint64_t size) const {
1051    CHECK(!mStack.isEmpty());
1052    const Container &container = mStack.itemAt(mStack.size() - 1);
1053
1054    return container.mExtendsToEOF || size <= container.mBytesRemaining;
1055}
1056
1057uint16_t FragmentedMP4Parser::readU16(size_t offset) {
1058    CHECK_LE(offset + 2, mBuffer->size());
1059
1060    const uint8_t *ptr = mBuffer->data() + offset;
1061    return (ptr[0] << 8) | ptr[1];
1062}
1063
1064uint32_t FragmentedMP4Parser::readU32(size_t offset) {
1065    CHECK_LE(offset + 4, mBuffer->size());
1066
1067    const uint8_t *ptr = mBuffer->data() + offset;
1068    return (ptr[0] << 24) | (ptr[1] << 16) | (ptr[2] << 8) | ptr[3];
1069}
1070
1071uint64_t FragmentedMP4Parser::readU64(size_t offset) {
1072    return (((uint64_t)readU32(offset)) << 32) | readU32(offset + 4);
1073}
1074
1075void FragmentedMP4Parser::skip(off_t distance) {
1076    CHECK(!mStack.isEmpty());
1077    for (size_t i = mStack.size(); i-- > 0;) {
1078        Container *container = &mStack.editItemAt(i);
1079        if (!container->mExtendsToEOF) {
1080            CHECK_LE(distance, (off_t)container->mBytesRemaining);
1081
1082            container->mBytesRemaining -= distance;
1083
1084            if (container->mBytesRemaining == 0) {
1085                ALOGV("%sleaving box of type '%s'",
1086                        IndentString(mStack.size() - 1),
1087                        Fourcc2String(container->mType));
1088
1089#if 0
1090                if (container->mType == FOURCC('s', 't', 's', 'd')) {
1091                    TrackInfo *trackInfo = editTrack(mCurrentTrackID);
1092                    for (size_t i = 0;
1093                            i < trackInfo->mSampleDescs.size(); ++i) {
1094                        ALOGI("format #%d: %s",
1095                              i,
1096                              trackInfo->mSampleDescs.itemAt(i)
1097                                .mFormat->debugString().c_str());
1098                    }
1099                }
1100#endif
1101
1102                if (container->mType == FOURCC('s', 't', 'b', 'l')) {
1103                    TrackInfo *trackInfo = editTrack(mCurrentTrackID);
1104
1105                    trackInfo->mStaticFragment->signalCompletion();
1106
1107                    CHECK(trackInfo->mFragments.empty());
1108                    trackInfo->mFragments.push_back(trackInfo->mStaticFragment);
1109                    trackInfo->mStaticFragment.clear();
1110                } else if (container->mType == FOURCC('t', 'r', 'a', 'f')) {
1111                    TrackInfo *trackInfo =
1112                        editTrack(mTrackFragmentHeaderInfo.mTrackID);
1113
1114                    const sp<TrackFragment> &fragment =
1115                        *--trackInfo->mFragments.end();
1116
1117                    static_cast<DynamicTrackFragment *>(
1118                            fragment.get())->signalCompletion();
1119                } else if (container->mType == FOURCC('m', 'o', 'o', 'v')) {
1120                    mDoneWithMoov = true;
1121                }
1122
1123                container = NULL;
1124                mStack.removeItemsAt(i);
1125            }
1126        }
1127    }
1128
1129    if (distance < (off_t)mBuffer->size()) {
1130        mBuffer->setRange(mBuffer->offset() + distance, mBuffer->size() - distance);
1131        mBufferPos += distance;
1132        return;
1133    }
1134
1135    mBuffer->setRange(0, 0);
1136    mBufferPos += distance;
1137}
1138
1139status_t FragmentedMP4Parser::parseTrackHeader(
1140        uint32_t type, size_t offset, uint64_t size) {
1141    if (offset + 4 > size) {
1142        return -EINVAL;
1143    }
1144
1145    uint32_t flags = readU32(offset);
1146
1147    uint32_t version = flags >> 24;
1148    flags &= 0xffffff;
1149
1150    uint32_t trackID;
1151    uint64_t duration;
1152
1153    if (version == 1) {
1154        if (offset + 36 > size) {
1155            return -EINVAL;
1156        }
1157
1158        trackID = readU32(offset + 20);
1159        duration = readU64(offset + 28);
1160
1161        offset += 36;
1162    } else if (version == 0) {
1163        if (offset + 24 > size) {
1164            return -EINVAL;
1165        }
1166
1167        trackID = readU32(offset + 12);
1168        duration = readU32(offset + 20);
1169
1170        offset += 24;
1171    } else {
1172        return -EINVAL;
1173    }
1174
1175    TrackInfo *info = editTrack(trackID, true /* createIfNecessary */);
1176    info->mFlags = flags;
1177    info->mDuration = duration;
1178    if (info->mDuration == 0xffffffff) {
1179        // ffmpeg sets this to -1, which is incorrect.
1180        info->mDuration = 0;
1181    }
1182
1183    info->mStaticFragment = new StaticTrackFragment;
1184
1185    mCurrentTrackID = trackID;
1186
1187    return OK;
1188}
1189
1190status_t FragmentedMP4Parser::parseMediaHeader(
1191        uint32_t type, size_t offset, uint64_t size) {
1192    if (offset + 4 > size) {
1193        return -EINVAL;
1194    }
1195
1196    uint32_t versionAndFlags = readU32(offset);
1197
1198    if (versionAndFlags & 0xffffff) {
1199        return ERROR_MALFORMED;
1200    }
1201
1202    uint32_t version = versionAndFlags >> 24;
1203
1204    TrackInfo *info = editTrack(mCurrentTrackID);
1205
1206    if (version == 1) {
1207        if (offset + 4 + 32 > size) {
1208            return -EINVAL;
1209        }
1210        info->mMediaTimeScale = U32_AT(mBuffer->data() + offset + 20);
1211    } else if (version == 0) {
1212        if (offset + 4 + 20 > size) {
1213            return -EINVAL;
1214        }
1215        info->mMediaTimeScale = U32_AT(mBuffer->data() + offset + 12);
1216    } else {
1217        return ERROR_MALFORMED;
1218    }
1219
1220    return OK;
1221}
1222
1223status_t FragmentedMP4Parser::parseMediaHandler(
1224        uint32_t type, size_t offset, uint64_t size) {
1225    if (offset + 12 > size) {
1226        return -EINVAL;
1227    }
1228
1229    if (readU32(offset) != 0) {
1230        return -EINVAL;
1231    }
1232
1233    uint32_t handlerType = readU32(offset + 8);
1234
1235    switch (handlerType) {
1236        case FOURCC('v', 'i', 'd', 'e'):
1237        case FOURCC('s', 'o', 'u', 'n'):
1238        case FOURCC('h', 'i', 'n', 't'):
1239        case FOURCC('m', 'e', 't', 'a'):
1240            break;
1241
1242        default:
1243            return -EINVAL;
1244    }
1245
1246    editTrack(mCurrentTrackID)->mMediaHandlerType = handlerType;
1247
1248    return OK;
1249}
1250
1251status_t FragmentedMP4Parser::parseVisualSampleEntry(
1252        uint32_t type, size_t offset, uint64_t size) {
1253    if (offset + 78 > size) {
1254        return -EINVAL;
1255    }
1256
1257    TrackInfo *trackInfo = editTrack(mCurrentTrackID);
1258
1259    trackInfo->mSampleDescs.push();
1260    SampleDescription *sampleDesc =
1261        &trackInfo->mSampleDescs.editItemAt(
1262                trackInfo->mSampleDescs.size() - 1);
1263
1264    sampleDesc->mType = type;
1265    sampleDesc->mDataRefIndex = readU16(offset + 6);
1266
1267    sp<AMessage> format = new AMessage;
1268
1269    switch (type) {
1270        case FOURCC('a', 'v', 'c', '1'):
1271            format->setString("mime", MEDIA_MIMETYPE_VIDEO_AVC);
1272            break;
1273        case FOURCC('m', 'p', '4', 'v'):
1274            format->setString("mime", MEDIA_MIMETYPE_VIDEO_MPEG4);
1275            break;
1276        case FOURCC('s', '2', '6', '3'):
1277        case FOURCC('h', '2', '6', '3'):
1278        case FOURCC('H', '2', '6', '3'):
1279            format->setString("mime", MEDIA_MIMETYPE_VIDEO_H263);
1280            break;
1281        default:
1282            format->setString("mime", "application/octet-stream");
1283            break;
1284    }
1285
1286    format->setInt32("width", readU16(offset + 8 + 16));
1287    format->setInt32("height", readU16(offset + 8 + 18));
1288
1289    sampleDesc->mFormat = format;
1290
1291    return OK;
1292}
1293
1294status_t FragmentedMP4Parser::parseAudioSampleEntry(
1295        uint32_t type, size_t offset, uint64_t size) {
1296    if (offset + 28 > size) {
1297        return -EINVAL;
1298    }
1299
1300    TrackInfo *trackInfo = editTrack(mCurrentTrackID);
1301
1302    trackInfo->mSampleDescs.push();
1303    SampleDescription *sampleDesc =
1304        &trackInfo->mSampleDescs.editItemAt(
1305                trackInfo->mSampleDescs.size() - 1);
1306
1307    sampleDesc->mType = type;
1308    sampleDesc->mDataRefIndex = readU16(offset + 6);
1309
1310    sp<AMessage> format = new AMessage;
1311
1312    format->setInt32("channel-count", readU16(offset + 8 + 8));
1313    format->setInt32("sample-size", readU16(offset + 8 + 10));
1314    format->setInt32("sample-rate", readU32(offset + 8 + 16) / 65536.0f);
1315
1316    switch (type) {
1317        case FOURCC('m', 'p', '4', 'a'):
1318            format->setString("mime", MEDIA_MIMETYPE_AUDIO_AAC);
1319            break;
1320
1321        case FOURCC('s', 'a', 'm', 'r'):
1322            format->setString("mime", MEDIA_MIMETYPE_AUDIO_AMR_NB);
1323            format->setInt32("channel-count", 1);
1324            format->setInt32("sample-rate", 8000);
1325            break;
1326
1327        case FOURCC('s', 'a', 'w', 'b'):
1328            format->setString("mime", MEDIA_MIMETYPE_AUDIO_AMR_WB);
1329            format->setInt32("channel-count", 1);
1330            format->setInt32("sample-rate", 16000);
1331            break;
1332        default:
1333            format->setString("mime", "application/octet-stream");
1334            break;
1335    }
1336
1337    sampleDesc->mFormat = format;
1338
1339    return OK;
1340}
1341
1342static void addCodecSpecificData(
1343        const sp<AMessage> &format, int32_t index,
1344        const void *data, size_t size,
1345        bool insertStartCode = false) {
1346    sp<ABuffer> csd = new ABuffer(insertStartCode ? size + 4 : size);
1347
1348    memcpy(csd->data() + (insertStartCode ? 4 : 0), data, size);
1349
1350    if (insertStartCode) {
1351        memcpy(csd->data(), "\x00\x00\x00\x01", 4);
1352    }
1353
1354    csd->meta()->setInt32("csd", true);
1355    csd->meta()->setInt64("timeUs", 0ll);
1356
1357    format->setBuffer(StringPrintf("csd-%d", index).c_str(), csd);
1358}
1359
1360status_t FragmentedMP4Parser::parseSampleSizes(
1361        uint32_t type, size_t offset, uint64_t size) {
1362    return editTrack(mCurrentTrackID)->mStaticFragment->parseSampleSizes(
1363            this, type, offset, size);
1364}
1365
1366status_t FragmentedMP4Parser::parseCompactSampleSizes(
1367        uint32_t type, size_t offset, uint64_t size) {
1368    return editTrack(mCurrentTrackID)->mStaticFragment->parseCompactSampleSizes(
1369            this, type, offset, size);
1370}
1371
1372status_t FragmentedMP4Parser::parseSampleToChunk(
1373        uint32_t type, size_t offset, uint64_t size) {
1374    return editTrack(mCurrentTrackID)->mStaticFragment->parseSampleToChunk(
1375            this, type, offset, size);
1376}
1377
1378status_t FragmentedMP4Parser::parseChunkOffsets(
1379        uint32_t type, size_t offset, uint64_t size) {
1380    return editTrack(mCurrentTrackID)->mStaticFragment->parseChunkOffsets(
1381            this, type, offset, size);
1382}
1383
1384status_t FragmentedMP4Parser::parseChunkOffsets64(
1385        uint32_t type, size_t offset, uint64_t size) {
1386    return editTrack(mCurrentTrackID)->mStaticFragment->parseChunkOffsets64(
1387            this, type, offset, size);
1388}
1389
1390status_t FragmentedMP4Parser::parseAVCCodecSpecificData(
1391        uint32_t type, size_t offset, uint64_t size) {
1392    TrackInfo *trackInfo = editTrack(mCurrentTrackID);
1393
1394    SampleDescription *sampleDesc =
1395        &trackInfo->mSampleDescs.editItemAt(
1396                trackInfo->mSampleDescs.size() - 1);
1397
1398    if (sampleDesc->mType != FOURCC('a', 'v', 'c', '1')) {
1399        return -EINVAL;
1400    }
1401
1402    const uint8_t *ptr = mBuffer->data() + offset;
1403
1404    size -= offset;
1405    offset = 0;
1406
1407    if (size < 7 || ptr[0] != 0x01) {
1408        return ERROR_MALFORMED;
1409    }
1410
1411    sampleDesc->mFormat->setSize("nal-length-size", 1 + (ptr[4] & 3));
1412
1413    size_t numSPS = ptr[5] & 31;
1414
1415    ptr += 6;
1416    size -= 6;
1417
1418    for (size_t i = 0; i < numSPS; ++i) {
1419        if (size < 2) {
1420            return ERROR_MALFORMED;
1421        }
1422
1423        size_t length = U16_AT(ptr);
1424
1425        ptr += 2;
1426        size -= 2;
1427
1428        if (size < length) {
1429            return ERROR_MALFORMED;
1430        }
1431
1432        addCodecSpecificData(
1433                sampleDesc->mFormat, i, ptr, length,
1434                true /* insertStartCode */);
1435
1436        ptr += length;
1437        size -= length;
1438    }
1439
1440    if (size < 1) {
1441        return ERROR_MALFORMED;
1442    }
1443
1444    size_t numPPS = *ptr;
1445    ++ptr;
1446    --size;
1447
1448    for (size_t i = 0; i < numPPS; ++i) {
1449        if (size < 2) {
1450            return ERROR_MALFORMED;
1451        }
1452
1453        size_t length = U16_AT(ptr);
1454
1455        ptr += 2;
1456        size -= 2;
1457
1458        if (size < length) {
1459            return ERROR_MALFORMED;
1460        }
1461
1462        addCodecSpecificData(
1463                sampleDesc->mFormat, numSPS + i, ptr, length,
1464                true /* insertStartCode */);
1465
1466        ptr += length;
1467        size -= length;
1468    }
1469
1470    return OK;
1471}
1472
1473status_t FragmentedMP4Parser::parseESDSCodecSpecificData(
1474        uint32_t type, size_t offset, uint64_t size) {
1475    TrackInfo *trackInfo = editTrack(mCurrentTrackID);
1476
1477    SampleDescription *sampleDesc =
1478        &trackInfo->mSampleDescs.editItemAt(
1479                trackInfo->mSampleDescs.size() - 1);
1480
1481    if (sampleDesc->mType != FOURCC('m', 'p', '4', 'a')
1482            && sampleDesc->mType != FOURCC('m', 'p', '4', 'v')) {
1483        return -EINVAL;
1484    }
1485
1486    const uint8_t *ptr = mBuffer->data() + offset;
1487
1488    size -= offset;
1489    offset = 0;
1490
1491    if (size < 4) {
1492        return -EINVAL;
1493    }
1494
1495    if (U32_AT(ptr) != 0) {
1496        return -EINVAL;
1497    }
1498
1499    ptr += 4;
1500    size -=4;
1501
1502    ESDS esds(ptr, size);
1503
1504    uint8_t objectTypeIndication;
1505    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
1506        return ERROR_MALFORMED;
1507    }
1508
1509    const uint8_t *csd;
1510    size_t csd_size;
1511    if (esds.getCodecSpecificInfo(
1512                (const void **)&csd, &csd_size) != OK) {
1513        return ERROR_MALFORMED;
1514    }
1515
1516    addCodecSpecificData(sampleDesc->mFormat, 0, csd, csd_size);
1517
1518    if (sampleDesc->mType != FOURCC('m', 'p', '4', 'a')) {
1519        return OK;
1520    }
1521
1522    if (csd_size == 0) {
1523        // There's no further information, i.e. no codec specific data
1524        // Let's assume that the information provided in the mpeg4 headers
1525        // is accurate and hope for the best.
1526
1527        return OK;
1528    }
1529
1530    if (csd_size < 2) {
1531        return ERROR_MALFORMED;
1532    }
1533
1534    uint32_t objectType = csd[0] >> 3;
1535
1536    if (objectType == 31) {
1537        return ERROR_UNSUPPORTED;
1538    }
1539
1540    uint32_t freqIndex = (csd[0] & 7) << 1 | (csd[1] >> 7);
1541    int32_t sampleRate = 0;
1542    int32_t numChannels = 0;
1543    if (freqIndex == 15) {
1544        if (csd_size < 5) {
1545            return ERROR_MALFORMED;
1546        }
1547
1548        sampleRate = (csd[1] & 0x7f) << 17
1549                        | csd[2] << 9
1550                        | csd[3] << 1
1551                        | (csd[4] >> 7);
1552
1553        numChannels = (csd[4] >> 3) & 15;
1554    } else {
1555        static uint32_t kSamplingRate[] = {
1556            96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
1557            16000, 12000, 11025, 8000, 7350
1558        };
1559
1560        if (freqIndex == 13 || freqIndex == 14) {
1561            return ERROR_MALFORMED;
1562        }
1563
1564        sampleRate = kSamplingRate[freqIndex];
1565        numChannels = (csd[1] >> 3) & 15;
1566    }
1567
1568    if (numChannels == 0) {
1569        return ERROR_UNSUPPORTED;
1570    }
1571
1572    sampleDesc->mFormat->setInt32("sample-rate", sampleRate);
1573    sampleDesc->mFormat->setInt32("channel-count", numChannels);
1574
1575    return OK;
1576}
1577
1578status_t FragmentedMP4Parser::parseMediaData(
1579        uint32_t type, size_t offset, uint64_t size) {
1580    ALOGV("skipping 'mdat' chunk at offsets 0x%08lx-0x%08llx.",
1581          mBufferPos + offset, mBufferPos + size);
1582
1583    sp<ABuffer> buffer = new ABuffer(size - offset);
1584    memcpy(buffer->data(), mBuffer->data() + offset, size - offset);
1585
1586    mMediaData.push();
1587    MediaDataInfo *info = &mMediaData.editItemAt(mMediaData.size() - 1);
1588    info->mBuffer = buffer;
1589    info->mOffset = mBufferPos + offset;
1590
1591    if (mMediaData.size() > 10) {
1592        ALOGV("suspending for now.");
1593        mSuspended = true;
1594    }
1595
1596    return OK;
1597}
1598
1599status_t FragmentedMP4Parser::parseSegmentIndex(
1600        uint32_t type, size_t offset, uint64_t size) {
1601    ALOGV("sidx box type %d, offset %d, size %d", type, int(offset), int(size));
1602//    AString sidxstr;
1603//    hexdump(mBuffer->data() + offset, size, 0 /* indent */, &sidxstr);
1604//    ALOGV("raw sidx:");
1605//    ALOGV("%s", sidxstr.c_str());
1606    if (offset + 12 > size) {
1607        return -EINVAL;
1608    }
1609
1610    uint32_t flags = readU32(offset);
1611
1612    uint32_t version = flags >> 24;
1613    flags &= 0xffffff;
1614
1615    ALOGV("sidx version %d", version);
1616
1617    uint32_t referenceId = readU32(offset + 4);
1618    uint32_t timeScale = readU32(offset + 8);
1619    ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
1620
1621    uint64_t earliestPresentationTime;
1622    uint64_t firstOffset;
1623
1624    offset += 12;
1625
1626    if (version == 0) {
1627        if (offset + 8 > size) {
1628            return -EINVAL;
1629        }
1630        earliestPresentationTime = readU32(offset);
1631        firstOffset = readU32(offset + 4);
1632        offset += 8;
1633    } else {
1634        if (offset + 16 > size) {
1635            return -EINVAL;
1636        }
1637        earliestPresentationTime = readU64(offset);
1638        firstOffset = readU64(offset + 8);
1639        offset += 16;
1640    }
1641    ALOGV("sidx pres/off: %Ld/%Ld", earliestPresentationTime, firstOffset);
1642
1643    if (offset + 4 > size) {
1644        return -EINVAL;
1645    }
1646    if (readU16(offset) != 0) { // reserved
1647        return -EINVAL;
1648    }
1649    int32_t referenceCount = readU16(offset + 2);
1650    offset += 4;
1651    ALOGV("refcount: %d", referenceCount);
1652
1653    if (offset + referenceCount * 12 > size) {
1654        return -EINVAL;
1655    }
1656
1657    TrackInfo *info = editTrack(mCurrentTrackID);
1658    uint64_t total_duration = 0;
1659    for (int i = 0; i < referenceCount; i++) {
1660        uint32_t d1 = readU32(offset);
1661        uint32_t d2 = readU32(offset + 4);
1662        uint32_t d3 = readU32(offset + 8);
1663
1664        if (d1 & 0x80000000) {
1665            ALOGW("sub-sidx boxes not supported yet");
1666        }
1667        bool sap = d3 & 0x80000000;
1668        bool saptype = d3 >> 28;
1669        if (!sap || saptype > 2) {
1670            ALOGW("not a stream access point, or unsupported type");
1671        }
1672        total_duration += d2;
1673        offset += 12;
1674        ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
1675        SidxEntry se;
1676        se.mSize = d1 & 0x7fffffff;
1677        se.mDurationUs = 1000000LL * d2 / timeScale;
1678        info->mSidx.add(se);
1679    }
1680
1681    info->mSidxDuration = total_duration * 1000000 / timeScale;
1682    ALOGV("duration: %lld", info->mSidxDuration);
1683    return OK;
1684}
1685
1686status_t FragmentedMP4Parser::parseTrackExtends(
1687        uint32_t type, size_t offset, uint64_t size) {
1688    if (offset + 24 > size) {
1689        return -EINVAL;
1690    }
1691
1692    if (readU32(offset) != 0) {
1693        return -EINVAL;
1694    }
1695
1696    uint32_t trackID = readU32(offset + 4);
1697
1698    TrackInfo *info = editTrack(trackID, true /* createIfNecessary */);
1699    info->mDefaultSampleDescriptionIndex = readU32(offset + 8);
1700    info->mDefaultSampleDuration = readU32(offset + 12);
1701    info->mDefaultSampleSize = readU32(offset + 16);
1702    info->mDefaultSampleFlags = readU32(offset + 20);
1703
1704    return OK;
1705}
1706
1707FragmentedMP4Parser::TrackInfo *FragmentedMP4Parser::editTrack(
1708        uint32_t trackID, bool createIfNecessary) {
1709    ssize_t i = mTracks.indexOfKey(trackID);
1710
1711    if (i >= 0) {
1712        return &mTracks.editValueAt(i);
1713    }
1714
1715    if (!createIfNecessary) {
1716        return NULL;
1717    }
1718
1719    TrackInfo info;
1720    info.mTrackID = trackID;
1721    info.mFlags = 0;
1722    info.mDuration = 0xffffffff;
1723    info.mSidxDuration = 0;
1724    info.mMediaTimeScale = 0;
1725    info.mMediaHandlerType = 0;
1726    info.mDefaultSampleDescriptionIndex = 0;
1727    info.mDefaultSampleDuration = 0;
1728    info.mDefaultSampleSize = 0;
1729    info.mDefaultSampleFlags = 0;
1730
1731    info.mDecodingTime = 0;
1732
1733    mTracks.add(trackID, info);
1734    return &mTracks.editValueAt(mTracks.indexOfKey(trackID));
1735}
1736
1737status_t FragmentedMP4Parser::parseTrackFragmentHeader(
1738        uint32_t type, size_t offset, uint64_t size) {
1739    if (offset + 8 > size) {
1740        return -EINVAL;
1741    }
1742
1743    uint32_t flags = readU32(offset);
1744
1745    if (flags & 0xff000000) {
1746        return -EINVAL;
1747    }
1748
1749    mTrackFragmentHeaderInfo.mFlags = flags;
1750
1751    mTrackFragmentHeaderInfo.mTrackID = readU32(offset + 4);
1752    offset += 8;
1753
1754    if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
1755        if (offset + 8 > size) {
1756            return -EINVAL;
1757        }
1758
1759        mTrackFragmentHeaderInfo.mBaseDataOffset = readU64(offset);
1760        offset += 8;
1761    }
1762
1763    if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
1764        if (offset + 4 > size) {
1765            return -EINVAL;
1766        }
1767
1768        mTrackFragmentHeaderInfo.mSampleDescriptionIndex = readU32(offset);
1769        offset += 4;
1770    }
1771
1772    if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
1773        if (offset + 4 > size) {
1774            return -EINVAL;
1775        }
1776
1777        mTrackFragmentHeaderInfo.mDefaultSampleDuration = readU32(offset);
1778        offset += 4;
1779    }
1780
1781    if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
1782        if (offset + 4 > size) {
1783            return -EINVAL;
1784        }
1785
1786        mTrackFragmentHeaderInfo.mDefaultSampleSize = readU32(offset);
1787        offset += 4;
1788    }
1789
1790    if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
1791        if (offset + 4 > size) {
1792            return -EINVAL;
1793        }
1794
1795        mTrackFragmentHeaderInfo.mDefaultSampleFlags = readU32(offset);
1796        offset += 4;
1797    }
1798
1799    if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
1800        // This should point to the position of the first byte of the
1801        // enclosing 'moof' container for the first track and
1802        // the end of the data of the preceding fragment for subsequent
1803        // tracks.
1804
1805        CHECK_GE(mStack.size(), 2u);
1806
1807        mTrackFragmentHeaderInfo.mBaseDataOffset =
1808            mStack.itemAt(mStack.size() - 2).mOffset;
1809
1810        // XXX TODO: This does not do the right thing for the 2nd and
1811        // subsequent tracks yet.
1812    }
1813
1814    mTrackFragmentHeaderInfo.mDataOffset =
1815        mTrackFragmentHeaderInfo.mBaseDataOffset;
1816
1817    TrackInfo *trackInfo = editTrack(mTrackFragmentHeaderInfo.mTrackID);
1818
1819    if (trackInfo->mFragments.empty()
1820            || (*trackInfo->mFragments.begin())->complete()) {
1821        trackInfo->mFragments.push_back(new DynamicTrackFragment);
1822    }
1823
1824    return OK;
1825}
1826
1827status_t FragmentedMP4Parser::parseTrackFragmentRun(
1828        uint32_t type, size_t offset, uint64_t size) {
1829    if (offset + 8 > size) {
1830        return -EINVAL;
1831    }
1832
1833    enum {
1834        kDataOffsetPresent                  = 0x01,
1835        kFirstSampleFlagsPresent            = 0x04,
1836        kSampleDurationPresent              = 0x100,
1837        kSampleSizePresent                  = 0x200,
1838        kSampleFlagsPresent                 = 0x400,
1839        kSampleCompositionTimeOffsetPresent = 0x800,
1840    };
1841
1842    uint32_t flags = readU32(offset);
1843
1844    if (flags & 0xff000000) {
1845        return -EINVAL;
1846    }
1847
1848    if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
1849        // These two shall not be used together.
1850        return -EINVAL;
1851    }
1852
1853    uint32_t sampleCount = readU32(offset + 4);
1854    offset += 8;
1855
1856    uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
1857
1858    uint32_t firstSampleFlags = 0;
1859
1860    if (flags & kDataOffsetPresent) {
1861        if (offset + 4 > size) {
1862            return -EINVAL;
1863        }
1864
1865        int32_t dataOffsetDelta = (int32_t)readU32(offset);
1866
1867        dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
1868
1869        offset += 4;
1870    }
1871
1872    if (flags & kFirstSampleFlagsPresent) {
1873        if (offset + 4 > size) {
1874            return -EINVAL;
1875        }
1876
1877        firstSampleFlags = readU32(offset);
1878        offset += 4;
1879    }
1880
1881    TrackInfo *info = editTrack(mTrackFragmentHeaderInfo.mTrackID);
1882
1883    if (info == NULL) {
1884        return -EINVAL;
1885    }
1886
1887    uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
1888             sampleCtsOffset = 0;
1889
1890    size_t bytesPerSample = 0;
1891    if (flags & kSampleDurationPresent) {
1892        bytesPerSample += 4;
1893    } else if (mTrackFragmentHeaderInfo.mFlags
1894            & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
1895        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
1896    } else {
1897        sampleDuration = info->mDefaultSampleDuration;
1898    }
1899
1900    if (flags & kSampleSizePresent) {
1901        bytesPerSample += 4;
1902    } else if (mTrackFragmentHeaderInfo.mFlags
1903            & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
1904        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
1905    } else {
1906        sampleSize = info->mDefaultSampleSize;
1907    }
1908
1909    if (flags & kSampleFlagsPresent) {
1910        bytesPerSample += 4;
1911    } else if (mTrackFragmentHeaderInfo.mFlags
1912            & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
1913        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
1914    } else {
1915        sampleFlags = info->mDefaultSampleFlags;
1916    }
1917
1918    if (flags & kSampleCompositionTimeOffsetPresent) {
1919        bytesPerSample += 4;
1920    } else {
1921        sampleCtsOffset = 0;
1922    }
1923
1924    if (offset + sampleCount * bytesPerSample > size) {
1925        return -EINVAL;
1926    }
1927
1928    uint32_t sampleDescIndex =
1929        (mTrackFragmentHeaderInfo.mFlags
1930            & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent)
1931            ? mTrackFragmentHeaderInfo.mSampleDescriptionIndex
1932            : info->mDefaultSampleDescriptionIndex;
1933
1934    for (uint32_t i = 0; i < sampleCount; ++i) {
1935        if (flags & kSampleDurationPresent) {
1936            sampleDuration = readU32(offset);
1937            offset += 4;
1938        }
1939
1940        if (flags & kSampleSizePresent) {
1941            sampleSize = readU32(offset);
1942            offset += 4;
1943        }
1944
1945        if (flags & kSampleFlagsPresent) {
1946            sampleFlags = readU32(offset);
1947            offset += 4;
1948        }
1949
1950        if (flags & kSampleCompositionTimeOffsetPresent) {
1951            sampleCtsOffset = readU32(offset);
1952            offset += 4;
1953        }
1954
1955        ALOGV("adding sample at offset 0x%08llx, size %u, duration %u, "
1956              "sampleDescIndex=%u, flags 0x%08x",
1957                dataOffset, sampleSize, sampleDuration,
1958                sampleDescIndex,
1959                (flags & kFirstSampleFlagsPresent) && i == 0
1960                    ? firstSampleFlags : sampleFlags);
1961
1962        const sp<TrackFragment> &fragment = *--info->mFragments.end();
1963
1964        uint32_t decodingTime = info->mDecodingTime;
1965        info->mDecodingTime += sampleDuration;
1966        uint32_t presentationTime = decodingTime + sampleCtsOffset;
1967
1968        static_cast<DynamicTrackFragment *>(
1969                fragment.get())->addSample(
1970                    dataOffset,
1971                    sampleSize,
1972                    presentationTime,
1973                    sampleDescIndex,
1974                    ((flags & kFirstSampleFlagsPresent) && i == 0)
1975                        ? firstSampleFlags : sampleFlags);
1976
1977        dataOffset += sampleSize;
1978    }
1979
1980    mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
1981
1982    return OK;
1983}
1984
1985void FragmentedMP4Parser::copyBuffer(
1986        sp<ABuffer> *dst, size_t offset, uint64_t size) const {
1987    sp<ABuffer> buf = new ABuffer(size);
1988    memcpy(buf->data(), mBuffer->data() + offset, size);
1989
1990    *dst = buf;
1991}
1992
1993}  // namespace android
1994