MatroskaExtractor.cpp revision d42573cace9db2b5948e540c32beaef80f04153c
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MatroskaExtractor"
19#include <utils/Log.h>
20
21#include "MatroskaExtractor.h"
22
23#include "mkvparser.hpp"
24
25#include <media/stagefright/foundation/ADebug.h>
26#include <media/stagefright/foundation/hexdump.h>
27#include <media/stagefright/DataSource.h>
28#include <media/stagefright/MediaBuffer.h>
29#include <media/stagefright/MediaDefs.h>
30#include <media/stagefright/MediaErrors.h>
31#include <media/stagefright/MediaSource.h>
32#include <media/stagefright/MetaData.h>
33#include <media/stagefright/Utils.h>
34#include <utils/String8.h>
35
36namespace android {
37
38struct DataSourceReader : public mkvparser::IMkvReader {
39    DataSourceReader(const sp<DataSource> &source)
40        : mSource(source) {
41    }
42
43    virtual int Read(long long position, long length, unsigned char* buffer) {
44        CHECK(position >= 0);
45        CHECK(length >= 0);
46
47        if (length == 0) {
48            return 0;
49        }
50
51        ssize_t n = mSource->readAt(position, buffer, length);
52
53        if (n <= 0) {
54            return -1;
55        }
56
57        return 0;
58    }
59
60    virtual int Length(long long* total, long long* available) {
61        off64_t size;
62        if (mSource->getSize(&size) != OK) {
63            *total = -1;
64            *available = (long long)((1ull << 63) - 1);
65
66            return 0;
67        }
68
69        if (total) {
70            *total = size;
71        }
72
73        if (available) {
74            *available = size;
75        }
76
77        return 0;
78    }
79
80private:
81    sp<DataSource> mSource;
82
83    DataSourceReader(const DataSourceReader &);
84    DataSourceReader &operator=(const DataSourceReader &);
85};
86
87////////////////////////////////////////////////////////////////////////////////
88
89struct BlockIterator {
90    BlockIterator(MatroskaExtractor *extractor, unsigned long trackNum);
91
92    bool eos() const;
93
94    void advance();
95    void reset();
96    void seek(int64_t seekTimeUs);
97
98    const mkvparser::Block *block() const;
99    int64_t blockTimeUs() const;
100
101private:
102    MatroskaExtractor *mExtractor;
103    unsigned long mTrackNum;
104
105    const mkvparser::Cluster *mCluster;
106    const mkvparser::BlockEntry *mBlockEntry;
107    long mBlockEntryIndex;
108
109    void advance_l();
110
111    BlockIterator(const BlockIterator &);
112    BlockIterator &operator=(const BlockIterator &);
113};
114
115struct MatroskaSource : public MediaSource {
116    MatroskaSource(
117            const sp<MatroskaExtractor> &extractor, size_t index);
118
119    virtual status_t start(MetaData *params);
120    virtual status_t stop();
121
122    virtual sp<MetaData> getFormat();
123
124    virtual status_t read(
125            MediaBuffer **buffer, const ReadOptions *options);
126
127protected:
128    virtual ~MatroskaSource();
129
130private:
131    enum Type {
132        AVC,
133        AAC,
134        OTHER
135    };
136
137    sp<MatroskaExtractor> mExtractor;
138    size_t mTrackIndex;
139    Type mType;
140    BlockIterator mBlockIter;
141    size_t mNALSizeLen;  // for type AVC
142
143    List<MediaBuffer *> mPendingFrames;
144
145    status_t advance();
146
147    status_t readBlock();
148    void clearPendingFrames();
149
150    MatroskaSource(const MatroskaSource &);
151    MatroskaSource &operator=(const MatroskaSource &);
152};
153
154MatroskaSource::MatroskaSource(
155        const sp<MatroskaExtractor> &extractor, size_t index)
156    : mExtractor(extractor),
157      mTrackIndex(index),
158      mType(OTHER),
159      mBlockIter(mExtractor.get(),
160                 mExtractor->mTracks.itemAt(index).mTrackNum),
161      mNALSizeLen(0) {
162    sp<MetaData> meta = mExtractor->mTracks.itemAt(index).mMeta;
163
164    const char *mime;
165    CHECK(meta->findCString(kKeyMIMEType, &mime));
166
167    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
168        mType = AVC;
169
170        uint32_t dummy;
171        const uint8_t *avcc;
172        size_t avccSize;
173        CHECK(meta->findData(
174                    kKeyAVCC, &dummy, (const void **)&avcc, &avccSize));
175
176        CHECK_GE(avccSize, 5u);
177
178        mNALSizeLen = 1 + (avcc[4] & 3);
179        LOGV("mNALSizeLen = %d", mNALSizeLen);
180    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
181        mType = AAC;
182    }
183}
184
185MatroskaSource::~MatroskaSource() {
186    clearPendingFrames();
187}
188
189status_t MatroskaSource::start(MetaData *params) {
190    mBlockIter.reset();
191
192    return OK;
193}
194
195status_t MatroskaSource::stop() {
196    clearPendingFrames();
197
198    return OK;
199}
200
201sp<MetaData> MatroskaSource::getFormat() {
202    return mExtractor->mTracks.itemAt(mTrackIndex).mMeta;
203}
204
205////////////////////////////////////////////////////////////////////////////////
206
207BlockIterator::BlockIterator(
208        MatroskaExtractor *extractor, unsigned long trackNum)
209    : mExtractor(extractor),
210      mTrackNum(trackNum),
211      mCluster(NULL),
212      mBlockEntry(NULL),
213      mBlockEntryIndex(0) {
214    reset();
215}
216
217bool BlockIterator::eos() const {
218    return mCluster == NULL || mCluster->EOS();
219}
220
221void BlockIterator::advance() {
222    Mutex::Autolock autoLock(mExtractor->mLock);
223    advance_l();
224}
225
226void BlockIterator::advance_l() {
227    for (;;) {
228        long res = mCluster->GetEntry(mBlockEntryIndex, mBlockEntry);
229        LOGV("GetEntry returned %ld", res);
230
231        long long pos;
232        long len;
233        if (res < 0) {
234            // Need to parse this cluster some more
235
236            CHECK_EQ(res, mkvparser::E_BUFFER_NOT_FULL);
237
238            res = mCluster->Parse(pos, len);
239            LOGV("Parse returned %ld", res);
240
241            if (res < 0) {
242                // I/O error
243
244                LOGE("Cluster::Parse returned result %ld", res);
245
246                mCluster = NULL;
247                break;
248            }
249
250            continue;
251        } else if (res == 0) {
252            // We're done with this cluster
253
254            const mkvparser::Cluster *nextCluster;
255            res = mExtractor->mSegment->ParseNext(
256                    mCluster, nextCluster, pos, len);
257            LOGV("ParseNext returned %ld", res);
258
259            if (res > 0) {
260                // EOF
261
262                mCluster = NULL;
263                break;
264            }
265
266            CHECK_EQ(res, 0);
267            CHECK(nextCluster != NULL);
268            CHECK(!nextCluster->EOS());
269
270            mCluster = nextCluster;
271
272            res = mCluster->Parse(pos, len);
273            LOGV("Parse (2) returned %ld", res);
274            CHECK_GE(res, 0);
275
276            mBlockEntryIndex = 0;
277            continue;
278        }
279
280        CHECK(mBlockEntry != NULL);
281        CHECK(mBlockEntry->GetBlock() != NULL);
282        ++mBlockEntryIndex;
283
284        if (mBlockEntry->GetBlock()->GetTrackNumber() == mTrackNum) {
285            break;
286        }
287    }
288}
289
290void BlockIterator::reset() {
291    Mutex::Autolock autoLock(mExtractor->mLock);
292
293    mCluster = mExtractor->mSegment->GetFirst();
294    mBlockEntryIndex = 0;
295
296    do {
297        advance_l();
298    } while (!eos() && block()->GetTrackNumber() != mTrackNum);
299}
300
301void BlockIterator::seek(int64_t seekTimeUs) {
302    Mutex::Autolock autoLock(mExtractor->mLock);
303
304    mCluster = mExtractor->mSegment->FindCluster(seekTimeUs * 1000ll);
305    mBlockEntryIndex = 0;
306
307    while (!eos() && block()->GetTrackNumber() != mTrackNum) {
308        advance_l();
309    }
310
311    while (!eos() && !mBlockEntry->GetBlock()->IsKey()) {
312        advance_l();
313    }
314}
315
316const mkvparser::Block *BlockIterator::block() const {
317    CHECK(!eos());
318
319    return mBlockEntry->GetBlock();
320}
321
322int64_t BlockIterator::blockTimeUs() const {
323    return (mBlockEntry->GetBlock()->GetTime(mCluster) + 500ll) / 1000ll;
324}
325
326////////////////////////////////////////////////////////////////////////////////
327
// Reads a 24-bit big-endian unsigned integer from the three bytes at |ptr|.
static unsigned U24_AT(const uint8_t *ptr) {
    const int high = ptr[0] << 16;
    const int middle = ptr[1] << 8;
    const int low = ptr[2];

    return high | middle | low;
}
331
// Counts the leading zero bits of an 8-bit value. Returns 8 for x == 0; the
// mask-driven loop always terminates, whereas shifting x until its top bit
// is set never does for zero.
static size_t clz(uint8_t x) {
    size_t numLeadingZeroes = 0;

    for (uint8_t mask = 0x80; mask != 0 && !(x & mask); mask >>= 1) {
        ++numLeadingZeroes;
    }

    return numLeadingZeroes;
}
342
343void MatroskaSource::clearPendingFrames() {
344    while (!mPendingFrames.empty()) {
345        MediaBuffer *frame = *mPendingFrames.begin();
346        mPendingFrames.erase(mPendingFrames.begin());
347
348        frame->release();
349        frame = NULL;
350    }
351}
352
353status_t MatroskaSource::readBlock() {
354    CHECK(mPendingFrames.empty());
355
356    if (mBlockIter.eos()) {
357        return ERROR_END_OF_STREAM;
358    }
359
360    const mkvparser::Block *block = mBlockIter.block();
361
362    int64_t timeUs = mBlockIter.blockTimeUs();
363
364    for (int i = 0; i < block->GetFrameCount(); ++i) {
365        const mkvparser::Block::Frame &frame = block->GetFrame(i);
366
367        MediaBuffer *mbuf = new MediaBuffer(frame.len);
368        mbuf->meta_data()->setInt64(kKeyTime, timeUs);
369        mbuf->meta_data()->setInt32(kKeyIsSyncFrame, block->IsKey());
370
371        long n = frame.Read(mExtractor->mReader, (unsigned char *)mbuf->data());
372        if (n != 0) {
373            mPendingFrames.clear();
374
375            mBlockIter.advance();
376            return ERROR_IO;
377        }
378
379        mPendingFrames.push_back(mbuf);
380    }
381
382    mBlockIter.advance();
383
384    return OK;
385}
386
387status_t MatroskaSource::read(
388        MediaBuffer **out, const ReadOptions *options) {
389    *out = NULL;
390
391    int64_t seekTimeUs;
392    ReadOptions::SeekMode mode;
393    if (options && options->getSeekTo(&seekTimeUs, &mode)
394            && !mExtractor->isLiveStreaming()) {
395        clearPendingFrames();
396        mBlockIter.seek(seekTimeUs);
397    }
398
399again:
400    while (mPendingFrames.empty()) {
401        status_t err = readBlock();
402
403        if (err != OK) {
404            clearPendingFrames();
405
406            return err;
407        }
408    }
409
410    MediaBuffer *frame = *mPendingFrames.begin();
411    mPendingFrames.erase(mPendingFrames.begin());
412
413    size_t size = frame->range_length();
414
415    if (mType != AVC) {
416        *out = frame;
417
418        return OK;
419    }
420
421    if (size < mNALSizeLen) {
422        frame->release();
423        frame = NULL;
424
425        return ERROR_MALFORMED;
426    }
427
428    // In the case of AVC content, each NAL unit is prefixed by
429    // mNALSizeLen bytes of length. We want to prefix the data with
430    // a four-byte 0x00000001 startcode instead of the length prefix.
431    // mNALSizeLen ranges from 1 through 4 bytes, so add an extra
432    // 3 bytes of padding to the buffer start.
433    static const size_t kPadding = 3;
434
435    MediaBuffer *buffer = new MediaBuffer(size + kPadding);
436
437    int64_t timeUs;
438    CHECK(frame->meta_data()->findInt64(kKeyTime, &timeUs));
439    int32_t isSync;
440    CHECK(frame->meta_data()->findInt32(kKeyIsSyncFrame, &isSync));
441
442    buffer->meta_data()->setInt64(kKeyTime, timeUs);
443    buffer->meta_data()->setInt32(kKeyIsSyncFrame, isSync);
444
445    memcpy((uint8_t *)buffer->data() + kPadding,
446           (const uint8_t *)frame->data() + frame->range_offset(),
447           size);
448
449    buffer->set_range(kPadding, size);
450
451    frame->release();
452    frame = NULL;
453
454    uint8_t *data = (uint8_t *)buffer->data();
455
456    size_t NALsize;
457    switch (mNALSizeLen) {
458        case 1: NALsize = data[kPadding]; break;
459        case 2: NALsize = U16_AT(&data[kPadding]); break;
460        case 3: NALsize = U24_AT(&data[kPadding]); break;
461        case 4: NALsize = U32_AT(&data[kPadding]); break;
462        default:
463            TRESPASS();
464    }
465
466    if (size < NALsize + mNALSizeLen) {
467        buffer->release();
468        buffer = NULL;
469
470        return ERROR_MALFORMED;
471    }
472
473    if (size > NALsize + mNALSizeLen) {
474        LOGW("discarding %d bytes of data.", size - NALsize - mNALSizeLen);
475    }
476
477    // actual data starts at &data[kPadding + mNALSizeLen]
478
479    memcpy(&data[mNALSizeLen - 1], "\x00\x00\x00\x01", 4);
480    buffer->set_range(mNALSizeLen - 1, NALsize + 4);
481
482    *out = buffer;
483
484    return OK;
485}
486
487////////////////////////////////////////////////////////////////////////////////
488
489MatroskaExtractor::MatroskaExtractor(const sp<DataSource> &source)
490    : mDataSource(source),
491      mReader(new DataSourceReader(mDataSource)),
492      mSegment(NULL),
493      mExtractedThumbnails(false) {
494    off64_t size;
495    mIsLiveStreaming =
496        (mDataSource->flags()
497            & (DataSource::kWantsPrefetching
498                | DataSource::kIsCachingDataSource))
499        && mDataSource->getSize(&size) != OK;
500
501    mkvparser::EBMLHeader ebmlHeader;
502    long long pos;
503    if (ebmlHeader.Parse(mReader, pos) < 0) {
504        return;
505    }
506
507    long long ret =
508        mkvparser::Segment::CreateInstance(mReader, pos, mSegment);
509
510    if (ret) {
511        CHECK(mSegment == NULL);
512        return;
513    }
514
515    if (isLiveStreaming()) {
516        ret = mSegment->ParseHeaders();
517        CHECK_EQ(ret, 0);
518
519        long len;
520        ret = mSegment->LoadCluster(pos, len);
521        CHECK_EQ(ret, 0);
522    } else {
523        ret = mSegment->Load();
524    }
525
526    if (ret < 0) {
527        delete mSegment;
528        mSegment = NULL;
529        return;
530    }
531
532    addTracks();
533}
534
535MatroskaExtractor::~MatroskaExtractor() {
536    delete mSegment;
537    mSegment = NULL;
538
539    delete mReader;
540    mReader = NULL;
541}
542
543size_t MatroskaExtractor::countTracks() {
544    return mTracks.size();
545}
546
547sp<MediaSource> MatroskaExtractor::getTrack(size_t index) {
548    if (index >= mTracks.size()) {
549        return NULL;
550    }
551
552    return new MatroskaSource(this, index);
553}
554
555sp<MetaData> MatroskaExtractor::getTrackMetaData(
556        size_t index, uint32_t flags) {
557    if (index >= mTracks.size()) {
558        return NULL;
559    }
560
561    if ((flags & kIncludeExtensiveMetaData) && !mExtractedThumbnails
562            && !isLiveStreaming()) {
563        findThumbnails();
564        mExtractedThumbnails = true;
565    }
566
567    return mTracks.itemAt(index).mMeta;
568}
569
570bool MatroskaExtractor::isLiveStreaming() const {
571    return mIsLiveStreaming;
572}
573
574static void addESDSFromAudioSpecificInfo(
575        const sp<MetaData> &meta, const void *asi, size_t asiSize) {
576    static const uint8_t kStaticESDS[] = {
577        0x03, 22,
578        0x00, 0x00,     // ES_ID
579        0x00,           // streamDependenceFlag, URL_Flag, OCRstreamFlag
580
581        0x04, 17,
582        0x40,                       // Audio ISO/IEC 14496-3
583        0x00, 0x00, 0x00, 0x00,
584        0x00, 0x00, 0x00, 0x00,
585        0x00, 0x00, 0x00, 0x00,
586
587        0x05,
588        // AudioSpecificInfo (with size prefix) follows
589    };
590
591    CHECK(asiSize < 128);
592    size_t esdsSize = sizeof(kStaticESDS) + asiSize + 1;
593    uint8_t *esds = new uint8_t[esdsSize];
594    memcpy(esds, kStaticESDS, sizeof(kStaticESDS));
595    uint8_t *ptr = esds + sizeof(kStaticESDS);
596    *ptr++ = asiSize;
597    memcpy(ptr, asi, asiSize);
598
599    meta->setData(kKeyESDS, 0, esds, esdsSize);
600
601    delete[] esds;
602    esds = NULL;
603}
604
605void addVorbisCodecInfo(
606        const sp<MetaData> &meta,
607        const void *_codecPrivate, size_t codecPrivateSize) {
608    // printf("vorbis private data follows:\n");
609    // hexdump(_codecPrivate, codecPrivateSize);
610
611    CHECK(codecPrivateSize >= 3);
612
613    const uint8_t *codecPrivate = (const uint8_t *)_codecPrivate;
614    CHECK(codecPrivate[0] == 0x02);
615
616    size_t len1 = codecPrivate[1];
617    size_t len2 = codecPrivate[2];
618
619    CHECK(codecPrivateSize > 3 + len1 + len2);
620
621    CHECK(codecPrivate[3] == 0x01);
622    meta->setData(kKeyVorbisInfo, 0, &codecPrivate[3], len1);
623
624    CHECK(codecPrivate[len1 + 3] == 0x03);
625
626    CHECK(codecPrivate[len1 + len2 + 3] == 0x05);
627    meta->setData(
628            kKeyVorbisBooks, 0, &codecPrivate[len1 + len2 + 3],
629            codecPrivateSize - len1 - len2 - 3);
630}
631
632void MatroskaExtractor::addTracks() {
633    const mkvparser::Tracks *tracks = mSegment->GetTracks();
634
635    for (size_t index = 0; index < tracks->GetTracksCount(); ++index) {
636        const mkvparser::Track *track = tracks->GetTrackByIndex(index);
637
638        if (track == NULL) {
639            // Apparently this is currently valid (if unexpected) behaviour
640            // of the mkv parser lib.
641            continue;
642        }
643
644        const char *const codecID = track->GetCodecId();
645        LOGV("codec id = %s", codecID);
646        LOGV("codec name = %s", track->GetCodecNameAsUTF8());
647
648        size_t codecPrivateSize;
649        const unsigned char *codecPrivate =
650            track->GetCodecPrivate(codecPrivateSize);
651
652        enum { VIDEO_TRACK = 1, AUDIO_TRACK = 2 };
653
654        sp<MetaData> meta = new MetaData;
655
656        switch (track->GetType()) {
657            case VIDEO_TRACK:
658            {
659                const mkvparser::VideoTrack *vtrack =
660                    static_cast<const mkvparser::VideoTrack *>(track);
661
662                if (!strcmp("V_MPEG4/ISO/AVC", codecID)) {
663                    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_AVC);
664                    meta->setData(kKeyAVCC, 0, codecPrivate, codecPrivateSize);
665                } else if (!strcmp("V_VP8", codecID)) {
666                    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_VPX);
667                } else {
668                    continue;
669                }
670
671                meta->setInt32(kKeyWidth, vtrack->GetWidth());
672                meta->setInt32(kKeyHeight, vtrack->GetHeight());
673                break;
674            }
675
676            case AUDIO_TRACK:
677            {
678                const mkvparser::AudioTrack *atrack =
679                    static_cast<const mkvparser::AudioTrack *>(track);
680
681                if (!strcmp("A_AAC", codecID)) {
682                    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AAC);
683                    CHECK(codecPrivateSize >= 2);
684
685                    addESDSFromAudioSpecificInfo(
686                            meta, codecPrivate, codecPrivateSize);
687                } else if (!strcmp("A_VORBIS", codecID)) {
688                    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_VORBIS);
689
690                    addVorbisCodecInfo(meta, codecPrivate, codecPrivateSize);
691                } else {
692                    continue;
693                }
694
695                meta->setInt32(kKeySampleRate, atrack->GetSamplingRate());
696                meta->setInt32(kKeyChannelCount, atrack->GetChannels());
697                break;
698            }
699
700            default:
701                continue;
702        }
703
704        long long durationNs = mSegment->GetDuration();
705        meta->setInt64(kKeyDuration, (durationNs + 500) / 1000);
706
707        mTracks.push();
708        TrackInfo *trackInfo = &mTracks.editItemAt(mTracks.size() - 1);
709        trackInfo->mTrackNum = track->GetNumber();
710        trackInfo->mMeta = meta;
711    }
712}
713
714void MatroskaExtractor::findThumbnails() {
715    for (size_t i = 0; i < mTracks.size(); ++i) {
716        TrackInfo *info = &mTracks.editItemAt(i);
717
718        const char *mime;
719        CHECK(info->mMeta->findCString(kKeyMIMEType, &mime));
720
721        if (strncasecmp(mime, "video/", 6)) {
722            continue;
723        }
724
725        BlockIterator iter(this, info->mTrackNum);
726        int32_t i = 0;
727        int64_t thumbnailTimeUs = 0;
728        size_t maxBlockSize = 0;
729        while (!iter.eos() && i < 20) {
730            if (iter.block()->IsKey()) {
731                ++i;
732
733                size_t blockSize = 0;
734                for (int i = 0; i < iter.block()->GetFrameCount(); ++i) {
735                    blockSize += iter.block()->GetFrame(i).len;
736                }
737
738                if (blockSize > maxBlockSize) {
739                    maxBlockSize = blockSize;
740                    thumbnailTimeUs = iter.blockTimeUs();
741                }
742            }
743            iter.advance();
744        }
745        info->mMeta->setInt64(kKeyThumbnailTime, thumbnailTimeUs);
746    }
747}
748
749sp<MetaData> MatroskaExtractor::getMetaData() {
750    sp<MetaData> meta = new MetaData;
751    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MATROSKA);
752
753    return meta;
754}
755
756uint32_t MatroskaExtractor::flags() const {
757    uint32_t x = CAN_PAUSE;
758    if (!isLiveStreaming()) {
759        x |= CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK;
760    }
761
762    return x;
763}
764
765bool SniffMatroska(
766        const sp<DataSource> &source, String8 *mimeType, float *confidence,
767        sp<AMessage> *) {
768    DataSourceReader reader(source);
769    mkvparser::EBMLHeader ebmlHeader;
770    long long pos;
771    if (ebmlHeader.Parse(&reader, pos) < 0) {
772        return false;
773    }
774
775    mimeType->setTo(MEDIA_MIMETYPE_CONTAINER_MATROSKA);
776    *confidence = 0.6;
777
778    return true;
779}
780
781}  // namespace android
782