MatroskaExtractor.cpp revision b10f3669a9b73cd024662c2b70f5155bc0c2cd21
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MatroskaExtractor"
19#include <utils/Log.h>
20
21#include "MatroskaExtractor.h"
22
23#include "mkvparser.hpp"
24
25#include <media/stagefright/foundation/ADebug.h>
26#include <media/stagefright/foundation/hexdump.h>
27#include <media/stagefright/DataSource.h>
28#include <media/stagefright/MediaBuffer.h>
29#include <media/stagefright/MediaDefs.h>
30#include <media/stagefright/MediaErrors.h>
31#include <media/stagefright/MediaSource.h>
32#include <media/stagefright/MetaData.h>
33#include <media/stagefright/Utils.h>
34#include <utils/String8.h>
35
36namespace android {
37
38struct DataSourceReader : public mkvparser::IMkvReader {
39    DataSourceReader(const sp<DataSource> &source)
40        : mSource(source) {
41    }
42
43    virtual int Read(long long position, long length, unsigned char* buffer) {
44        CHECK(position >= 0);
45        CHECK(length >= 0);
46
47        if (length == 0) {
48            return 0;
49        }
50
51        ssize_t n = mSource->readAt(position, buffer, length);
52
53        if (n <= 0) {
54            return -1;
55        }
56
57        return 0;
58    }
59
60    virtual int Length(long long* total, long long* available) {
61        off_t size;
62        if (mSource->getSize(&size) != OK) {
63            return -1;
64        }
65
66        if (total) {
67            *total = size;
68        }
69
70        if (available) {
71            *available = size;
72        }
73
74        return 0;
75    }
76
77private:
78    sp<DataSource> mSource;
79
80    DataSourceReader(const DataSourceReader &);
81    DataSourceReader &operator=(const DataSourceReader &);
82};
83
84////////////////////////////////////////////////////////////////////////////////
85
86struct BlockIterator {
87    BlockIterator(mkvparser::Segment *segment, unsigned long trackNum);
88
89    bool eos() const;
90
91    void advance();
92    void reset();
93    void seek(int64_t seekTimeUs);
94
95    const mkvparser::Block *block() const;
96    int64_t blockTimeUs() const;
97
98private:
99    mkvparser::Segment *mSegment;
100    unsigned long mTrackNum;
101
102    mkvparser::Cluster *mCluster;
103    const mkvparser::BlockEntry *mBlockEntry;
104
105    BlockIterator(const BlockIterator &);
106    BlockIterator &operator=(const BlockIterator &);
107};
108
109struct MatroskaSource : public MediaSource {
110    MatroskaSource(
111            const sp<MatroskaExtractor> &extractor, size_t index);
112
113    virtual status_t start(MetaData *params);
114    virtual status_t stop();
115
116    virtual sp<MetaData> getFormat();
117
118    virtual status_t read(
119            MediaBuffer **buffer, const ReadOptions *options);
120
121private:
122    enum Type {
123        AVC,
124        AAC,
125        OTHER
126    };
127
128    sp<MatroskaExtractor> mExtractor;
129    size_t mTrackIndex;
130    Type mType;
131    BlockIterator mBlockIter;
132    size_t mNALSizeLen;  // for type AVC
133
134    status_t advance();
135
136    MatroskaSource(const MatroskaSource &);
137    MatroskaSource &operator=(const MatroskaSource &);
138};
139
140MatroskaSource::MatroskaSource(
141        const sp<MatroskaExtractor> &extractor, size_t index)
142    : mExtractor(extractor),
143      mTrackIndex(index),
144      mType(OTHER),
145      mBlockIter(mExtractor->mSegment,
146                 mExtractor->mTracks.itemAt(index).mTrackNum),
147      mNALSizeLen(0) {
148    sp<MetaData> meta = mExtractor->mTracks.itemAt(index).mMeta;
149
150    const char *mime;
151    CHECK(meta->findCString(kKeyMIMEType, &mime));
152
153    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
154        mType = AVC;
155
156        uint32_t dummy;
157        const uint8_t *avcc;
158        size_t avccSize;
159        CHECK(meta->findData(
160                    kKeyAVCC, &dummy, (const void **)&avcc, &avccSize));
161
162        CHECK_GE(avccSize, 5u);
163
164        mNALSizeLen = 1 + (avcc[4] & 3);
165        LOGV("mNALSizeLen = %d", mNALSizeLen);
166    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
167        mType = AAC;
168    }
169}
170
171status_t MatroskaSource::start(MetaData *params) {
172    mBlockIter.reset();
173
174    return OK;
175}
176
177status_t MatroskaSource::stop() {
178    return OK;
179}
180
181sp<MetaData> MatroskaSource::getFormat() {
182    return mExtractor->mTracks.itemAt(mTrackIndex).mMeta;
183}
184
185////////////////////////////////////////////////////////////////////////////////
186
187BlockIterator::BlockIterator(
188        mkvparser::Segment *segment, unsigned long trackNum)
189    : mSegment(segment),
190      mTrackNum(trackNum),
191      mCluster(NULL),
192      mBlockEntry(NULL) {
193    reset();
194}
195
196bool BlockIterator::eos() const {
197    return mCluster == NULL || mCluster->EOS();
198}
199
200void BlockIterator::advance() {
201    while (!eos()) {
202        if (mBlockEntry != NULL) {
203            mBlockEntry = mCluster->GetNext(mBlockEntry);
204        } else if (mCluster != NULL) {
205            mCluster = mSegment->GetNext(mCluster);
206
207            if (eos()) {
208                break;
209            }
210
211            mBlockEntry = mCluster->GetFirst();
212        }
213
214        if (mBlockEntry != NULL
215                && mBlockEntry->GetBlock()->GetTrackNumber() == mTrackNum) {
216            break;
217        }
218    }
219}
220
221void BlockIterator::reset() {
222    mCluster = mSegment->GetFirst();
223    mBlockEntry = mCluster->GetFirst();
224
225    while (!eos() && block()->GetTrackNumber() != mTrackNum) {
226        advance();
227    }
228}
229
230void BlockIterator::seek(int64_t seekTimeUs) {
231    mCluster = mSegment->FindCluster(seekTimeUs * 1000ll);
232    mBlockEntry = mCluster != NULL ? mCluster->GetFirst() : NULL;
233
234    while (!eos() && block()->GetTrackNumber() != mTrackNum) {
235        advance();
236    }
237
238    while (!eos() && !mBlockEntry->GetBlock()->IsKey()) {
239        advance();
240    }
241}
242
243const mkvparser::Block *BlockIterator::block() const {
244    CHECK(!eos());
245
246    return mBlockEntry->GetBlock();
247}
248
249int64_t BlockIterator::blockTimeUs() const {
250    return (mBlockEntry->GetBlock()->GetTime(mCluster) + 500ll) / 1000ll;
251}
252
253////////////////////////////////////////////////////////////////////////////////
254
// Decodes the three bytes at "ptr" as a big-endian 24-bit unsigned value.
static unsigned U24_AT(const uint8_t *ptr) {
    unsigned value = ptr[0];
    value = (value << 8) | ptr[1];
    value = (value << 8) | ptr[2];
    return value;
}
258
259status_t MatroskaSource::read(
260        MediaBuffer **out, const ReadOptions *options) {
261    *out = NULL;
262
263    int64_t seekTimeUs;
264    ReadOptions::SeekMode mode;
265    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
266        mBlockIter.seek(seekTimeUs);
267    }
268
269again:
270    if (mBlockIter.eos()) {
271        return ERROR_END_OF_STREAM;
272    }
273
274    const mkvparser::Block *block = mBlockIter.block();
275    size_t size = block->GetSize();
276    int64_t timeUs = mBlockIter.blockTimeUs();
277
278    // In the case of AVC content, each NAL unit is prefixed by
279    // mNALSizeLen bytes of length. We want to prefix the data with
280    // a four-byte 0x00000001 startcode instead of the length prefix.
281    // mNALSizeLen ranges from 1 through 4 bytes, so add an extra
282    // 3 bytes of padding to the buffer start.
283    static const size_t kPadding = 3;
284
285    MediaBuffer *buffer = new MediaBuffer(size + kPadding);
286    buffer->meta_data()->setInt64(kKeyTime, timeUs);
287    buffer->meta_data()->setInt32(kKeyIsSyncFrame, block->IsKey());
288
289    long res = block->Read(
290            mExtractor->mReader, (unsigned char *)buffer->data() + kPadding);
291
292    if (res != 0) {
293        return ERROR_END_OF_STREAM;
294    }
295
296    buffer->set_range(kPadding, size);
297
298    if (mType == AVC) {
299        CHECK_GE(size, mNALSizeLen);
300
301        uint8_t *data = (uint8_t *)buffer->data();
302
303        size_t NALsize;
304        switch (mNALSizeLen) {
305            case 1: NALsize = data[kPadding]; break;
306            case 2: NALsize = U16_AT(&data[kPadding]); break;
307            case 3: NALsize = U24_AT(&data[kPadding]); break;
308            case 4: NALsize = U32_AT(&data[kPadding]); break;
309            default:
310                TRESPASS();
311        }
312
313        CHECK_GE(size, NALsize + mNALSizeLen);
314        if (size > NALsize + mNALSizeLen) {
315            LOGW("discarding %d bytes of data.", size - NALsize - mNALSizeLen);
316        }
317
318        // actual data starts at &data[kPadding + mNALSizeLen]
319
320        memcpy(&data[mNALSizeLen - 1], "\x00\x00\x00\x01", 4);
321        buffer->set_range(mNALSizeLen - 1, NALsize + 4);
322    } else if (mType == AAC) {
323        // There's strange junk at the beginning...
324
325        const uint8_t *data = (const uint8_t *)buffer->data() + kPadding;
326
327        // hexdump(data, size);
328
329        size_t offset = 0;
330        while (offset < size && data[offset] != 0x21) {
331            ++offset;
332        }
333
334        if (size == offset) {
335            buffer->release();
336
337            mBlockIter.advance();
338            goto again;
339        }
340
341        buffer->set_range(kPadding + offset, size - offset);
342    }
343
344    *out = buffer;
345
346#if 0
347    hexdump((const uint8_t *)buffer->data() + buffer->range_offset(),
348            buffer->range_length());
349#endif
350
351    mBlockIter.advance();
352
353    return OK;
354}
355
356////////////////////////////////////////////////////////////////////////////////
357
358MatroskaExtractor::MatroskaExtractor(const sp<DataSource> &source)
359    : mDataSource(source),
360      mReader(new DataSourceReader(mDataSource)),
361      mSegment(NULL),
362      mExtractedThumbnails(false) {
363    mkvparser::EBMLHeader ebmlHeader;
364    long long pos;
365    if (ebmlHeader.Parse(mReader, pos) < 0) {
366        return;
367    }
368
369    long long ret =
370        mkvparser::Segment::CreateInstance(mReader, pos, mSegment);
371
372    if (ret) {
373        CHECK(mSegment == NULL);
374        return;
375    }
376
377    ret = mSegment->Load();
378
379    if (ret < 0) {
380        delete mSegment;
381        mSegment = NULL;
382        return;
383    }
384
385    addTracks();
386}
387
388MatroskaExtractor::~MatroskaExtractor() {
389    delete mSegment;
390    mSegment = NULL;
391
392    delete mReader;
393    mReader = NULL;
394}
395
396size_t MatroskaExtractor::countTracks() {
397    return mTracks.size();
398}
399
400sp<MediaSource> MatroskaExtractor::getTrack(size_t index) {
401    if (index >= mTracks.size()) {
402        return NULL;
403    }
404
405    return new MatroskaSource(this, index);
406}
407
408sp<MetaData> MatroskaExtractor::getTrackMetaData(
409        size_t index, uint32_t flags) {
410    if (index >= mTracks.size()) {
411        return NULL;
412    }
413
414    if ((flags & kIncludeExtensiveMetaData) && !mExtractedThumbnails) {
415        findThumbnails();
416        mExtractedThumbnails = true;
417    }
418
419    return mTracks.itemAt(index).mMeta;
420}
421
422static void addESDSFromAudioSpecificInfo(
423        const sp<MetaData> &meta, const void *asi, size_t asiSize) {
424    static const uint8_t kStaticESDS[] = {
425        0x03, 22,
426        0x00, 0x00,     // ES_ID
427        0x00,           // streamDependenceFlag, URL_Flag, OCRstreamFlag
428
429        0x04, 17,
430        0x40,                       // Audio ISO/IEC 14496-3
431        0x00, 0x00, 0x00, 0x00,
432        0x00, 0x00, 0x00, 0x00,
433        0x00, 0x00, 0x00, 0x00,
434
435        0x05,
436        // AudioSpecificInfo (with size prefix) follows
437    };
438
439    CHECK(asiSize < 128);
440    size_t esdsSize = sizeof(kStaticESDS) + asiSize + 1;
441    uint8_t *esds = new uint8_t[esdsSize];
442    memcpy(esds, kStaticESDS, sizeof(kStaticESDS));
443    uint8_t *ptr = esds + sizeof(kStaticESDS);
444    *ptr++ = asiSize;
445    memcpy(ptr, asi, asiSize);
446
447    meta->setData(kKeyESDS, 0, esds, esdsSize);
448
449    delete[] esds;
450    esds = NULL;
451}
452
453void addVorbisCodecInfo(
454        const sp<MetaData> &meta,
455        const void *_codecPrivate, size_t codecPrivateSize) {
456    // printf("vorbis private data follows:\n");
457    // hexdump(_codecPrivate, codecPrivateSize);
458
459    CHECK(codecPrivateSize >= 3);
460
461    const uint8_t *codecPrivate = (const uint8_t *)_codecPrivate;
462    CHECK(codecPrivate[0] == 0x02);
463
464    size_t len1 = codecPrivate[1];
465    size_t len2 = codecPrivate[2];
466
467    CHECK(codecPrivateSize > 3 + len1 + len2);
468
469    CHECK(codecPrivate[3] == 0x01);
470    meta->setData(kKeyVorbisInfo, 0, &codecPrivate[3], len1);
471
472    CHECK(codecPrivate[len1 + 3] == 0x03);
473
474    CHECK(codecPrivate[len1 + len2 + 3] == 0x05);
475    meta->setData(
476            kKeyVorbisBooks, 0, &codecPrivate[len1 + len2 + 3],
477            codecPrivateSize - len1 - len2 - 3);
478}
479
480void MatroskaExtractor::addTracks() {
481    const mkvparser::Tracks *tracks = mSegment->GetTracks();
482
483    for (size_t index = 0; index < tracks->GetTracksCount(); ++index) {
484        const mkvparser::Track *track = tracks->GetTrackByIndex(index);
485
486        const char *const codecID = track->GetCodecId();
487        LOGV("codec id = %s", codecID);
488        LOGV("codec name = %s", track->GetCodecNameAsUTF8());
489
490        size_t codecPrivateSize;
491        const unsigned char *codecPrivate =
492            track->GetCodecPrivate(codecPrivateSize);
493
494        enum { VIDEO_TRACK = 1, AUDIO_TRACK = 2 };
495
496        sp<MetaData> meta = new MetaData;
497
498        switch (track->GetType()) {
499            case VIDEO_TRACK:
500            {
501                const mkvparser::VideoTrack *vtrack =
502                    static_cast<const mkvparser::VideoTrack *>(track);
503
504                if (!strcmp("V_MPEG4/ISO/AVC", codecID)) {
505                    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_AVC);
506                    meta->setData(kKeyAVCC, 0, codecPrivate, codecPrivateSize);
507                } else if (!strcmp("V_VP8", codecID)) {
508                    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_VPX);
509                } else {
510                    continue;
511                }
512
513                meta->setInt32(kKeyWidth, vtrack->GetWidth());
514                meta->setInt32(kKeyHeight, vtrack->GetHeight());
515                break;
516            }
517
518            case AUDIO_TRACK:
519            {
520                const mkvparser::AudioTrack *atrack =
521                    static_cast<const mkvparser::AudioTrack *>(track);
522
523                if (!strcmp("A_AAC", codecID)) {
524                    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AAC);
525                    CHECK(codecPrivateSize >= 2);
526
527                    addESDSFromAudioSpecificInfo(
528                            meta, codecPrivate, codecPrivateSize);
529                } else if (!strcmp("A_VORBIS", codecID)) {
530                    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_VORBIS);
531
532                    addVorbisCodecInfo(meta, codecPrivate, codecPrivateSize);
533                } else {
534                    continue;
535                }
536
537                meta->setInt32(kKeySampleRate, atrack->GetSamplingRate());
538                meta->setInt32(kKeyChannelCount, atrack->GetChannels());
539                break;
540            }
541
542            default:
543                continue;
544        }
545
546        long long durationNs = mSegment->GetDuration();
547        meta->setInt64(kKeyDuration, (durationNs + 500) / 1000);
548
549        mTracks.push();
550        TrackInfo *trackInfo = &mTracks.editItemAt(mTracks.size() - 1);
551        trackInfo->mTrackNum = track->GetNumber();
552        trackInfo->mMeta = meta;
553    }
554}
555
556void MatroskaExtractor::findThumbnails() {
557    for (size_t i = 0; i < mTracks.size(); ++i) {
558        TrackInfo *info = &mTracks.editItemAt(i);
559
560        const char *mime;
561        CHECK(info->mMeta->findCString(kKeyMIMEType, &mime));
562
563        if (strncasecmp(mime, "video/", 6)) {
564            continue;
565        }
566
567        BlockIterator iter(mSegment, info->mTrackNum);
568        int32_t i = 0;
569        int64_t thumbnailTimeUs = 0;
570        size_t maxBlockSize = 0;
571        while (!iter.eos() && i < 20) {
572            if (iter.block()->IsKey()) {
573                ++i;
574
575                size_t blockSize = iter.block()->GetSize();
576                if (blockSize > maxBlockSize) {
577                    maxBlockSize = blockSize;
578                    thumbnailTimeUs = iter.blockTimeUs();
579                }
580            }
581            iter.advance();
582        }
583        info->mMeta->setInt64(kKeyThumbnailTime, thumbnailTimeUs);
584    }
585}
586
587sp<MetaData> MatroskaExtractor::getMetaData() {
588    sp<MetaData> meta = new MetaData;
589    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MATROSKA);
590
591    return meta;
592}
593
594bool SniffMatroska(
595        const sp<DataSource> &source, String8 *mimeType, float *confidence,
596        sp<AMessage> *) {
597    DataSourceReader reader(source);
598    mkvparser::EBMLHeader ebmlHeader;
599    long long pos;
600    if (ebmlHeader.Parse(&reader, pos) < 0) {
601        return false;
602    }
603
604    mimeType->setTo(MEDIA_MIMETYPE_CONTAINER_MATROSKA);
605    *confidence = 0.6;
606
607    return true;
608}
609
610}  // namespace android
611