MatroskaExtractor.cpp revision 2f46e8152fb881d3a1d7afd223f1ed51f6e358b8
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MatroskaExtractor"
19#include <utils/Log.h>
20
21#include "MatroskaExtractor.h"
22
23#include <media/stagefright/foundation/ADebug.h>
24#include <media/stagefright/foundation/hexdump.h>
25#include <media/stagefright/DataSource.h>
26#include <media/stagefright/MediaBuffer.h>
27#include <media/stagefright/MediaDefs.h>
28#include <media/stagefright/MediaErrors.h>
29#include <media/stagefright/MediaSource.h>
30#include <media/stagefright/MetaData.h>
31#include <media/stagefright/Utils.h>
32#include <utils/String8.h>
33
34#include <inttypes.h>
35
36namespace android {
37
38struct DataSourceReader : public mkvparser::IMkvReader {
39    DataSourceReader(const sp<DataSource> &source)
40        : mSource(source) {
41    }
42
43    virtual int Read(long long position, long length, unsigned char* buffer) {
44        CHECK(position >= 0);
45        CHECK(length >= 0);
46
47        if (length == 0) {
48            return 0;
49        }
50
51        ssize_t n = mSource->readAt(position, buffer, length);
52
53        if (n <= 0) {
54            return -1;
55        }
56
57        return 0;
58    }
59
60    virtual int Length(long long* total, long long* available) {
61        off64_t size;
62        if (mSource->getSize(&size) != OK) {
63            *total = -1;
64            *available = (long long)((1ull << 63) - 1);
65
66            return 0;
67        }
68
69        if (total) {
70            *total = size;
71        }
72
73        if (available) {
74            *available = size;
75        }
76
77        return 0;
78    }
79
80private:
81    sp<DataSource> mSource;
82
83    DataSourceReader(const DataSourceReader &);
84    DataSourceReader &operator=(const DataSourceReader &);
85};
86
87////////////////////////////////////////////////////////////////////////////////
88
89struct BlockIterator {
90    BlockIterator(MatroskaExtractor *extractor, unsigned long trackNum, unsigned long index);
91
92    bool eos() const;
93
94    void advance();
95    void reset();
96
97    void seek(
98            int64_t seekTimeUs, bool isAudio,
99            int64_t *actualFrameTimeUs);
100
101    const mkvparser::Block *block() const;
102    int64_t blockTimeUs() const;
103
104private:
105    MatroskaExtractor *mExtractor;
106    long long mTrackNum;
107    unsigned long mIndex;
108
109    const mkvparser::Cluster *mCluster;
110    const mkvparser::BlockEntry *mBlockEntry;
111    long mBlockEntryIndex;
112
113    void advance_l();
114
115    BlockIterator(const BlockIterator &);
116    BlockIterator &operator=(const BlockIterator &);
117};
118
119struct MatroskaSource : public MediaSource {
120    MatroskaSource(
121            const sp<MatroskaExtractor> &extractor, size_t index);
122
123    virtual status_t start(MetaData *params);
124    virtual status_t stop();
125
126    virtual sp<MetaData> getFormat();
127
128    virtual status_t read(
129            MediaBuffer **buffer, const ReadOptions *options);
130
131protected:
132    virtual ~MatroskaSource();
133
134private:
135    enum Type {
136        AVC,
137        AAC,
138        OTHER
139    };
140
141    sp<MatroskaExtractor> mExtractor;
142    size_t mTrackIndex;
143    Type mType;
144    bool mIsAudio;
145    BlockIterator mBlockIter;
146    size_t mNALSizeLen;  // for type AVC
147
148    List<MediaBuffer *> mPendingFrames;
149
150    status_t advance();
151
152    status_t readBlock();
153    void clearPendingFrames();
154
155    MatroskaSource(const MatroskaSource &);
156    MatroskaSource &operator=(const MatroskaSource &);
157};
158
159const mkvparser::Track* MatroskaExtractor::TrackInfo::getTrack() const {
160    return mExtractor->mSegment->GetTracks()->GetTrackByNumber(mTrackNum);
161}
162
163// This function does exactly the same as mkvparser::Cues::Find, except that it
164// searches in our own track based vectors. We should not need this once mkvparser
165// adds the same functionality.
166const mkvparser::CuePoint::TrackPosition *MatroskaExtractor::TrackInfo::find(
167        long long timeNs) const {
168    ALOGV("mCuePoints.size %zu", mCuePoints.size());
169    if (mCuePoints.empty()) {
170        return NULL;
171    }
172
173    const mkvparser::CuePoint* cp = mCuePoints.itemAt(0);
174    const mkvparser::Track* track = getTrack();
175    if (timeNs <= cp->GetTime(mExtractor->mSegment)) {
176        return cp->Find(track);
177    }
178
179    // Binary searches through relevant cues; assumes cues are ordered by timecode.
180    // If we do detect out-of-order cues, return NULL.
181    size_t lo = 0;
182    size_t hi = mCuePoints.size();
183    while (lo < hi) {
184        const size_t mid = lo + (hi - lo) / 2;
185        const mkvparser::CuePoint* const midCp = mCuePoints.itemAt(mid);
186        const long long cueTimeNs = midCp->GetTime(mExtractor->mSegment);
187        if (cueTimeNs <= timeNs) {
188            lo = mid + 1;
189        } else {
190            hi = mid;
191        }
192    }
193
194    if (lo == 0) {
195        return NULL;
196    }
197
198    cp = mCuePoints.itemAt(lo - 1);
199    if (cp->GetTime(mExtractor->mSegment) > timeNs) {
200        return NULL;
201    }
202
203    return cp->Find(track);
204}
205
206MatroskaSource::MatroskaSource(
207        const sp<MatroskaExtractor> &extractor, size_t index)
208    : mExtractor(extractor),
209      mTrackIndex(index),
210      mType(OTHER),
211      mIsAudio(false),
212      mBlockIter(mExtractor.get(),
213                 mExtractor->mTracks.itemAt(index).mTrackNum,
214                 index),
215      mNALSizeLen(0) {
216    sp<MetaData> meta = mExtractor->mTracks.itemAt(index).mMeta;
217
218    const char *mime;
219    CHECK(meta->findCString(kKeyMIMEType, &mime));
220
221    mIsAudio = !strncasecmp("audio/", mime, 6);
222
223    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
224        mType = AVC;
225
226        uint32_t dummy;
227        const uint8_t *avcc;
228        size_t avccSize;
229        CHECK(meta->findData(
230                    kKeyAVCC, &dummy, (const void **)&avcc, &avccSize));
231
232        CHECK_GE(avccSize, 5u);
233
234        mNALSizeLen = 1 + (avcc[4] & 3);
235        ALOGV("mNALSizeLen = %zu", mNALSizeLen);
236    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
237        mType = AAC;
238    }
239}
240
241MatroskaSource::~MatroskaSource() {
242    clearPendingFrames();
243}
244
245status_t MatroskaSource::start(MetaData * /* params */) {
246    mBlockIter.reset();
247
248    return OK;
249}
250
251status_t MatroskaSource::stop() {
252    clearPendingFrames();
253
254    return OK;
255}
256
257sp<MetaData> MatroskaSource::getFormat() {
258    return mExtractor->mTracks.itemAt(mTrackIndex).mMeta;
259}
260
261////////////////////////////////////////////////////////////////////////////////
262
263BlockIterator::BlockIterator(
264        MatroskaExtractor *extractor, unsigned long trackNum, unsigned long index)
265    : mExtractor(extractor),
266      mTrackNum(trackNum),
267      mIndex(index),
268      mCluster(NULL),
269      mBlockEntry(NULL),
270      mBlockEntryIndex(0) {
271    reset();
272}
273
274bool BlockIterator::eos() const {
275    return mCluster == NULL || mCluster->EOS();
276}
277
278void BlockIterator::advance() {
279    Mutex::Autolock autoLock(mExtractor->mLock);
280    advance_l();
281}
282
283void BlockIterator::advance_l() {
284    for (;;) {
285        long res = mCluster->GetEntry(mBlockEntryIndex, mBlockEntry);
286        ALOGV("GetEntry returned %ld", res);
287
288        long long pos;
289        long len;
290        if (res < 0) {
291            // Need to parse this cluster some more
292
293            CHECK_EQ(res, mkvparser::E_BUFFER_NOT_FULL);
294
295            res = mCluster->Parse(pos, len);
296            ALOGV("Parse returned %ld", res);
297
298            if (res < 0) {
299                // I/O error
300
301                ALOGE("Cluster::Parse returned result %ld", res);
302
303                mCluster = NULL;
304                break;
305            }
306
307            continue;
308        } else if (res == 0) {
309            // We're done with this cluster
310
311            const mkvparser::Cluster *nextCluster;
312            res = mExtractor->mSegment->ParseNext(
313                    mCluster, nextCluster, pos, len);
314            ALOGV("ParseNext returned %ld", res);
315
316            if (res != 0) {
317                // EOF or error
318
319                mCluster = NULL;
320                break;
321            }
322
323            CHECK_EQ(res, 0);
324            CHECK(nextCluster != NULL);
325            CHECK(!nextCluster->EOS());
326
327            mCluster = nextCluster;
328
329            res = mCluster->Parse(pos, len);
330            ALOGV("Parse (2) returned %ld", res);
331            CHECK_GE(res, 0);
332
333            mBlockEntryIndex = 0;
334            continue;
335        }
336
337        CHECK(mBlockEntry != NULL);
338        CHECK(mBlockEntry->GetBlock() != NULL);
339        ++mBlockEntryIndex;
340
341        if (mBlockEntry->GetBlock()->GetTrackNumber() == mTrackNum) {
342            break;
343        }
344    }
345}
346
347void BlockIterator::reset() {
348    Mutex::Autolock autoLock(mExtractor->mLock);
349
350    mCluster = mExtractor->mSegment->GetFirst();
351    mBlockEntry = NULL;
352    mBlockEntryIndex = 0;
353
354    do {
355        advance_l();
356    } while (!eos() && block()->GetTrackNumber() != mTrackNum);
357}
358
359void BlockIterator::seek(
360        int64_t seekTimeUs, bool isAudio,
361        int64_t *actualFrameTimeUs) {
362    Mutex::Autolock autoLock(mExtractor->mLock);
363
364    *actualFrameTimeUs = -1ll;
365
366    const int64_t seekTimeNs = seekTimeUs * 1000ll - mExtractor->mSeekPreRollNs;
367
368    mkvparser::Segment* const pSegment = mExtractor->mSegment;
369
370    // Special case the 0 seek to avoid loading Cues when the application
371    // extraneously seeks to 0 before playing.
372    if (seekTimeNs <= 0) {
373        ALOGV("Seek to beginning: %" PRId64, seekTimeUs);
374        mCluster = pSegment->GetFirst();
375        mBlockEntryIndex = 0;
376        do {
377            advance_l();
378        } while (!eos() && block()->GetTrackNumber() != mTrackNum);
379        return;
380    }
381
382    ALOGV("Seeking to: %" PRId64, seekTimeUs);
383
384    // If the Cues have not been located then find them.
385    const mkvparser::Cues* pCues = pSegment->GetCues();
386    const mkvparser::SeekHead* pSH = pSegment->GetSeekHead();
387    if (!pCues && pSH) {
388        const size_t count = pSH->GetCount();
389        const mkvparser::SeekHead::Entry* pEntry;
390        ALOGV("No Cues yet");
391
392        for (size_t index = 0; index < count; index++) {
393            pEntry = pSH->GetEntry(index);
394
395            if (pEntry->id == 0x0C53BB6B) { // Cues ID
396                long len; long long pos;
397                pSegment->ParseCues(pEntry->pos, pos, len);
398                pCues = pSegment->GetCues();
399                ALOGV("Cues found");
400                break;
401            }
402        }
403
404        if (!pCues) {
405            ALOGE("No Cues in file");
406            return;
407        }
408    }
409    else if (!pSH) {
410        ALOGE("No SeekHead");
411        return;
412    }
413
414    const mkvparser::CuePoint* pCP;
415    mkvparser::Tracks const *pTracks = pSegment->GetTracks();
416    unsigned long int trackCount = pTracks->GetTracksCount();
417    while (!pCues->DoneParsing()) {
418        pCues->LoadCuePoint();
419        pCP = pCues->GetLast();
420        CHECK(pCP);
421
422        for (size_t index = 0; index < trackCount; ++index) {
423            const mkvparser::Track *pTrack = pTracks->GetTrackByIndex(index);
424            if (pTrack && pTrack->GetType() == 1 && pCP->Find(pTrack)) { // VIDEO_TRACK
425                MatroskaExtractor::TrackInfo& track = mExtractor->mTracks.editItemAt(index);
426                track.mCuePoints.push_back(pCP);
427            }
428        }
429
430        if (pCP->GetTime(pSegment) >= seekTimeNs) {
431            ALOGV("Parsed past relevant Cue");
432            break;
433        }
434    }
435
436    const mkvparser::CuePoint::TrackPosition *pTP = NULL;
437    const mkvparser::Track *thisTrack = pTracks->GetTrackByIndex(mIndex);
438    if (thisTrack->GetType() == 1) { // video
439        MatroskaExtractor::TrackInfo& track = mExtractor->mTracks.editItemAt(mIndex);
440        pTP = track.find(seekTimeNs);
441    } else {
442        // The Cue index is built around video keyframes
443        for (size_t index = 0; index < trackCount; ++index) {
444            const mkvparser::Track *pTrack = pTracks->GetTrackByIndex(index);
445            if (pTrack && pTrack->GetType() == 1 && pCues->Find(seekTimeNs, pTrack, pCP, pTP)) {
446                ALOGV("Video track located at %zu", index);
447                break;
448            }
449        }
450    }
451
452
453    // Always *search* based on the video track, but finalize based on mTrackNum
454    if (!pTP) {
455        ALOGE("Did not locate the video track for seeking");
456        return;
457    }
458
459    mCluster = pSegment->FindOrPreloadCluster(pTP->m_pos);
460
461    CHECK(mCluster);
462    CHECK(!mCluster->EOS());
463
464    // mBlockEntryIndex starts at 0 but m_block starts at 1
465    CHECK_GT(pTP->m_block, 0);
466    mBlockEntryIndex = pTP->m_block - 1;
467
468    for (;;) {
469        advance_l();
470
471        if (eos()) break;
472
473        if (isAudio || block()->IsKey()) {
474            // Accept the first key frame
475            int64_t frameTimeUs = (block()->GetTime(mCluster) + 500LL) / 1000LL;
476            if (thisTrack->GetType() == 1 || frameTimeUs >= seekTimeUs) {
477                *actualFrameTimeUs = frameTimeUs;
478                ALOGV("Requested seek point: %" PRId64 " actual: %" PRId64,
479                      seekTimeUs, *actualFrameTimeUs);
480                break;
481            }
482        }
483    }
484}
485
486const mkvparser::Block *BlockIterator::block() const {
487    CHECK(!eos());
488
489    return mBlockEntry->GetBlock();
490}
491
492int64_t BlockIterator::blockTimeUs() const {
493    return (mBlockEntry->GetBlock()->GetTime(mCluster) + 500ll) / 1000ll;
494}
495
496////////////////////////////////////////////////////////////////////////////////
497
// Assembles the three bytes at |ptr| into a 24-bit big-endian value
// (ptr[0] is the most significant byte).
static unsigned U24_AT(const uint8_t *ptr) {
    unsigned value = ptr[0];
    value = (value << 8) | ptr[1];
    value = (value << 8) | ptr[2];
    return value;
}
501
// Counts the leading zero bits of the 8-bit value |x|, scanning from the
// most significant bit. Returns 8 when |x| is 0 (the loop is bounded by the
// bit width, so an all-zero input cannot spin forever).
static size_t clz(uint8_t x) {
    size_t numLeadingZeroes = 0;

    while (numLeadingZeroes < 8 && !(x & 0x80)) {
        ++numLeadingZeroes;
        x = x << 1;
    }

    return numLeadingZeroes;
}
512
513void MatroskaSource::clearPendingFrames() {
514    while (!mPendingFrames.empty()) {
515        MediaBuffer *frame = *mPendingFrames.begin();
516        mPendingFrames.erase(mPendingFrames.begin());
517
518        frame->release();
519        frame = NULL;
520    }
521}
522
523status_t MatroskaSource::readBlock() {
524    CHECK(mPendingFrames.empty());
525
526    if (mBlockIter.eos()) {
527        return ERROR_END_OF_STREAM;
528    }
529
530    const mkvparser::Block *block = mBlockIter.block();
531
532    int64_t timeUs = mBlockIter.blockTimeUs();
533
534    for (int i = 0; i < block->GetFrameCount(); ++i) {
535        const mkvparser::Block::Frame &frame = block->GetFrame(i);
536
537        MediaBuffer *mbuf = new MediaBuffer(frame.len);
538        mbuf->meta_data()->setInt64(kKeyTime, timeUs);
539        mbuf->meta_data()->setInt32(kKeyIsSyncFrame, block->IsKey());
540
541        long n = frame.Read(mExtractor->mReader, (unsigned char *)mbuf->data());
542        if (n != 0) {
543            mPendingFrames.clear();
544
545            mBlockIter.advance();
546            return ERROR_IO;
547        }
548
549        mPendingFrames.push_back(mbuf);
550    }
551
552    mBlockIter.advance();
553
554    return OK;
555}
556
557status_t MatroskaSource::read(
558        MediaBuffer **out, const ReadOptions *options) {
559    *out = NULL;
560
561    int64_t targetSampleTimeUs = -1ll;
562
563    int64_t seekTimeUs;
564    ReadOptions::SeekMode mode;
565    if (options && options->getSeekTo(&seekTimeUs, &mode)
566            && !mExtractor->isLiveStreaming()) {
567        clearPendingFrames();
568
569        // The audio we want is located by using the Cues to seek the video
570        // stream to find the target Cluster then iterating to finalize for
571        // audio.
572        int64_t actualFrameTimeUs;
573        mBlockIter.seek(seekTimeUs, mIsAudio, &actualFrameTimeUs);
574
575        if (mode == ReadOptions::SEEK_CLOSEST) {
576            targetSampleTimeUs = actualFrameTimeUs;
577        }
578    }
579
580    while (mPendingFrames.empty()) {
581        status_t err = readBlock();
582
583        if (err != OK) {
584            clearPendingFrames();
585
586            return err;
587        }
588    }
589
590    MediaBuffer *frame = *mPendingFrames.begin();
591    mPendingFrames.erase(mPendingFrames.begin());
592
593    if (mType != AVC) {
594        if (targetSampleTimeUs >= 0ll) {
595            frame->meta_data()->setInt64(
596                    kKeyTargetTime, targetSampleTimeUs);
597        }
598
599        *out = frame;
600
601        return OK;
602    }
603
604    // Each input frame contains one or more NAL fragments, each fragment
605    // is prefixed by mNALSizeLen bytes giving the fragment length,
606    // followed by a corresponding number of bytes containing the fragment.
607    // We output all these fragments into a single large buffer separated
608    // by startcodes (0x00 0x00 0x00 0x01).
609
610    const uint8_t *srcPtr =
611        (const uint8_t *)frame->data() + frame->range_offset();
612
613    size_t srcSize = frame->range_length();
614
615    size_t dstSize = 0;
616    MediaBuffer *buffer = NULL;
617    uint8_t *dstPtr = NULL;
618
619    for (int32_t pass = 0; pass < 2; ++pass) {
620        size_t srcOffset = 0;
621        size_t dstOffset = 0;
622        while (srcOffset + mNALSizeLen <= srcSize) {
623            size_t NALsize;
624            switch (mNALSizeLen) {
625                case 1: NALsize = srcPtr[srcOffset]; break;
626                case 2: NALsize = U16_AT(srcPtr + srcOffset); break;
627                case 3: NALsize = U24_AT(srcPtr + srcOffset); break;
628                case 4: NALsize = U32_AT(srcPtr + srcOffset); break;
629                default:
630                    TRESPASS();
631            }
632
633            if (srcOffset + mNALSizeLen + NALsize > srcSize) {
634                break;
635            }
636
637            if (pass == 1) {
638                memcpy(&dstPtr[dstOffset], "\x00\x00\x00\x01", 4);
639
640                memcpy(&dstPtr[dstOffset + 4],
641                       &srcPtr[srcOffset + mNALSizeLen],
642                       NALsize);
643            }
644
645            dstOffset += 4;  // 0x00 00 00 01
646            dstOffset += NALsize;
647
648            srcOffset += mNALSizeLen + NALsize;
649        }
650
651        if (srcOffset < srcSize) {
652            // There were trailing bytes or not enough data to complete
653            // a fragment.
654
655            frame->release();
656            frame = NULL;
657
658            return ERROR_MALFORMED;
659        }
660
661        if (pass == 0) {
662            dstSize = dstOffset;
663
664            buffer = new MediaBuffer(dstSize);
665
666            int64_t timeUs;
667            CHECK(frame->meta_data()->findInt64(kKeyTime, &timeUs));
668            int32_t isSync;
669            CHECK(frame->meta_data()->findInt32(kKeyIsSyncFrame, &isSync));
670
671            buffer->meta_data()->setInt64(kKeyTime, timeUs);
672            buffer->meta_data()->setInt32(kKeyIsSyncFrame, isSync);
673
674            dstPtr = (uint8_t *)buffer->data();
675        }
676    }
677
678    frame->release();
679    frame = NULL;
680
681    if (targetSampleTimeUs >= 0ll) {
682        buffer->meta_data()->setInt64(
683                kKeyTargetTime, targetSampleTimeUs);
684    }
685
686    *out = buffer;
687
688    return OK;
689}
690
691////////////////////////////////////////////////////////////////////////////////
692
693MatroskaExtractor::MatroskaExtractor(const sp<DataSource> &source)
694    : mDataSource(source),
695      mReader(new DataSourceReader(mDataSource)),
696      mSegment(NULL),
697      mExtractedThumbnails(false),
698      mIsWebm(false),
699      mSeekPreRollNs(0) {
700    off64_t size;
701    mIsLiveStreaming =
702        (mDataSource->flags()
703            & (DataSource::kWantsPrefetching
704                | DataSource::kIsCachingDataSource))
705        && mDataSource->getSize(&size) != OK;
706
707    mkvparser::EBMLHeader ebmlHeader;
708    long long pos;
709    if (ebmlHeader.Parse(mReader, pos) < 0) {
710        return;
711    }
712
713    if (ebmlHeader.m_docType && !strcmp("webm", ebmlHeader.m_docType)) {
714        mIsWebm = true;
715    }
716
717    long long ret =
718        mkvparser::Segment::CreateInstance(mReader, pos, mSegment);
719
720    if (ret) {
721        CHECK(mSegment == NULL);
722        return;
723    }
724
725    // from mkvparser::Segment::Load(), but stop at first cluster
726    ret = mSegment->ParseHeaders();
727    if (ret == 0) {
728        long len;
729        ret = mSegment->LoadCluster(pos, len);
730        if (ret >= 1) {
731            // no more clusters
732            ret = 0;
733        }
734    } else if (ret > 0) {
735        ret = mkvparser::E_BUFFER_NOT_FULL;
736    }
737
738    if (ret < 0) {
739        ALOGW("Corrupt %s source: %s", mIsWebm ? "webm" : "matroska",
740                uriDebugString(mDataSource->getUri()).c_str());
741        delete mSegment;
742        mSegment = NULL;
743        return;
744    }
745
746#if 0
747    const mkvparser::SegmentInfo *info = mSegment->GetInfo();
748    ALOGI("muxing app: %s, writing app: %s",
749         info->GetMuxingAppAsUTF8(),
750         info->GetWritingAppAsUTF8());
751#endif
752
753    addTracks();
754}
755
756MatroskaExtractor::~MatroskaExtractor() {
757    delete mSegment;
758    mSegment = NULL;
759
760    delete mReader;
761    mReader = NULL;
762}
763
764size_t MatroskaExtractor::countTracks() {
765    return mTracks.size();
766}
767
768sp<MediaSource> MatroskaExtractor::getTrack(size_t index) {
769    if (index >= mTracks.size()) {
770        return NULL;
771    }
772
773    return new MatroskaSource(this, index);
774}
775
776sp<MetaData> MatroskaExtractor::getTrackMetaData(
777        size_t index, uint32_t flags) {
778    if (index >= mTracks.size()) {
779        return NULL;
780    }
781
782    if ((flags & kIncludeExtensiveMetaData) && !mExtractedThumbnails
783            && !isLiveStreaming()) {
784        findThumbnails();
785        mExtractedThumbnails = true;
786    }
787
788    return mTracks.itemAt(index).mMeta;
789}
790
791bool MatroskaExtractor::isLiveStreaming() const {
792    return mIsLiveStreaming;
793}
794
795static int bytesForSize(size_t size) {
796    // use at most 28 bits (4 times 7)
797    CHECK(size <= 0xfffffff);
798
799    if (size > 0x1fffff) {
800        return 4;
801    } else if (size > 0x3fff) {
802        return 3;
803    } else if (size > 0x7f) {
804        return 2;
805    }
806    return 1;
807}
808
809static void storeSize(uint8_t *data, size_t &idx, size_t size) {
810    int numBytes = bytesForSize(size);
811    idx += numBytes;
812
813    data += idx;
814    size_t next = 0;
815    while (numBytes--) {
816        *--data = (size & 0x7f) | next;
817        size >>= 7;
818        next = 0x80;
819    }
820}
821
822static void addESDSFromCodecPrivate(
823        const sp<MetaData> &meta,
824        bool isAudio, const void *priv, size_t privSize) {
825
826    int privSizeBytesRequired = bytesForSize(privSize);
827    int esdsSize2 = 14 + privSizeBytesRequired + privSize;
828    int esdsSize2BytesRequired = bytesForSize(esdsSize2);
829    int esdsSize1 = 4 + esdsSize2BytesRequired + esdsSize2;
830    int esdsSize1BytesRequired = bytesForSize(esdsSize1);
831    size_t esdsSize = 1 + esdsSize1BytesRequired + esdsSize1;
832    uint8_t *esds = new uint8_t[esdsSize];
833
834    size_t idx = 0;
835    esds[idx++] = 0x03;
836    storeSize(esds, idx, esdsSize1);
837    esds[idx++] = 0x00; // ES_ID
838    esds[idx++] = 0x00; // ES_ID
839    esds[idx++] = 0x00; // streamDependenceFlag, URL_Flag, OCRstreamFlag
840    esds[idx++] = 0x04;
841    storeSize(esds, idx, esdsSize2);
842    esds[idx++] = isAudio ? 0x40   // Audio ISO/IEC 14496-3
843                          : 0x20;  // Visual ISO/IEC 14496-2
844    for (int i = 0; i < 12; i++) {
845        esds[idx++] = 0x00;
846    }
847    esds[idx++] = 0x05;
848    storeSize(esds, idx, privSize);
849    memcpy(esds + idx, priv, privSize);
850
851    meta->setData(kKeyESDS, 0, esds, esdsSize);
852
853    delete[] esds;
854    esds = NULL;
855}
856
857status_t addVorbisCodecInfo(
858        const sp<MetaData> &meta,
859        const void *_codecPrivate, size_t codecPrivateSize) {
860    // hexdump(_codecPrivate, codecPrivateSize);
861
862    if (codecPrivateSize < 1) {
863        return ERROR_MALFORMED;
864    }
865
866    const uint8_t *codecPrivate = (const uint8_t *)_codecPrivate;
867
868    if (codecPrivate[0] != 0x02) {
869        return ERROR_MALFORMED;
870    }
871
872    // codecInfo starts with two lengths, len1 and len2, that are
873    // "Xiph-style-lacing encoded"...
874
875    size_t offset = 1;
876    size_t len1 = 0;
877    while (offset < codecPrivateSize && codecPrivate[offset] == 0xff) {
878        len1 += 0xff;
879        ++offset;
880    }
881    if (offset >= codecPrivateSize) {
882        return ERROR_MALFORMED;
883    }
884    len1 += codecPrivate[offset++];
885
886    size_t len2 = 0;
887    while (offset < codecPrivateSize && codecPrivate[offset] == 0xff) {
888        len2 += 0xff;
889        ++offset;
890    }
891    if (offset >= codecPrivateSize) {
892        return ERROR_MALFORMED;
893    }
894    len2 += codecPrivate[offset++];
895
896    if (codecPrivateSize < offset + len1 + len2) {
897        return ERROR_MALFORMED;
898    }
899
900    if (codecPrivate[offset] != 0x01) {
901        return ERROR_MALFORMED;
902    }
903    meta->setData(kKeyVorbisInfo, 0, &codecPrivate[offset], len1);
904
905    offset += len1;
906    if (codecPrivate[offset] != 0x03) {
907        return ERROR_MALFORMED;
908    }
909
910    offset += len2;
911    if (codecPrivate[offset] != 0x05) {
912        return ERROR_MALFORMED;
913    }
914
915    meta->setData(
916            kKeyVorbisBooks, 0, &codecPrivate[offset],
917            codecPrivateSize - offset);
918
919    return OK;
920}
921
922void MatroskaExtractor::addTracks() {
923    const mkvparser::Tracks *tracks = mSegment->GetTracks();
924
925    for (size_t index = 0; index < tracks->GetTracksCount(); ++index) {
926        const mkvparser::Track *track = tracks->GetTrackByIndex(index);
927
928        if (track == NULL) {
929            // Apparently this is currently valid (if unexpected) behaviour
930            // of the mkv parser lib.
931            continue;
932        }
933
934        const char *const codecID = track->GetCodecId();
935        ALOGV("codec id = %s", codecID);
936        ALOGV("codec name = %s", track->GetCodecNameAsUTF8());
937
938        size_t codecPrivateSize;
939        const unsigned char *codecPrivate =
940            track->GetCodecPrivate(codecPrivateSize);
941
942        enum { VIDEO_TRACK = 1, AUDIO_TRACK = 2 };
943
944        sp<MetaData> meta = new MetaData;
945
946        status_t err = OK;
947
948        switch (track->GetType()) {
949            case VIDEO_TRACK:
950            {
951                const mkvparser::VideoTrack *vtrack =
952                    static_cast<const mkvparser::VideoTrack *>(track);
953
954                if (!strcmp("V_MPEG4/ISO/AVC", codecID)) {
955                    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_AVC);
956                    meta->setData(kKeyAVCC, 0, codecPrivate, codecPrivateSize);
957                } else if (!strcmp("V_MPEG4/ISO/ASP", codecID)) {
958                    if (codecPrivateSize > 0) {
959                        meta->setCString(
960                                kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG4);
961                        addESDSFromCodecPrivate(
962                                meta, false, codecPrivate, codecPrivateSize);
963                    } else {
964                        ALOGW("%s is detected, but does not have configuration.",
965                                codecID);
966                        continue;
967                    }
968                } else if (!strcmp("V_VP8", codecID)) {
969                    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_VP8);
970                } else if (!strcmp("V_VP9", codecID)) {
971                    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_VP9);
972                } else {
973                    ALOGW("%s is not supported.", codecID);
974                    continue;
975                }
976
977                meta->setInt32(kKeyWidth, vtrack->GetWidth());
978                meta->setInt32(kKeyHeight, vtrack->GetHeight());
979                break;
980            }
981
982            case AUDIO_TRACK:
983            {
984                const mkvparser::AudioTrack *atrack =
985                    static_cast<const mkvparser::AudioTrack *>(track);
986
987                if (!strcmp("A_AAC", codecID)) {
988                    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AAC);
989                    CHECK(codecPrivateSize >= 2);
990
991                    addESDSFromCodecPrivate(
992                            meta, true, codecPrivate, codecPrivateSize);
993                } else if (!strcmp("A_VORBIS", codecID)) {
994                    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_VORBIS);
995
996                    err = addVorbisCodecInfo(
997                            meta, codecPrivate, codecPrivateSize);
998                } else if (!strcmp("A_OPUS", codecID)) {
999                    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_OPUS);
1000                    meta->setData(kKeyOpusHeader, 0, codecPrivate, codecPrivateSize);
1001                    meta->setInt64(kKeyOpusCodecDelay, track->GetCodecDelay());
1002                    meta->setInt64(kKeyOpusSeekPreRoll, track->GetSeekPreRoll());
1003                    mSeekPreRollNs = track->GetSeekPreRoll();
1004                } else if (!strcmp("A_MPEG/L3", codecID)) {
1005                    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
1006                } else {
1007                    ALOGW("%s is not supported.", codecID);
1008                    continue;
1009                }
1010
1011                meta->setInt32(kKeySampleRate, atrack->GetSamplingRate());
1012                meta->setInt32(kKeyChannelCount, atrack->GetChannels());
1013                break;
1014            }
1015
1016            default:
1017                continue;
1018        }
1019
1020        if (err != OK) {
1021            ALOGE("skipping track, codec specific data was malformed.");
1022            continue;
1023        }
1024
1025        long long durationNs = mSegment->GetDuration();
1026        meta->setInt64(kKeyDuration, (durationNs + 500) / 1000);
1027
1028        mTracks.push();
1029        TrackInfo *trackInfo = &mTracks.editItemAt(mTracks.size() - 1);
1030        trackInfo->mTrackNum = track->GetNumber();
1031        trackInfo->mMeta = meta;
1032        trackInfo->mExtractor = this;
1033    }
1034}
1035
1036void MatroskaExtractor::findThumbnails() {
1037    for (size_t i = 0; i < mTracks.size(); ++i) {
1038        TrackInfo *info = &mTracks.editItemAt(i);
1039
1040        const char *mime;
1041        CHECK(info->mMeta->findCString(kKeyMIMEType, &mime));
1042
1043        if (strncasecmp(mime, "video/", 6)) {
1044            continue;
1045        }
1046
1047        BlockIterator iter(this, info->mTrackNum, i);
1048        int32_t j = 0;
1049        int64_t thumbnailTimeUs = 0;
1050        size_t maxBlockSize = 0;
1051        while (!iter.eos() && j < 20) {
1052            if (iter.block()->IsKey()) {
1053                ++j;
1054
1055                size_t blockSize = 0;
1056                for (int k = 0; k < iter.block()->GetFrameCount(); ++k) {
1057                    blockSize += iter.block()->GetFrame(k).len;
1058                }
1059
1060                if (blockSize > maxBlockSize) {
1061                    maxBlockSize = blockSize;
1062                    thumbnailTimeUs = iter.blockTimeUs();
1063                }
1064            }
1065            iter.advance();
1066        }
1067        info->mMeta->setInt64(kKeyThumbnailTime, thumbnailTimeUs);
1068    }
1069}
1070
1071sp<MetaData> MatroskaExtractor::getMetaData() {
1072    sp<MetaData> meta = new MetaData;
1073
1074    meta->setCString(
1075            kKeyMIMEType,
1076            mIsWebm ? "video/webm" : MEDIA_MIMETYPE_CONTAINER_MATROSKA);
1077
1078    return meta;
1079}
1080
1081uint32_t MatroskaExtractor::flags() const {
1082    uint32_t x = CAN_PAUSE;
1083    if (!isLiveStreaming()) {
1084        x |= CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK;
1085    }
1086
1087    return x;
1088}
1089
1090bool SniffMatroska(
1091        const sp<DataSource> &source, String8 *mimeType, float *confidence,
1092        sp<AMessage> *) {
1093    DataSourceReader reader(source);
1094    mkvparser::EBMLHeader ebmlHeader;
1095    long long pos;
1096    if (ebmlHeader.Parse(&reader, pos) < 0) {
1097        return false;
1098    }
1099
1100    mimeType->setTo(MEDIA_MIMETYPE_CONTAINER_MATROSKA);
1101    *confidence = 0.6;
1102
1103    return true;
1104}
1105
1106}  // namespace android
1107