MP3Extractor.cpp revision a9e05b911f978dc3f25d4b1e35e51383dc4f9fc7
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MP3Extractor"
19#include <utils/Log.h>
20
21#include "include/MP3Extractor.h"
22
23#include "include/ID3.h"
24#include "include/VBRISeeker.h"
25#include "include/XINGSeeker.h"
26
27#include <media/stagefright/foundation/AMessage.h>
28#include <media/stagefright/DataSource.h>
29#include <media/stagefright/MediaBuffer.h>
30#include <media/stagefright/MediaBufferGroup.h>
31#include <media/stagefright/MediaDebug.h>
32#include <media/stagefright/MediaDefs.h>
33#include <media/stagefright/MediaErrors.h>
34#include <media/stagefright/MediaSource.h>
35#include <media/stagefright/MetaData.h>
36#include <media/stagefright/Utils.h>
37#include <utils/String8.h>
38
39namespace android {
40
// Header bits that have to be identical in every frame of one stream:
// the frame sync, the MPEG version, the layer and the sampling rate.
// Everything else (protection, bitrate, padding, private bit, mode,
// mode extension, copyright bit, original bit and emphasis) is allowed
// to differ from frame to frame and is therefore masked out.
static const uint32_t kMask =
    0xffe00000      // frame sync (11 bits)
    | 0x00180000    // MPEG version
    | 0x00060000    // layer
    | 0x00000c00;   // sampling rate index
46
47// static
48bool MP3Extractor::get_mp3_frame_size(
49        uint32_t header, size_t *frame_size,
50        int *out_sampling_rate, int *out_channels,
51        int *out_bitrate, int *out_num_samples) {
52    *frame_size = 0;
53
54    if (out_sampling_rate) {
55        *out_sampling_rate = 0;
56    }
57
58    if (out_channels) {
59        *out_channels = 0;
60    }
61
62    if (out_bitrate) {
63        *out_bitrate = 0;
64    }
65
66    if (out_num_samples) {
67        *out_num_samples = 1152;
68    }
69
70    if ((header & 0xffe00000) != 0xffe00000) {
71        return false;
72    }
73
74    unsigned version = (header >> 19) & 3;
75
76    if (version == 0x01) {
77        return false;
78    }
79
80    unsigned layer = (header >> 17) & 3;
81
82    if (layer == 0x00) {
83        return false;
84    }
85
86    unsigned protection = (header >> 16) & 1;
87
88    unsigned bitrate_index = (header >> 12) & 0x0f;
89
90    if (bitrate_index == 0 || bitrate_index == 0x0f) {
91        // Disallow "free" bitrate.
92        return false;
93    }
94
95    unsigned sampling_rate_index = (header >> 10) & 3;
96
97    if (sampling_rate_index == 3) {
98        return false;
99    }
100
101    static const int kSamplingRateV1[] = { 44100, 48000, 32000 };
102    int sampling_rate = kSamplingRateV1[sampling_rate_index];
103    if (version == 2 /* V2 */) {
104        sampling_rate /= 2;
105    } else if (version == 0 /* V2.5 */) {
106        sampling_rate /= 4;
107    }
108
109    unsigned padding = (header >> 9) & 1;
110
111    if (layer == 3) {
112        // layer I
113
114        static const int kBitrateV1[] = {
115            32, 64, 96, 128, 160, 192, 224, 256,
116            288, 320, 352, 384, 416, 448
117        };
118
119        static const int kBitrateV2[] = {
120            32, 48, 56, 64, 80, 96, 112, 128,
121            144, 160, 176, 192, 224, 256
122        };
123
124        int bitrate =
125            (version == 3 /* V1 */)
126                ? kBitrateV1[bitrate_index - 1]
127                : kBitrateV2[bitrate_index - 1];
128
129        if (out_bitrate) {
130            *out_bitrate = bitrate;
131        }
132
133        *frame_size = (12000 * bitrate / sampling_rate + padding) * 4;
134
135        if (out_num_samples) {
136            *out_num_samples = 384;
137        }
138    } else {
139        // layer II or III
140
141        static const int kBitrateV1L2[] = {
142            32, 48, 56, 64, 80, 96, 112, 128,
143            160, 192, 224, 256, 320, 384
144        };
145
146        static const int kBitrateV1L3[] = {
147            32, 40, 48, 56, 64, 80, 96, 112,
148            128, 160, 192, 224, 256, 320
149        };
150
151        static const int kBitrateV2[] = {
152            8, 16, 24, 32, 40, 48, 56, 64,
153            80, 96, 112, 128, 144, 160
154        };
155
156        int bitrate;
157        if (version == 3 /* V1 */) {
158            bitrate = (layer == 2 /* L2 */)
159                ? kBitrateV1L2[bitrate_index - 1]
160                : kBitrateV1L3[bitrate_index - 1];
161
162            if (out_num_samples) {
163                *out_num_samples = 1152;
164            }
165        } else {
166            // V2 (or 2.5)
167
168            bitrate = kBitrateV2[bitrate_index - 1];
169            if (out_num_samples) {
170                *out_num_samples = 576;
171            }
172        }
173
174        if (out_bitrate) {
175            *out_bitrate = bitrate;
176        }
177
178        if (version == 3 /* V1 */) {
179            *frame_size = 144000 * bitrate / sampling_rate + padding;
180        } else {
181            // V2 or V2.5
182            *frame_size = 72000 * bitrate / sampling_rate + padding;
183        }
184    }
185
186    if (out_sampling_rate) {
187        *out_sampling_rate = sampling_rate;
188    }
189
190    if (out_channels) {
191        int channel_mode = (header >> 6) & 3;
192
193        *out_channels = (channel_mode == 3) ? 1 : 2;
194    }
195
196    return true;
197}
198
199static bool Resync(
200        const sp<DataSource> &source, uint32_t match_header,
201        off64_t *inout_pos, off64_t *post_id3_pos, uint32_t *out_header) {
202    if (post_id3_pos != NULL) {
203        *post_id3_pos = 0;
204    }
205
206    if (*inout_pos == 0) {
207        // Skip an optional ID3 header if syncing at the very beginning
208        // of the datasource.
209
210        for (;;) {
211            uint8_t id3header[10];
212            if (source->readAt(*inout_pos, id3header, sizeof(id3header))
213                    < (ssize_t)sizeof(id3header)) {
214                // If we can't even read these 10 bytes, we might as well bail
215                // out, even if there _were_ 10 bytes of valid mp3 audio data...
216                return false;
217            }
218
219            if (memcmp("ID3", id3header, 3)) {
220                break;
221            }
222
223            // Skip the ID3v2 header.
224
225            size_t len =
226                ((id3header[6] & 0x7f) << 21)
227                | ((id3header[7] & 0x7f) << 14)
228                | ((id3header[8] & 0x7f) << 7)
229                | (id3header[9] & 0x7f);
230
231            len += 10;
232
233            *inout_pos += len;
234
235            LOGV("skipped ID3 tag, new starting offset is %lld (0x%016llx)",
236                 *inout_pos, *inout_pos);
237        }
238
239        if (post_id3_pos != NULL) {
240            *post_id3_pos = *inout_pos;
241        }
242    }
243
244    off64_t pos = *inout_pos;
245    bool valid = false;
246
247    const size_t kMaxReadBytes = 1024;
248    const size_t kMaxBytesChecked = 128 * 1024;
249    uint8_t buf[kMaxReadBytes];
250    ssize_t bytesToRead = kMaxReadBytes;
251    ssize_t totalBytesRead = 0;
252    ssize_t remainingBytes = 0;
253    bool reachEOS = false;
254    uint8_t *tmp = buf;
255
256    do {
257        if (pos >= *inout_pos + kMaxBytesChecked) {
258            // Don't scan forever.
259            LOGV("giving up at offset %lld", pos);
260            break;
261        }
262
263        if (remainingBytes < 4) {
264            if (reachEOS) {
265                break;
266            } else {
267                memcpy(buf, tmp, remainingBytes);
268                bytesToRead = kMaxReadBytes - remainingBytes;
269
270                /*
271                 * The next read position should start from the end of
272                 * the last buffer, and thus should include the remaining
273                 * bytes in the buffer.
274                 */
275                totalBytesRead = source->readAt(pos + remainingBytes,
276                                                buf + remainingBytes,
277                                                bytesToRead);
278                if (totalBytesRead <= 0) {
279                    break;
280                }
281                reachEOS = (totalBytesRead != bytesToRead);
282                totalBytesRead += remainingBytes;
283                remainingBytes = totalBytesRead;
284                tmp = buf;
285                continue;
286            }
287        }
288
289        uint32_t header = U32_AT(tmp);
290
291        if (match_header != 0 && (header & kMask) != (match_header & kMask)) {
292            ++pos;
293            ++tmp;
294            --remainingBytes;
295            continue;
296        }
297
298        size_t frame_size;
299        int sample_rate, num_channels, bitrate;
300        if (!MP3Extractor::get_mp3_frame_size(
301                    header, &frame_size,
302                    &sample_rate, &num_channels, &bitrate)) {
303            ++pos;
304            ++tmp;
305            --remainingBytes;
306            continue;
307        }
308
309        LOGV("found possible 1st frame at %lld (header = 0x%08x)", pos, header);
310
311        // We found what looks like a valid frame,
312        // now find its successors.
313
314        off64_t test_pos = pos + frame_size;
315
316        valid = true;
317        for (int j = 0; j < 3; ++j) {
318            uint8_t tmp[4];
319            if (source->readAt(test_pos, tmp, 4) < 4) {
320                valid = false;
321                break;
322            }
323
324            uint32_t test_header = U32_AT(tmp);
325
326            LOGV("subsequent header is %08x", test_header);
327
328            if ((test_header & kMask) != (header & kMask)) {
329                valid = false;
330                break;
331            }
332
333            size_t test_frame_size;
334            if (!MP3Extractor::get_mp3_frame_size(
335                        test_header, &test_frame_size)) {
336                valid = false;
337                break;
338            }
339
340            LOGV("found subsequent frame #%d at %lld", j + 2, test_pos);
341
342            test_pos += test_frame_size;
343        }
344
345        if (valid) {
346            *inout_pos = pos;
347
348            if (out_header != NULL) {
349                *out_header = header;
350            }
351        } else {
352            LOGV("no dice, no valid sequence of frames found.");
353        }
354
355        ++pos;
356        ++tmp;
357        --remainingBytes;
358    } while (!valid);
359
360    return valid;
361}
362
363class MP3Source : public MediaSource {
364public:
365    MP3Source(
366            const sp<MetaData> &meta, const sp<DataSource> &source,
367            off64_t first_frame_pos, uint32_t fixed_header,
368            const sp<MP3Seeker> &seeker);
369
370    virtual status_t start(MetaData *params = NULL);
371    virtual status_t stop();
372
373    virtual sp<MetaData> getFormat();
374
375    virtual status_t read(
376            MediaBuffer **buffer, const ReadOptions *options = NULL);
377
378protected:
379    virtual ~MP3Source();
380
381private:
382    sp<MetaData> mMeta;
383    sp<DataSource> mDataSource;
384    off64_t mFirstFramePos;
385    uint32_t mFixedHeader;
386    off64_t mCurrentPos;
387    int64_t mCurrentTimeUs;
388    bool mStarted;
389    sp<MP3Seeker> mSeeker;
390    MediaBufferGroup *mGroup;
391
392    int64_t mBasisTimeUs;
393    int64_t mSamplesRead;
394
395    MP3Source(const MP3Source &);
396    MP3Source &operator=(const MP3Source &);
397};
398
399MP3Extractor::MP3Extractor(
400        const sp<DataSource> &source, const sp<AMessage> &meta)
401    : mInitCheck(NO_INIT),
402      mDataSource(source),
403      mFirstFramePos(-1),
404      mFixedHeader(0) {
405    off64_t pos = 0;
406    off64_t post_id3_pos;
407    uint32_t header;
408    bool success;
409
410    int64_t meta_offset;
411    uint32_t meta_header;
412    int64_t meta_post_id3_offset;
413    if (meta != NULL
414            && meta->findInt64("offset", &meta_offset)
415            && meta->findInt32("header", (int32_t *)&meta_header)
416            && meta->findInt64("post-id3-offset", &meta_post_id3_offset)) {
417        // The sniffer has already done all the hard work for us, simply
418        // accept its judgement.
419        pos = (off64_t)meta_offset;
420        header = meta_header;
421        post_id3_pos = (off64_t)meta_post_id3_offset;
422
423        success = true;
424    } else {
425        success = Resync(mDataSource, 0, &pos, &post_id3_pos, &header);
426    }
427
428    if (!success) {
429        // mInitCheck will remain NO_INIT
430        return;
431    }
432
433    mFirstFramePos = pos;
434    mFixedHeader = header;
435
436    size_t frame_size;
437    int sample_rate;
438    int num_channels;
439    int bitrate;
440    get_mp3_frame_size(
441            header, &frame_size, &sample_rate, &num_channels, &bitrate);
442
443    mMeta = new MetaData;
444
445    mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
446    mMeta->setInt32(kKeySampleRate, sample_rate);
447    mMeta->setInt32(kKeyBitRate, bitrate * 1000);
448    mMeta->setInt32(kKeyChannelCount, num_channels);
449
450    mSeeker = XINGSeeker::CreateFromSource(mDataSource, mFirstFramePos);
451
452    if (mSeeker == NULL) {
453        mSeeker = VBRISeeker::CreateFromSource(mDataSource, post_id3_pos);
454    }
455
456    int64_t durationUs;
457
458    if (mSeeker == NULL || !mSeeker->getDuration(&durationUs)) {
459        off64_t fileSize;
460        if (mDataSource->getSize(&fileSize) == OK) {
461            durationUs = 8000LL * (fileSize - mFirstFramePos) / bitrate;
462        } else {
463            durationUs = -1;
464        }
465    }
466
467    if (durationUs >= 0) {
468        mMeta->setInt64(kKeyDuration, durationUs);
469    }
470
471    mInitCheck = OK;
472}
473
474size_t MP3Extractor::countTracks() {
475    return mInitCheck != OK ? 0 : 1;
476}
477
478sp<MediaSource> MP3Extractor::getTrack(size_t index) {
479    if (mInitCheck != OK || index != 0) {
480        return NULL;
481    }
482
483    return new MP3Source(
484            mMeta, mDataSource, mFirstFramePos, mFixedHeader,
485            mSeeker);
486}
487
488sp<MetaData> MP3Extractor::getTrackMetaData(size_t index, uint32_t flags) {
489    if (mInitCheck != OK || index != 0) {
490        return NULL;
491    }
492
493    return mMeta;
494}
495
496////////////////////////////////////////////////////////////////////////////////
497
498MP3Source::MP3Source(
499        const sp<MetaData> &meta, const sp<DataSource> &source,
500        off64_t first_frame_pos, uint32_t fixed_header,
501        const sp<MP3Seeker> &seeker)
502    : mMeta(meta),
503      mDataSource(source),
504      mFirstFramePos(first_frame_pos),
505      mFixedHeader(fixed_header),
506      mCurrentPos(0),
507      mCurrentTimeUs(0),
508      mStarted(false),
509      mSeeker(seeker),
510      mGroup(NULL),
511      mBasisTimeUs(0),
512      mSamplesRead(0) {
513}
514
515MP3Source::~MP3Source() {
516    if (mStarted) {
517        stop();
518    }
519}
520
521status_t MP3Source::start(MetaData *) {
522    CHECK(!mStarted);
523
524    mGroup = new MediaBufferGroup;
525
526    const size_t kMaxFrameSize = 32768;
527    mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
528
529    mCurrentPos = mFirstFramePos;
530    mCurrentTimeUs = 0;
531
532    mBasisTimeUs = mCurrentTimeUs;
533    mSamplesRead = 0;
534
535    mStarted = true;
536
537    return OK;
538}
539
540status_t MP3Source::stop() {
541    CHECK(mStarted);
542
543    delete mGroup;
544    mGroup = NULL;
545
546    mStarted = false;
547
548    return OK;
549}
550
551sp<MetaData> MP3Source::getFormat() {
552    return mMeta;
553}
554
555status_t MP3Source::read(
556        MediaBuffer **out, const ReadOptions *options) {
557    *out = NULL;
558
559    int64_t seekTimeUs;
560    ReadOptions::SeekMode mode;
561    if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
562        int64_t actualSeekTimeUs = seekTimeUs;
563        if (mSeeker == NULL
564                || !mSeeker->getOffsetForTime(&actualSeekTimeUs, &mCurrentPos)) {
565            int32_t bitrate;
566            if (!mMeta->findInt32(kKeyBitRate, &bitrate)) {
567                // bitrate is in bits/sec.
568                LOGI("no bitrate");
569
570                return ERROR_UNSUPPORTED;
571            }
572
573            mCurrentTimeUs = seekTimeUs;
574            mCurrentPos = mFirstFramePos + seekTimeUs * bitrate / 8000000;
575        } else {
576            mCurrentTimeUs = actualSeekTimeUs;
577        }
578
579        mBasisTimeUs = mCurrentTimeUs;
580        mSamplesRead = 0;
581    }
582
583    MediaBuffer *buffer;
584    status_t err = mGroup->acquire_buffer(&buffer);
585    if (err != OK) {
586        return err;
587    }
588
589    size_t frame_size;
590    int bitrate;
591    int num_samples;
592    int sample_rate;
593    for (;;) {
594        ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), 4);
595        if (n < 4) {
596            buffer->release();
597            buffer = NULL;
598
599            return ERROR_END_OF_STREAM;
600        }
601
602        uint32_t header = U32_AT((const uint8_t *)buffer->data());
603
604        if ((header & kMask) == (mFixedHeader & kMask)
605            && MP3Extractor::get_mp3_frame_size(
606                header, &frame_size, &sample_rate, NULL, &bitrate, &num_samples)) {
607            break;
608        }
609
610        // Lost sync.
611        LOGV("lost sync! header = 0x%08x, old header = 0x%08x\n", header, mFixedHeader);
612
613        off64_t pos = mCurrentPos;
614        if (!Resync(mDataSource, mFixedHeader, &pos, NULL, NULL)) {
615            LOGE("Unable to resync. Signalling end of stream.");
616
617            buffer->release();
618            buffer = NULL;
619
620            return ERROR_END_OF_STREAM;
621        }
622
623        mCurrentPos = pos;
624
625        // Try again with the new position.
626    }
627
628    CHECK(frame_size <= buffer->size());
629
630    ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), frame_size);
631    if (n < (ssize_t)frame_size) {
632        buffer->release();
633        buffer = NULL;
634
635        return ERROR_END_OF_STREAM;
636    }
637
638    buffer->set_range(0, frame_size);
639
640    buffer->meta_data()->setInt64(kKeyTime, mCurrentTimeUs);
641    buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
642
643    mCurrentPos += frame_size;
644
645    mSamplesRead += num_samples;
646    mCurrentTimeUs = mBasisTimeUs + ((mSamplesRead * 1000000) / sample_rate);
647
648    *out = buffer;
649
650    return OK;
651}
652
653sp<MetaData> MP3Extractor::getMetaData() {
654    sp<MetaData> meta = new MetaData;
655
656    if (mInitCheck != OK) {
657        return meta;
658    }
659
660    meta->setCString(kKeyMIMEType, "audio/mpeg");
661
662    ID3 id3(mDataSource);
663
664    if (!id3.isValid()) {
665        return meta;
666    }
667
668    struct Map {
669        int key;
670        const char *tag1;
671        const char *tag2;
672    };
673    static const Map kMap[] = {
674        { kKeyAlbum, "TALB", "TAL" },
675        { kKeyArtist, "TPE1", "TP1" },
676        { kKeyAlbumArtist, "TPE2", "TP2" },
677        { kKeyComposer, "TCOM", "TCM" },
678        { kKeyGenre, "TCON", "TCO" },
679        { kKeyTitle, "TIT2", "TT2" },
680        { kKeyYear, "TYE", "TYER" },
681        { kKeyAuthor, "TXT", "TEXT" },
682        { kKeyCDTrackNumber, "TRK", "TRCK" },
683        { kKeyDiscNumber, "TPA", "TPOS" },
684        { kKeyCompilation, "TCP", "TCMP" },
685    };
686    static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
687
688    for (size_t i = 0; i < kNumMapEntries; ++i) {
689        ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
690        if (it->done()) {
691            delete it;
692            it = new ID3::Iterator(id3, kMap[i].tag2);
693        }
694
695        if (it->done()) {
696            delete it;
697            continue;
698        }
699
700        String8 s;
701        it->getString(&s);
702        delete it;
703
704        meta->setCString(kMap[i].key, s);
705    }
706
707    size_t dataSize;
708    String8 mime;
709    const void *data = id3.getAlbumArt(&dataSize, &mime);
710
711    if (data) {
712        meta->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
713        meta->setCString(kKeyAlbumArtMIME, mime.string());
714    }
715
716    return meta;
717}
718
719bool SniffMP3(
720        const sp<DataSource> &source, String8 *mimeType,
721        float *confidence, sp<AMessage> *meta) {
722    off64_t pos = 0;
723    off64_t post_id3_pos;
724    uint32_t header;
725    if (!Resync(source, 0, &pos, &post_id3_pos, &header)) {
726        return false;
727    }
728
729    *meta = new AMessage;
730    (*meta)->setInt64("offset", pos);
731    (*meta)->setInt32("header", header);
732    (*meta)->setInt64("post-id3-offset", post_id3_pos);
733
734    *mimeType = MEDIA_MIMETYPE_AUDIO_MPEG;
735    *confidence = 0.2f;
736
737    return true;
738}
739
740}  // namespace android
741