MP3Extractor.cpp revision 7cc497733b7602b99b783decfa8c002bc27e25aa
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MP3Extractor"
19#include <utils/Log.h>
20
21#include "include/MP3Extractor.h"
22
23#include "include/ID3.h"
24#include "include/VBRISeeker.h"
25#include "include/XINGSeeker.h"
26
27#include <media/stagefright/foundation/AMessage.h>
28#include <media/stagefright/DataSource.h>
29#include <media/stagefright/MediaBuffer.h>
30#include <media/stagefright/MediaBufferGroup.h>
31#include <media/stagefright/MediaDebug.h>
32#include <media/stagefright/MediaDefs.h>
33#include <media/stagefright/MediaErrors.h>
34#include <media/stagefright/MediaSource.h>
35#include <media/stagefright/MetaData.h>
36#include <media/stagefright/Utils.h>
37#include <utils/String8.h>
38
39namespace android {
40
// Mask selecting the MPEG audio header bits that have to be identical in
// every frame of one stream: the sync word, the version, the layer, the
// sampling rate index and the channel mode.
// The protection bit, bitrate index, padding bit, private bit, mode
// extension, copyright bit, original bit and emphasis are masked out and
// are therefore not compared.
static const uint32_t kMask = 0xfffe0cc0;
46
47// static
48bool MP3Extractor::get_mp3_frame_size(
49        uint32_t header, size_t *frame_size,
50        int *out_sampling_rate, int *out_channels,
51        int *out_bitrate) {
52    *frame_size = 0;
53
54    if (out_sampling_rate) {
55        *out_sampling_rate = 0;
56    }
57
58    if (out_channels) {
59        *out_channels = 0;
60    }
61
62    if (out_bitrate) {
63        *out_bitrate = 0;
64    }
65
66    if ((header & 0xffe00000) != 0xffe00000) {
67        return false;
68    }
69
70    unsigned version = (header >> 19) & 3;
71
72    if (version == 0x01) {
73        return false;
74    }
75
76    unsigned layer = (header >> 17) & 3;
77
78    if (layer == 0x00) {
79        return false;
80    }
81
82    unsigned protection = (header >> 16) & 1;
83
84    unsigned bitrate_index = (header >> 12) & 0x0f;
85
86    if (bitrate_index == 0 || bitrate_index == 0x0f) {
87        // Disallow "free" bitrate.
88        return false;
89    }
90
91    unsigned sampling_rate_index = (header >> 10) & 3;
92
93    if (sampling_rate_index == 3) {
94        return false;
95    }
96
97    static const int kSamplingRateV1[] = { 44100, 48000, 32000 };
98    int sampling_rate = kSamplingRateV1[sampling_rate_index];
99    if (version == 2 /* V2 */) {
100        sampling_rate /= 2;
101    } else if (version == 0 /* V2.5 */) {
102        sampling_rate /= 4;
103    }
104
105    unsigned padding = (header >> 9) & 1;
106
107    if (layer == 3) {
108        // layer I
109
110        static const int kBitrateV1[] = {
111            32, 64, 96, 128, 160, 192, 224, 256,
112            288, 320, 352, 384, 416, 448
113        };
114
115        static const int kBitrateV2[] = {
116            32, 48, 56, 64, 80, 96, 112, 128,
117            144, 160, 176, 192, 224, 256
118        };
119
120        int bitrate =
121            (version == 3 /* V1 */)
122                ? kBitrateV1[bitrate_index - 1]
123                : kBitrateV2[bitrate_index - 1];
124
125        if (out_bitrate) {
126            *out_bitrate = bitrate;
127        }
128
129        *frame_size = (12000 * bitrate / sampling_rate + padding) * 4;
130    } else {
131        // layer II or III
132
133        static const int kBitrateV1L2[] = {
134            32, 48, 56, 64, 80, 96, 112, 128,
135            160, 192, 224, 256, 320, 384
136        };
137
138        static const int kBitrateV1L3[] = {
139            32, 40, 48, 56, 64, 80, 96, 112,
140            128, 160, 192, 224, 256, 320
141        };
142
143        static const int kBitrateV2[] = {
144            8, 16, 24, 32, 40, 48, 56, 64,
145            80, 96, 112, 128, 144, 160
146        };
147
148        int bitrate;
149        if (version == 3 /* V1 */) {
150            bitrate = (layer == 2 /* L2 */)
151                ? kBitrateV1L2[bitrate_index - 1]
152                : kBitrateV1L3[bitrate_index - 1];
153        } else {
154            // V2 (or 2.5)
155
156            bitrate = kBitrateV2[bitrate_index - 1];
157        }
158
159        if (out_bitrate) {
160            *out_bitrate = bitrate;
161        }
162
163        if (version == 3 /* V1 */) {
164            *frame_size = 144000 * bitrate / sampling_rate + padding;
165        } else {
166            // V2 or V2.5
167            *frame_size = 72000 * bitrate / sampling_rate + padding;
168        }
169    }
170
171    if (out_sampling_rate) {
172        *out_sampling_rate = sampling_rate;
173    }
174
175    if (out_channels) {
176        int channel_mode = (header >> 6) & 3;
177
178        *out_channels = (channel_mode == 3) ? 1 : 2;
179    }
180
181    return true;
182}
183
184static bool Resync(
185        const sp<DataSource> &source, uint32_t match_header,
186        off64_t *inout_pos, off64_t *post_id3_pos, uint32_t *out_header) {
187    if (post_id3_pos != NULL) {
188        *post_id3_pos = 0;
189    }
190
191    if (*inout_pos == 0) {
192        // Skip an optional ID3 header if syncing at the very beginning
193        // of the datasource.
194
195        for (;;) {
196            uint8_t id3header[10];
197            if (source->readAt(*inout_pos, id3header, sizeof(id3header))
198                    < (ssize_t)sizeof(id3header)) {
199                // If we can't even read these 10 bytes, we might as well bail
200                // out, even if there _were_ 10 bytes of valid mp3 audio data...
201                return false;
202            }
203
204            if (memcmp("ID3", id3header, 3)) {
205                break;
206            }
207
208            // Skip the ID3v2 header.
209
210            size_t len =
211                ((id3header[6] & 0x7f) << 21)
212                | ((id3header[7] & 0x7f) << 14)
213                | ((id3header[8] & 0x7f) << 7)
214                | (id3header[9] & 0x7f);
215
216            len += 10;
217
218            *inout_pos += len;
219
220            LOGV("skipped ID3 tag, new starting offset is %ld (0x%08lx)",
221                 *inout_pos, *inout_pos);
222        }
223
224        if (post_id3_pos != NULL) {
225            *post_id3_pos = *inout_pos;
226        }
227    }
228
229    off64_t pos = *inout_pos;
230    bool valid = false;
231
232    const size_t kMaxReadBytes = 1024;
233    const size_t kMaxBytesChecked = 128 * 1024;
234    uint8_t buf[kMaxReadBytes];
235    ssize_t bytesToRead = kMaxReadBytes;
236    ssize_t totalBytesRead = 0;
237    ssize_t remainingBytes = 0;
238    bool reachEOS = false;
239    uint8_t *tmp = buf;
240
241    do {
242        if (pos >= *inout_pos + kMaxBytesChecked) {
243            // Don't scan forever.
244            LOGV("giving up at offset %ld", pos);
245            break;
246        }
247
248        if (remainingBytes < 4) {
249            if (reachEOS) {
250                break;
251            } else {
252                memcpy(buf, tmp, remainingBytes);
253                bytesToRead = kMaxReadBytes - remainingBytes;
254                totalBytesRead = source->readAt(pos, buf + remainingBytes, bytesToRead);
255                if (totalBytesRead <= 0) {
256                    break;
257                }
258                reachEOS = (totalBytesRead != bytesToRead);
259                totalBytesRead += remainingBytes;
260                remainingBytes = totalBytesRead;
261                tmp = buf;
262                continue;
263            }
264        }
265
266        uint32_t header = U32_AT(tmp);
267
268        if (match_header != 0 && (header & kMask) != (match_header & kMask)) {
269            ++pos;
270            ++tmp;
271            --remainingBytes;
272            continue;
273        }
274
275        size_t frame_size;
276        int sample_rate, num_channels, bitrate;
277        if (!MP3Extractor::get_mp3_frame_size(
278                    header, &frame_size,
279                    &sample_rate, &num_channels, &bitrate)) {
280            ++pos;
281            ++tmp;
282            --remainingBytes;
283            continue;
284        }
285
286        LOGV("found possible 1st frame at %ld (header = 0x%08x)", pos, header);
287
288        // We found what looks like a valid frame,
289        // now find its successors.
290
291        off64_t test_pos = pos + frame_size;
292
293        valid = true;
294        for (int j = 0; j < 3; ++j) {
295            uint8_t tmp[4];
296            if (source->readAt(test_pos, tmp, 4) < 4) {
297                valid = false;
298                break;
299            }
300
301            uint32_t test_header = U32_AT(tmp);
302
303            LOGV("subsequent header is %08x", test_header);
304
305            if ((test_header & kMask) != (header & kMask)) {
306                valid = false;
307                break;
308            }
309
310            size_t test_frame_size;
311            if (!MP3Extractor::get_mp3_frame_size(
312                        test_header, &test_frame_size)) {
313                valid = false;
314                break;
315            }
316
317            LOGV("found subsequent frame #%d at %ld", j + 2, test_pos);
318
319            test_pos += test_frame_size;
320        }
321
322        if (valid) {
323            *inout_pos = pos;
324
325            if (out_header != NULL) {
326                *out_header = header;
327            }
328        } else {
329            LOGV("no dice, no valid sequence of frames found.");
330        }
331
332        ++pos;
333        ++tmp;
334        --remainingBytes;
335    } while (!valid);
336
337    return valid;
338}
339
340class MP3Source : public MediaSource {
341public:
342    MP3Source(
343            const sp<MetaData> &meta, const sp<DataSource> &source,
344            off64_t first_frame_pos, uint32_t fixed_header,
345            const sp<MP3Seeker> &seeker);
346
347    virtual status_t start(MetaData *params = NULL);
348    virtual status_t stop();
349
350    virtual sp<MetaData> getFormat();
351
352    virtual status_t read(
353            MediaBuffer **buffer, const ReadOptions *options = NULL);
354
355protected:
356    virtual ~MP3Source();
357
358private:
359    sp<MetaData> mMeta;
360    sp<DataSource> mDataSource;
361    off64_t mFirstFramePos;
362    uint32_t mFixedHeader;
363    off64_t mCurrentPos;
364    int64_t mCurrentTimeUs;
365    bool mStarted;
366    sp<MP3Seeker> mSeeker;
367    MediaBufferGroup *mGroup;
368
369    MP3Source(const MP3Source &);
370    MP3Source &operator=(const MP3Source &);
371};
372
373MP3Extractor::MP3Extractor(
374        const sp<DataSource> &source, const sp<AMessage> &meta)
375    : mInitCheck(NO_INIT),
376      mDataSource(source),
377      mFirstFramePos(-1),
378      mFixedHeader(0) {
379    off64_t pos = 0;
380    off64_t post_id3_pos;
381    uint32_t header;
382    bool success;
383
384    int64_t meta_offset;
385    uint32_t meta_header;
386    int64_t meta_post_id3_offset;
387    if (meta != NULL
388            && meta->findInt64("offset", &meta_offset)
389            && meta->findInt32("header", (int32_t *)&meta_header)
390            && meta->findInt64("post-id3-offset", &meta_post_id3_offset)) {
391        // The sniffer has already done all the hard work for us, simply
392        // accept its judgement.
393        pos = (off64_t)meta_offset;
394        header = meta_header;
395        post_id3_pos = (off64_t)meta_post_id3_offset;
396
397        success = true;
398    } else {
399        success = Resync(mDataSource, 0, &pos, &post_id3_pos, &header);
400    }
401
402    if (!success) {
403        // mInitCheck will remain NO_INIT
404        return;
405    }
406
407    mFirstFramePos = pos;
408    mFixedHeader = header;
409
410    size_t frame_size;
411    int sample_rate;
412    int num_channels;
413    int bitrate;
414    get_mp3_frame_size(
415            header, &frame_size, &sample_rate, &num_channels, &bitrate);
416
417    mMeta = new MetaData;
418
419    mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
420    mMeta->setInt32(kKeySampleRate, sample_rate);
421    mMeta->setInt32(kKeyBitRate, bitrate * 1000);
422    mMeta->setInt32(kKeyChannelCount, num_channels);
423
424    mSeeker = XINGSeeker::CreateFromSource(mDataSource, mFirstFramePos);
425
426    if (mSeeker == NULL) {
427        mSeeker = VBRISeeker::CreateFromSource(mDataSource, post_id3_pos);
428    }
429
430    int64_t durationUs;
431
432    if (mSeeker == NULL || !mSeeker->getDuration(&durationUs)) {
433        off64_t fileSize;
434        if (mDataSource->getSize(&fileSize) == OK) {
435            durationUs = 8000LL * (fileSize - mFirstFramePos) / bitrate;
436        } else {
437            durationUs = -1;
438        }
439    }
440
441    if (durationUs >= 0) {
442        mMeta->setInt64(kKeyDuration, durationUs);
443    }
444
445    mInitCheck = OK;
446}
447
448size_t MP3Extractor::countTracks() {
449    return mInitCheck != OK ? 0 : 1;
450}
451
452sp<MediaSource> MP3Extractor::getTrack(size_t index) {
453    if (mInitCheck != OK || index != 0) {
454        return NULL;
455    }
456
457    return new MP3Source(
458            mMeta, mDataSource, mFirstFramePos, mFixedHeader,
459            mSeeker);
460}
461
462sp<MetaData> MP3Extractor::getTrackMetaData(size_t index, uint32_t flags) {
463    if (mInitCheck != OK || index != 0) {
464        return NULL;
465    }
466
467    return mMeta;
468}
469
470////////////////////////////////////////////////////////////////////////////////
471
472MP3Source::MP3Source(
473        const sp<MetaData> &meta, const sp<DataSource> &source,
474        off64_t first_frame_pos, uint32_t fixed_header,
475        const sp<MP3Seeker> &seeker)
476    : mMeta(meta),
477      mDataSource(source),
478      mFirstFramePos(first_frame_pos),
479      mFixedHeader(fixed_header),
480      mCurrentPos(0),
481      mCurrentTimeUs(0),
482      mStarted(false),
483      mSeeker(seeker),
484      mGroup(NULL) {
485}
486
487MP3Source::~MP3Source() {
488    if (mStarted) {
489        stop();
490    }
491}
492
493status_t MP3Source::start(MetaData *) {
494    CHECK(!mStarted);
495
496    mGroup = new MediaBufferGroup;
497
498    const size_t kMaxFrameSize = 32768;
499    mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
500
501    mCurrentPos = mFirstFramePos;
502    mCurrentTimeUs = 0;
503
504    mStarted = true;
505
506    return OK;
507}
508
509status_t MP3Source::stop() {
510    CHECK(mStarted);
511
512    delete mGroup;
513    mGroup = NULL;
514
515    mStarted = false;
516
517    return OK;
518}
519
520sp<MetaData> MP3Source::getFormat() {
521    return mMeta;
522}
523
524status_t MP3Source::read(
525        MediaBuffer **out, const ReadOptions *options) {
526    *out = NULL;
527
528    int64_t seekTimeUs;
529    ReadOptions::SeekMode mode;
530    if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
531        int64_t actualSeekTimeUs = seekTimeUs;
532        if (mSeeker == NULL
533                || !mSeeker->getOffsetForTime(&actualSeekTimeUs, &mCurrentPos)) {
534            int32_t bitrate;
535            if (!mMeta->findInt32(kKeyBitRate, &bitrate)) {
536                // bitrate is in bits/sec.
537                LOGI("no bitrate");
538
539                return ERROR_UNSUPPORTED;
540            }
541
542            mCurrentTimeUs = seekTimeUs;
543            mCurrentPos = mFirstFramePos + seekTimeUs * bitrate / 8000000;
544        } else {
545            mCurrentTimeUs = actualSeekTimeUs;
546        }
547    }
548
549    MediaBuffer *buffer;
550    status_t err = mGroup->acquire_buffer(&buffer);
551    if (err != OK) {
552        return err;
553    }
554
555    size_t frame_size;
556    int bitrate;
557    for (;;) {
558        ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), 4);
559        if (n < 4) {
560            buffer->release();
561            buffer = NULL;
562
563            return ERROR_END_OF_STREAM;
564        }
565
566        uint32_t header = U32_AT((const uint8_t *)buffer->data());
567
568        if ((header & kMask) == (mFixedHeader & kMask)
569            && MP3Extractor::get_mp3_frame_size(
570                header, &frame_size, NULL, NULL, &bitrate)) {
571            break;
572        }
573
574        // Lost sync.
575        LOGV("lost sync! header = 0x%08x, old header = 0x%08x\n", header, mFixedHeader);
576
577        off64_t pos = mCurrentPos;
578        if (!Resync(mDataSource, mFixedHeader, &pos, NULL, NULL)) {
579            LOGE("Unable to resync. Signalling end of stream.");
580
581            buffer->release();
582            buffer = NULL;
583
584            return ERROR_END_OF_STREAM;
585        }
586
587        mCurrentPos = pos;
588
589        // Try again with the new position.
590    }
591
592    CHECK(frame_size <= buffer->size());
593
594    ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), frame_size);
595    if (n < (ssize_t)frame_size) {
596        buffer->release();
597        buffer = NULL;
598
599        return ERROR_END_OF_STREAM;
600    }
601
602    buffer->set_range(0, frame_size);
603
604    buffer->meta_data()->setInt64(kKeyTime, mCurrentTimeUs);
605    buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
606
607    mCurrentPos += frame_size;
608    mCurrentTimeUs += frame_size * 8000ll / bitrate;
609
610    *out = buffer;
611
612    return OK;
613}
614
615sp<MetaData> MP3Extractor::getMetaData() {
616    sp<MetaData> meta = new MetaData;
617
618    if (mInitCheck != OK) {
619        return meta;
620    }
621
622    meta->setCString(kKeyMIMEType, "audio/mpeg");
623
624    ID3 id3(mDataSource);
625
626    if (!id3.isValid()) {
627        return meta;
628    }
629
630    struct Map {
631        int key;
632        const char *tag1;
633        const char *tag2;
634    };
635    static const Map kMap[] = {
636        { kKeyAlbum, "TALB", "TAL" },
637        { kKeyArtist, "TPE1", "TP1" },
638        { kKeyAlbumArtist, "TPE2", "TP2" },
639        { kKeyComposer, "TCOM", "TCM" },
640        { kKeyGenre, "TCON", "TCO" },
641        { kKeyTitle, "TIT2", "TT2" },
642        { kKeyYear, "TYE", "TYER" },
643        { kKeyAuthor, "TXT", "TEXT" },
644        { kKeyCDTrackNumber, "TRK", "TRCK" },
645        { kKeyDiscNumber, "TPA", "TPOS" },
646        { kKeyCompilation, "TCP", "TCMP" },
647    };
648    static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
649
650    for (size_t i = 0; i < kNumMapEntries; ++i) {
651        ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
652        if (it->done()) {
653            delete it;
654            it = new ID3::Iterator(id3, kMap[i].tag2);
655        }
656
657        if (it->done()) {
658            delete it;
659            continue;
660        }
661
662        String8 s;
663        it->getString(&s);
664        delete it;
665
666        meta->setCString(kMap[i].key, s);
667    }
668
669    size_t dataSize;
670    String8 mime;
671    const void *data = id3.getAlbumArt(&dataSize, &mime);
672
673    if (data) {
674        meta->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
675        meta->setCString(kKeyAlbumArtMIME, mime.string());
676    }
677
678    return meta;
679}
680
681bool SniffMP3(
682        const sp<DataSource> &source, String8 *mimeType,
683        float *confidence, sp<AMessage> *meta) {
684    off64_t pos = 0;
685    off64_t post_id3_pos;
686    uint32_t header;
687    if (!Resync(source, 0, &pos, &post_id3_pos, &header)) {
688        return false;
689    }
690
691    *meta = new AMessage;
692    (*meta)->setInt64("offset", pos);
693    (*meta)->setInt32("header", header);
694    (*meta)->setInt64("post-id3-offset", post_id3_pos);
695
696    *mimeType = MEDIA_MIMETYPE_AUDIO_MPEG;
697    *confidence = 0.2f;
698
699    return true;
700}
701
702}  // namespace android
703