MP3Extractor.cpp revision ba0707dc52b3ad2bec2244e1e6c39f31e06d467a
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MP3Extractor"
19#include <utils/Log.h>
20
21#include "include/MP3Extractor.h"
22
23#include "include/ID3.h"
24
25#include <media/stagefright/DataSource.h>
26#include <media/stagefright/MediaBuffer.h>
27#include <media/stagefright/MediaBufferGroup.h>
28#include <media/stagefright/MediaDebug.h>
29#include <media/stagefright/MediaDefs.h>
30#include <media/stagefright/MediaErrors.h>
31#include <media/stagefright/MediaSource.h>
32#include <media/stagefright/MetaData.h>
33#include <media/stagefright/Utils.h>
34#include <utils/String8.h>
35
36namespace android {
37
// Bits of the 32-bit frame header that must be identical in every frame of
// a stream: the 11-bit sync word, the MPEG version, the layer, the
// sampling-rate index and the channel mode.
// Everything else is masked out and may differ from frame to frame:
// protection, bitrate, padding, private bit, mode extension,
// copyright bit, original bit and emphasis.
static const uint32_t kMask = 0xfffe0cc0;
43
// Decodes a 32-bit MPEG audio frame header.
//
// header            - the four header bytes, big-endian, as one integer.
// frame_size        - receives the frame length in bytes (0 on failure).
// out_sampling_rate - optional; receives the sampling rate in Hz.
// out_channels      - optional; receives 1 for mono, 2 otherwise.
// out_bitrate       - optional; receives the bitrate in kbit/s.
//
// Returns false (with every output zeroed) when the sync word is missing or
// the header uses a reserved version, layer or sampling-rate index, or a
// "free"/invalid bitrate index.
static bool get_mp3_frame_size(
        uint32_t header, size_t *frame_size,
        int *out_sampling_rate = NULL, int *out_channels = NULL,
        int *out_bitrate = NULL) {
    *frame_size = 0;

    if (out_sampling_rate) {
        *out_sampling_rate = 0;
    }

    if (out_channels) {
        *out_channels = 0;
    }

    if (out_bitrate) {
        *out_bitrate = 0;
    }

    // The top 11 bits are the sync word and must all be set.
    if ((header & 0xffe00000) != 0xffe00000) {
        return false;
    }

    // 3 = MPEG 1, 2 = MPEG 2, 0 = MPEG 2.5, 1 = reserved.
    unsigned version = (header >> 19) & 3;

    if (version == 0x01) {
        return false;
    }

    // 3 = Layer I, 2 = Layer II, 1 = Layer III, 0 = reserved.
    unsigned layer = (header >> 17) & 3;

    if (layer == 0x00) {
        return false;
    }

    unsigned bitrate_index = (header >> 12) & 0x0f;

    if (bitrate_index == 0 || bitrate_index == 0x0f) {
        // Disallow "free" bitrate.
        return false;
    }

    unsigned sampling_rate_index = (header >> 10) & 3;

    if (sampling_rate_index == 3) {
        return false;
    }

    // MPEG 2 halves and MPEG 2.5 quarters the MPEG 1 sampling rates.
    static const int kSamplingRateV1[] = { 44100, 48000, 32000 };
    int sampling_rate = kSamplingRateV1[sampling_rate_index];
    if (version == 2 /* V2 */) {
        sampling_rate /= 2;
    } else if (version == 0 /* V2.5 */) {
        sampling_rate /= 4;
    }

    unsigned padding = (header >> 9) & 1;

    if (layer == 3) {
        // layer I

        static const int kBitrateV1[] = {
            32, 64, 96, 128, 160, 192, 224, 256,
            288, 320, 352, 384, 416, 448
        };

        static const int kBitrateV2[] = {
            32, 48, 56, 64, 80, 96, 112, 128,
            144, 160, 176, 192, 224, 256
        };

        int bitrate =
            (version == 3 /* V1 */)
                ? kBitrateV1[bitrate_index - 1]
                : kBitrateV2[bitrate_index - 1];

        if (out_bitrate) {
            *out_bitrate = bitrate;
        }

        // Layer I frames are counted in 4-byte slots.
        *frame_size = (12000 * bitrate / sampling_rate + padding) * 4;
    } else {
        // layer II or III

        static const int kBitrateV1L2[] = {
            32, 48, 56, 64, 80, 96, 112, 128,
            160, 192, 224, 256, 320, 384
        };

        static const int kBitrateV1L3[] = {
            32, 40, 48, 56, 64, 80, 96, 112,
            128, 160, 192, 224, 256, 320
        };

        static const int kBitrateV2[] = {
            8, 16, 24, 32, 40, 48, 56, 64,
            80, 96, 112, 128, 144, 160
        };

        int bitrate;
        if (version == 3 /* V1 */) {
            bitrate = (layer == 2 /* L2 */)
                ? kBitrateV1L2[bitrate_index - 1]
                : kBitrateV1L3[bitrate_index - 1];
        } else {
            // V2 (or 2.5)

            bitrate = kBitrateV2[bitrate_index - 1];
        }

        if (out_bitrate) {
            *out_bitrate = bitrate;
        }

        if (version == 3 /* V1 */) {
            *frame_size = 144000 * bitrate / sampling_rate + padding;
        } else {
            // V2 or V2.5: only Layer III carries half as many samples per
            // frame; Layer II keeps the same factor as MPEG 1.
            const int factor = (layer == 1 /* L3 */) ? 72000 : 144000;
            *frame_size = factor * bitrate / sampling_rate + padding;
        }
    }

    if (out_sampling_rate) {
        *out_sampling_rate = sampling_rate;
    }

    if (out_channels) {
        // Channel mode 3 is single channel; every other mode has two.
        int channel_mode = (header >> 6) & 3;

        *out_channels = (channel_mode == 3) ? 1 : 2;
    }

    return true;
}
179
180static bool parse_xing_header(
181        const sp<DataSource> &source, off_t first_frame_pos,
182        int32_t *frame_number = NULL, int32_t *byte_number = NULL,
183        char *table_of_contents = NULL, int32_t *quality_indicator = NULL,
184        int64_t *duration = NULL) {
185
186    if (frame_number) {
187        *frame_number = 0;
188    }
189    if (byte_number) {
190        *byte_number = 0;
191    }
192    if (table_of_contents) {
193        table_of_contents[0] = 0;
194    }
195    if (quality_indicator) {
196        *quality_indicator = 0;
197    }
198    if (duration) {
199        *duration = 0;
200    }
201
202    uint8_t buffer[4];
203    int offset = first_frame_pos;
204    if (source->readAt(offset, &buffer, 4) < 4) { // get header
205        return false;
206    }
207    offset += 4;
208
209    uint8_t id, layer, sr_index, mode;
210    layer = (buffer[1] >> 1) & 3;
211    id = (buffer[1] >> 3) & 3;
212    sr_index = (buffer[2] >> 2) & 3;
213    mode = (buffer[3] >> 6) & 3;
214    if (layer == 0) {
215        return false;
216    }
217    if (id == 1) {
218        return false;
219    }
220    if (sr_index == 3) {
221        return false;
222    }
223    // determine offset of XING header
224    if(id&1) { // mpeg1
225        if (mode != 3) offset += 32;
226        else offset += 17;
227    } else { // mpeg2
228        if (mode != 3) offset += 17;
229        else offset += 9;
230    }
231
232    if (source->readAt(offset, &buffer, 4) < 4) { // XING header ID
233        return false;
234    }
235    offset += 4;
236    // Check XING ID
237    if ((buffer[0] != 'X') || (buffer[1] != 'i')
238                || (buffer[2] != 'n') || (buffer[3] != 'g')) {
239        if ((buffer[0] != 'I') || (buffer[1] != 'n')
240                    || (buffer[2] != 'f') || (buffer[3] != 'o')) {
241            return false;
242        }
243    }
244
245    if (source->readAt(offset, &buffer, 4) < 4) { // flags
246        return false;
247    }
248    offset += 4;
249    uint32_t flags = U32_AT(buffer);
250
251    if (flags & 0x0001) {  // Frames field is present
252        if (source->readAt(offset, buffer, 4) < 4) {
253             return false;
254        }
255        if (frame_number) {
256           *frame_number = U32_AT(buffer);
257        }
258        int32_t frame = U32_AT(buffer);
259        // Samples per Frame: 1. index = MPEG Version ID, 2. index = Layer
260        const int samplesPerFrames[2][3] =
261        {
262            { 384, 1152, 576  }, // MPEG 2, 2.5: layer1, layer2, layer3
263            { 384, 1152, 1152 }, // MPEG 1: layer1, layer2, layer3
264        };
265        // sampling rates in hertz: 1. index = MPEG Version ID, 2. index = sampling rate index
266        const int samplingRates[4][3] =
267        {
268            { 11025, 12000, 8000,  },    // MPEG 2.5
269            { 0,     0,     0,     },    // reserved
270            { 22050, 24000, 16000, },    // MPEG 2
271            { 44100, 48000, 32000, }     // MPEG 1
272        };
273        if (duration) {
274            *duration = (int64_t)frame * samplesPerFrames[id&1][3-layer] * 1000000LL
275                / samplingRates[id][sr_index];
276        }
277        offset += 4;
278    }
279    if (flags & 0x0002) {  // Bytes field is present
280        if (byte_number) {
281            if (source->readAt(offset, buffer, 4) < 4) {
282                return false;
283            }
284            *byte_number = U32_AT(buffer);
285        }
286        offset += 4;
287    }
288    if (flags & 0x0004) {  // TOC field is present
289       if (table_of_contents) {
290            if (source->readAt(offset + 1, table_of_contents, 99) < 99) {
291                return false;
292            }
293        }
294        offset += 100;
295    }
296    if (flags & 0x0008) {  // Quality indicator field is present
297        if (quality_indicator) {
298            if (source->readAt(offset, buffer, 4) < 4) {
299                return false;
300            }
301            *quality_indicator = U32_AT(buffer);
302        }
303    }
304    return true;
305}
306
307static bool Resync(
308        const sp<DataSource> &source, uint32_t match_header,
309        off_t *inout_pos, uint32_t *out_header) {
310    if (*inout_pos == 0) {
311        // Skip an optional ID3 header if syncing at the very beginning
312        // of the datasource.
313
314        for (;;) {
315            uint8_t id3header[10];
316            if (source->readAt(*inout_pos, id3header, sizeof(id3header))
317                    < (ssize_t)sizeof(id3header)) {
318                // If we can't even read these 10 bytes, we might as well bail
319                // out, even if there _were_ 10 bytes of valid mp3 audio data...
320                return false;
321            }
322
323            if (memcmp("ID3", id3header, 3)) {
324                break;
325            }
326
327            // Skip the ID3v2 header.
328
329            size_t len =
330                ((id3header[6] & 0x7f) << 21)
331                | ((id3header[7] & 0x7f) << 14)
332                | ((id3header[8] & 0x7f) << 7)
333                | (id3header[9] & 0x7f);
334
335            len += 10;
336
337            *inout_pos += len;
338
339            LOGV("skipped ID3 tag, new starting offset is %ld (0x%08lx)",
340                 *inout_pos, *inout_pos);
341        }
342    }
343
344    off_t pos = *inout_pos;
345    bool valid = false;
346    do {
347        if (pos >= *inout_pos + 128 * 1024) {
348            // Don't scan forever.
349            LOGV("giving up at offset %ld", pos);
350            break;
351        }
352
353        uint8_t tmp[4];
354        if (source->readAt(pos, tmp, 4) != 4) {
355            break;
356        }
357
358        uint32_t header = U32_AT(tmp);
359
360        if (match_header != 0 && (header & kMask) != (match_header & kMask)) {
361            ++pos;
362            continue;
363        }
364
365        size_t frame_size;
366        int sample_rate, num_channels, bitrate;
367        if (!get_mp3_frame_size(header, &frame_size,
368                               &sample_rate, &num_channels, &bitrate)) {
369            ++pos;
370            continue;
371        }
372
373        LOGV("found possible 1st frame at %ld (header = 0x%08x)", pos, header);
374
375        // We found what looks like a valid frame,
376        // now find its successors.
377
378        off_t test_pos = pos + frame_size;
379
380        valid = true;
381        for (int j = 0; j < 3; ++j) {
382            uint8_t tmp[4];
383            if (source->readAt(test_pos, tmp, 4) < 4) {
384                valid = false;
385                break;
386            }
387
388            uint32_t test_header = U32_AT(tmp);
389
390            LOGV("subsequent header is %08x", test_header);
391
392            if ((test_header & kMask) != (header & kMask)) {
393                valid = false;
394                break;
395            }
396
397            size_t test_frame_size;
398            if (!get_mp3_frame_size(test_header, &test_frame_size)) {
399                valid = false;
400                break;
401            }
402
403            LOGV("found subsequent frame #%d at %ld", j + 2, test_pos);
404
405            test_pos += test_frame_size;
406        }
407
408        if (valid) {
409            *inout_pos = pos;
410
411            if (out_header != NULL) {
412                *out_header = header;
413            }
414        } else {
415            LOGV("no dice, no valid sequence of frames found.");
416        }
417
418        ++pos;
419    } while (!valid);
420
421    return valid;
422}
423
424class MP3Source : public MediaSource {
425public:
426    MP3Source(
427            const sp<MetaData> &meta, const sp<DataSource> &source,
428            off_t first_frame_pos, uint32_t fixed_header,
429            int32_t byte_number, const char *table_of_contents);
430
431    virtual status_t start(MetaData *params = NULL);
432    virtual status_t stop();
433
434    virtual sp<MetaData> getFormat();
435
436    virtual status_t read(
437            MediaBuffer **buffer, const ReadOptions *options = NULL);
438
439protected:
440    virtual ~MP3Source();
441
442private:
443    sp<MetaData> mMeta;
444    sp<DataSource> mDataSource;
445    off_t mFirstFramePos;
446    uint32_t mFixedHeader;
447    off_t mCurrentPos;
448    int64_t mCurrentTimeUs;
449    bool mStarted;
450    int32_t mByteNumber; // total number of bytes in this MP3
451    // TOC entries in XING header. Skip the first one since it's always 0.
452    char mTableOfContents[99];
453    MediaBufferGroup *mGroup;
454
455    MP3Source(const MP3Source &);
456    MP3Source &operator=(const MP3Source &);
457};
458
459MP3Extractor::MP3Extractor(const sp<DataSource> &source)
460    : mDataSource(source),
461      mFirstFramePos(-1),
462      mFixedHeader(0),
463      mByteNumber(0) {
464    off_t pos = 0;
465    uint32_t header;
466    bool success = Resync(mDataSource, 0, &pos, &header);
467    CHECK(success);
468
469    if (success) {
470        mFirstFramePos = pos;
471        mFixedHeader = header;
472
473        size_t frame_size;
474        int sample_rate;
475        int num_channels;
476        int bitrate;
477        get_mp3_frame_size(
478                header, &frame_size, &sample_rate, &num_channels, &bitrate);
479
480        mMeta = new MetaData;
481
482        mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
483        mMeta->setInt32(kKeySampleRate, sample_rate);
484        mMeta->setInt32(kKeyBitRate, bitrate * 1000);
485        mMeta->setInt32(kKeyChannelCount, num_channels);
486
487        int64_t duration;
488        parse_xing_header(
489                mDataSource, mFirstFramePos, NULL, &mByteNumber,
490                mTableOfContents, NULL, &duration);
491        if (duration > 0) {
492            mMeta->setInt64(kKeyDuration, duration);
493        } else {
494            off_t fileSize;
495            if (mDataSource->getSize(&fileSize) == OK) {
496                mMeta->setInt64(
497                        kKeyDuration,
498                        8000LL * (fileSize - mFirstFramePos) / bitrate);
499            }
500        }
501    }
502}
503
504MP3Extractor::~MP3Extractor() {
505}
506
507size_t MP3Extractor::countTracks() {
508    return (mFirstFramePos < 0) ? 0 : 1;
509}
510
511sp<MediaSource> MP3Extractor::getTrack(size_t index) {
512    if (mFirstFramePos < 0 || index != 0) {
513        return NULL;
514    }
515
516    return new MP3Source(
517            mMeta, mDataSource, mFirstFramePos, mFixedHeader,
518            mByteNumber, mTableOfContents);
519}
520
521sp<MetaData> MP3Extractor::getTrackMetaData(size_t index, uint32_t flags) {
522    if (mFirstFramePos < 0 || index != 0) {
523        return NULL;
524    }
525
526    return mMeta;
527}
528
529////////////////////////////////////////////////////////////////////////////////
530
531MP3Source::MP3Source(
532        const sp<MetaData> &meta, const sp<DataSource> &source,
533        off_t first_frame_pos, uint32_t fixed_header,
534        int32_t byte_number, const char *table_of_contents)
535    : mMeta(meta),
536      mDataSource(source),
537      mFirstFramePos(first_frame_pos),
538      mFixedHeader(fixed_header),
539      mCurrentPos(0),
540      mCurrentTimeUs(0),
541      mStarted(false),
542      mByteNumber(byte_number),
543      mGroup(NULL) {
544    memcpy (mTableOfContents, table_of_contents, sizeof(mTableOfContents));
545}
546
547MP3Source::~MP3Source() {
548    if (mStarted) {
549        stop();
550    }
551}
552
553status_t MP3Source::start(MetaData *) {
554    CHECK(!mStarted);
555
556    mGroup = new MediaBufferGroup;
557
558    const size_t kMaxFrameSize = 32768;
559    mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
560
561    mCurrentPos = mFirstFramePos;
562    mCurrentTimeUs = 0;
563
564    mStarted = true;
565
566    return OK;
567}
568
569status_t MP3Source::stop() {
570    CHECK(mStarted);
571
572    delete mGroup;
573    mGroup = NULL;
574
575    mStarted = false;
576
577    return OK;
578}
579
580sp<MetaData> MP3Source::getFormat() {
581    return mMeta;
582}
583
584status_t MP3Source::read(
585        MediaBuffer **out, const ReadOptions *options) {
586    *out = NULL;
587
588    int64_t seekTimeUs;
589    if (options != NULL && options->getSeekTo(&seekTimeUs)) {
590        int32_t bitrate;
591        if (!mMeta->findInt32(kKeyBitRate, &bitrate)) {
592            // bitrate is in bits/sec.
593            LOGI("no bitrate");
594
595            return ERROR_UNSUPPORTED;
596        }
597
598        mCurrentTimeUs = seekTimeUs;
599        // interpolate in TOC to get file seek point in bytes
600        int64_t duration;
601        if ((mByteNumber > 0) && (mTableOfContents[0] > 0)
602            && mMeta->findInt64(kKeyDuration, &duration)) {
603            float percent = (float)seekTimeUs * 100 / duration;
604            float fx;
605            if( percent <= 0.0f ) {
606                fx = 0.0f;
607            } else if( percent >= 100.0f ) {
608                fx = 256.0f;
609            } else {
610                int a = (int)percent;
611                float fa, fb;
612                if ( a == 0 ) {
613                    fa = 0.0f;
614                } else {
615                    fa = (float)mTableOfContents[a-1];
616                }
617                if ( a < 99 ) {
618                    fb = (float)mTableOfContents[a];
619                } else {
620                    fb = 256.0f;
621                }
622                fx = fa + (fb-fa)*(percent-a);
623            }
624            mCurrentPos = mFirstFramePos + (int)((1.0f/256.0f)*fx*mByteNumber);
625        } else {
626            mCurrentPos = mFirstFramePos + seekTimeUs * bitrate / 8000000;
627        }
628    }
629
630    MediaBuffer *buffer;
631    status_t err = mGroup->acquire_buffer(&buffer);
632    if (err != OK) {
633        return err;
634    }
635
636    size_t frame_size;
637    for (;;) {
638        ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), 4);
639        if (n < 4) {
640            buffer->release();
641            buffer = NULL;
642
643            return ERROR_END_OF_STREAM;
644        }
645
646        uint32_t header = U32_AT((const uint8_t *)buffer->data());
647
648        if ((header & kMask) == (mFixedHeader & kMask)
649            && get_mp3_frame_size(header, &frame_size)) {
650            break;
651        }
652
653        // Lost sync.
654        LOGV("lost sync! header = 0x%08x, old header = 0x%08x\n", header, mFixedHeader);
655
656        off_t pos = mCurrentPos;
657        if (!Resync(mDataSource, mFixedHeader, &pos, NULL)) {
658            LOGE("Unable to resync. Signalling end of stream.");
659
660            buffer->release();
661            buffer = NULL;
662
663            return ERROR_END_OF_STREAM;
664        }
665
666        mCurrentPos = pos;
667
668        // Try again with the new position.
669    }
670
671    CHECK(frame_size <= buffer->size());
672
673    ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), frame_size);
674    if (n < (ssize_t)frame_size) {
675        buffer->release();
676        buffer = NULL;
677
678        return ERROR_END_OF_STREAM;
679    }
680
681    buffer->set_range(0, frame_size);
682
683    buffer->meta_data()->setInt64(kKeyTime, mCurrentTimeUs);
684
685    mCurrentPos += frame_size;
686    mCurrentTimeUs += 1152 * 1000000 / 44100;
687
688    *out = buffer;
689
690    return OK;
691}
692
693sp<MetaData> MP3Extractor::getMetaData() {
694    sp<MetaData> meta = new MetaData;
695
696    if (mFirstFramePos < 0) {
697        return meta;
698    }
699
700    meta->setCString(kKeyMIMEType, "audio/mpeg");
701
702    ID3 id3(mDataSource);
703
704    if (!id3.isValid()) {
705        return meta;
706    }
707
708    struct Map {
709        int key;
710        const char *tag1;
711        const char *tag2;
712    };
713    static const Map kMap[] = {
714        { kKeyAlbum, "TALB", "TAL" },
715        { kKeyArtist, "TPE1", "TP1" },
716        { kKeyAlbumArtist, "TPE2", "TP2" },
717        { kKeyComposer, "TCOM", "TCM" },
718        { kKeyGenre, "TCON", "TCO" },
719        { kKeyTitle, "TIT2", "TT2" },
720        { kKeyYear, "TYE", "TYER" },
721        { kKeyAuthor, "TXT", "TEXT" },
722        { kKeyCDTrackNumber, "TRK", "TRCK" },
723        { kKeyDiscNumber, "TPA", "TPOS" },
724    };
725    static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
726
727    for (size_t i = 0; i < kNumMapEntries; ++i) {
728        ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
729        if (it->done()) {
730            delete it;
731            it = new ID3::Iterator(id3, kMap[i].tag2);
732        }
733
734        if (it->done()) {
735            delete it;
736            continue;
737        }
738
739        String8 s;
740        it->getString(&s);
741        delete it;
742
743        meta->setCString(kMap[i].key, s);
744    }
745
746    size_t dataSize;
747    String8 mime;
748    const void *data = id3.getAlbumArt(&dataSize, &mime);
749
750    if (data) {
751        meta->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
752        meta->setCString(kKeyAlbumArtMIME, mime.string());
753    }
754
755    return meta;
756}
757
758bool SniffMP3(
759        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
760    off_t pos = 0;
761    uint32_t header;
762    if (!Resync(source, 0, &pos, &header)) {
763        return false;
764    }
765
766    *mimeType = MEDIA_MIMETYPE_AUDIO_MPEG;
767    *confidence = 0.3f;
768
769    return true;
770}
771
772}  // namespace android
773