MP3Extractor.cpp revision ba1f481614b4a4dd290e3b75e0f3f1879a383a44
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MP3Extractor"
19#include <utils/Log.h>
20
21#include "include/MP3Extractor.h"
22
23#include "include/ID3.h"
24
25#include <media/stagefright/DataSource.h>
26#include <media/stagefright/MediaBuffer.h>
27#include <media/stagefright/MediaBufferGroup.h>
28#include <media/stagefright/MediaDebug.h>
29#include <media/stagefright/MediaDefs.h>
30#include <media/stagefright/MediaErrors.h>
31#include <media/stagefright/MediaSource.h>
32#include <media/stagefright/MetaData.h>
33#include <media/stagefright/Utils.h>
34#include <utils/String8.h>
35
36namespace android {
37
// Header bits that have to be identical in every frame of one stream:
// frame sync, MPEG version, layer, sampling-rate index and channel mode.
// Protection, bitrate, padding, private bit, mode extension, copyright bit,
// original bit and emphasis may differ from frame to frame and are masked out.
static const uint32_t kMask =
        0xffe00000      // frame sync
        | 0x00180000    // MPEG version
        | 0x00060000    // layer
        | 0x00000c00    // sampling-rate index
        | 0x000000c0;   // channel mode
43
// Decodes a 32-bit MPEG audio frame header.
//
// On success returns true and stores the size of the frame in bytes
// (including the header) in *frame_size. The optional out parameters receive
// the sampling rate in Hz, the channel count (1 or 2) and the bitrate in
// kbit/s.
//
// Returns false, with *frame_size and all supplied out parameters set to 0,
// if the sync bits are missing, the version/layer/sampling-rate fields hold
// a reserved value, or the bitrate index is 0 ("free" format) or 15.
static bool get_mp3_frame_size(
        uint32_t header, size_t *frame_size,
        int *out_sampling_rate = NULL, int *out_channels = NULL,
        int *out_bitrate = NULL) {
    *frame_size = 0;

    if (out_sampling_rate) {
        *out_sampling_rate = 0;
    }

    if (out_channels) {
        *out_channels = 0;
    }

    if (out_bitrate) {
        *out_bitrate = 0;
    }

    if ((header & 0xffe00000) != 0xffe00000) {
        // No frame sync.
        return false;
    }

    // 3 = MPEG 1, 2 = MPEG 2, 0 = MPEG 2.5, 1 = reserved.
    unsigned version = (header >> 19) & 3;

    if (version == 0x01) {
        return false;
    }

    // 3 = layer I, 2 = layer II, 1 = layer III, 0 = reserved.
    unsigned layer = (header >> 17) & 3;

    if (layer == 0x00) {
        return false;
    }

    unsigned bitrate_index = (header >> 12) & 0x0f;

    if (bitrate_index == 0 || bitrate_index == 0x0f) {
        // Disallow "free" bitrate.
        return false;
    }

    unsigned sampling_rate_index = (header >> 10) & 3;

    if (sampling_rate_index == 3) {
        return false;
    }

    static const int kSamplingRateV1[] = { 44100, 48000, 32000 };
    int sampling_rate = kSamplingRateV1[sampling_rate_index];
    if (version == 2 /* V2 */) {
        sampling_rate /= 2;
    } else if (version == 0 /* V2.5 */) {
        sampling_rate /= 4;
    }

    unsigned padding = (header >> 9) & 1;

    if (layer == 3) {
        // layer I

        static const int kBitrateV1[] = {
            32, 64, 96, 128, 160, 192, 224, 256,
            288, 320, 352, 384, 416, 448
        };

        static const int kBitrateV2[] = {
            32, 48, 56, 64, 80, 96, 112, 128,
            144, 160, 176, 192, 224, 256
        };

        int bitrate =
            (version == 3 /* V1 */)
                ? kBitrateV1[bitrate_index - 1]
                : kBitrateV2[bitrate_index - 1];

        if (out_bitrate) {
            *out_bitrate = bitrate;
        }

        // Layer I frames are made of 4-byte slots.
        *frame_size = (12000 * bitrate / sampling_rate + padding) * 4;
    } else {
        // layer II or III

        static const int kBitrateV1L2[] = {
            32, 48, 56, 64, 80, 96, 112, 128,
            160, 192, 224, 256, 320, 384
        };

        static const int kBitrateV1L3[] = {
            32, 40, 48, 56, 64, 80, 96, 112,
            128, 160, 192, 224, 256, 320
        };

        static const int kBitrateV2[] = {
            8, 16, 24, 32, 40, 48, 56, 64,
            80, 96, 112, 128, 144, 160
        };

        int bitrate;
        if (version == 3 /* V1 */) {
            bitrate = (layer == 2 /* L2 */)
                ? kBitrateV1L2[bitrate_index - 1]
                : kBitrateV1L3[bitrate_index - 1];
        } else {
            // V2 (or 2.5)

            bitrate = kBitrateV2[bitrate_index - 1];
        }

        if (out_bitrate) {
            *out_bitrate = bitrate;
        }

        // bytes per frame = (samples per frame / 8) * bitrate / sampling rate.
        // Only layer III of MPEG 2 / 2.5 uses the short 576-sample frame;
        // layer II carries 1152 samples per frame in every MPEG version.
        int coefficient = 144000;
        if (version != 3 /* V2 or V2.5 */ && layer == 1 /* L3 */) {
            coefficient = 72000;
        }

        *frame_size = coefficient * bitrate / sampling_rate + padding;
    }

    if (out_sampling_rate) {
        *out_sampling_rate = sampling_rate;
    }

    if (out_channels) {
        int channel_mode = (header >> 6) & 3;

        // Channel mode 3 is single channel, everything else is two channels.
        *out_channels = (channel_mode == 3) ? 1 : 2;
    }

    return true;
}
179
180static bool parse_xing_header(
181        const sp<DataSource> &source, off_t first_frame_pos,
182        int32_t *frame_number = NULL, int32_t *byte_number = NULL,
183        char *table_of_contents = NULL, int32_t *quality_indicator = NULL,
184        int64_t *duration = NULL) {
185
186    if (frame_number) {
187        *frame_number = 0;
188    }
189    if (byte_number) {
190        *byte_number = 0;
191    }
192    if (table_of_contents) {
193        table_of_contents[0] = 0;
194    }
195    if (quality_indicator) {
196        *quality_indicator = 0;
197    }
198    if (duration) {
199        *duration = 0;
200    }
201
202    uint8_t buffer[4];
203    int offset = first_frame_pos;
204    if (source->readAt(offset, &buffer, 4) < 4) { // get header
205        return false;
206    }
207    offset += 4;
208
209    uint8_t id, layer, sr_index, mode;
210    layer = (buffer[1] >> 1) & 3;
211    id = (buffer[1] >> 3) & 3;
212    sr_index = (buffer[2] >> 2) & 3;
213    mode = (buffer[3] >> 6) & 3;
214    if (layer == 0) {
215        return false;
216    }
217    if (id == 1) {
218        return false;
219    }
220    if (sr_index == 3) {
221        return false;
222    }
223    // determine offset of XING header
224    if(id&1) { // mpeg1
225        if (mode != 3) offset += 32;
226        else offset += 17;
227    } else { // mpeg2
228        if (mode != 3) offset += 17;
229        else offset += 9;
230    }
231
232    if (source->readAt(offset, &buffer, 4) < 4) { // XING header ID
233        return false;
234    }
235    offset += 4;
236    // Check XING ID
237    if ((buffer[0] != 'X') || (buffer[1] != 'i')
238                || (buffer[2] != 'n') || (buffer[3] != 'g')) {
239        if ((buffer[0] != 'I') || (buffer[1] != 'n')
240                    || (buffer[2] != 'f') || (buffer[3] != 'o')) {
241            return false;
242        }
243    }
244
245    if (source->readAt(offset, &buffer, 4) < 4) { // flags
246        return false;
247    }
248    offset += 4;
249    uint32_t flags = U32_AT(buffer);
250
251    if (flags & 0x0001) {  // Frames field is present
252        if (source->readAt(offset, buffer, 4) < 4) {
253             return false;
254        }
255        if (frame_number) {
256           *frame_number = U32_AT(buffer);
257        }
258        int32_t frame = U32_AT(buffer);
259        // Samples per Frame: 1. index = MPEG Version ID, 2. index = Layer
260        const int samplesPerFrames[2][3] =
261        {
262            { 384, 1152, 576  }, // MPEG 2, 2.5: layer1, layer2, layer3
263            { 384, 1152, 1152 }, // MPEG 1: layer1, layer2, layer3
264        };
265        // sampling rates in hertz: 1. index = MPEG Version ID, 2. index = sampling rate index
266        const int samplingRates[4][3] =
267        {
268            { 11025, 12000, 8000,  },    // MPEG 2.5
269            { 0,     0,     0,     },    // reserved
270            { 22050, 24000, 16000, },    // MPEG 2
271            { 44100, 48000, 32000, }     // MPEG 1
272        };
273        if (duration) {
274            *duration = (int64_t)frame * samplesPerFrames[id&1][3-layer] * 1000000LL
275                / samplingRates[id][sr_index];
276        }
277        offset += 4;
278    }
279    if (flags & 0x0002) {  // Bytes field is present
280        if (byte_number) {
281            if (source->readAt(offset, buffer, 4) < 4) {
282                return false;
283            }
284            *byte_number = U32_AT(buffer);
285        }
286        offset += 4;
287    }
288    if (flags & 0x0004) {  // TOC field is present
289       if (table_of_contents) {
290            if (source->readAt(offset + 1, table_of_contents, 99) < 99) {
291                return false;
292            }
293        }
294        offset += 100;
295    }
296    if (flags & 0x0008) {  // Quality indicator field is present
297        if (quality_indicator) {
298            if (source->readAt(offset, buffer, 4) < 4) {
299                return false;
300            }
301            *quality_indicator = U32_AT(buffer);
302        }
303    }
304    return true;
305}
306
307static bool Resync(
308        const sp<DataSource> &source, uint32_t match_header,
309        off_t *inout_pos, uint32_t *out_header) {
310    if (*inout_pos == 0) {
311        // Skip an optional ID3 header if syncing at the very beginning
312        // of the datasource.
313
314        uint8_t id3header[10];
315        if (source->readAt(0, id3header, sizeof(id3header))
316                < (ssize_t)sizeof(id3header)) {
317            // If we can't even read these 10 bytes, we might as well bail out,
318            // even if there _were_ 10 bytes of valid mp3 audio data...
319            return false;
320        }
321
322        if (id3header[0] == 'I' && id3header[1] == 'D' && id3header[2] == '3') {
323            // Skip the ID3v2 header.
324
325            size_t len =
326                ((id3header[6] & 0x7f) << 21)
327                | ((id3header[7] & 0x7f) << 14)
328                | ((id3header[8] & 0x7f) << 7)
329                | (id3header[9] & 0x7f);
330
331            len += 10;
332
333            *inout_pos += len;
334        }
335    }
336
337    const size_t kMaxFrameSize = 4096;
338    uint8_t *buffer = new uint8_t[kMaxFrameSize];
339
340    off_t pos = *inout_pos - kMaxFrameSize;
341    size_t buffer_offset = kMaxFrameSize;
342    size_t buffer_length = kMaxFrameSize;
343    bool valid = false;
344    do {
345        if (buffer_offset + 3 >= buffer_length) {
346            if (buffer_length < kMaxFrameSize) {
347                break;
348            }
349
350            pos += buffer_offset;
351
352            if (pos >= *inout_pos + 128 * 1024) {
353                // Don't scan forever.
354                LOGV("giving up at offset %ld", pos);
355                break;
356            }
357
358            memmove(buffer, &buffer[buffer_offset], buffer_length - buffer_offset);
359            buffer_length = buffer_length - buffer_offset;
360            buffer_offset = 0;
361
362            ssize_t n = source->readAt(
363                    pos, &buffer[buffer_length], kMaxFrameSize - buffer_length);
364
365            if (n <= 0) {
366                break;
367            }
368
369            buffer_length += (size_t)n;
370
371            continue;
372        }
373
374        uint32_t header = U32_AT(&buffer[buffer_offset]);
375
376        if (match_header != 0 && (header & kMask) != (match_header & kMask)) {
377            ++buffer_offset;
378            continue;
379        }
380
381        size_t frame_size;
382        int sample_rate, num_channels, bitrate;
383        if (!get_mp3_frame_size(header, &frame_size,
384                               &sample_rate, &num_channels, &bitrate)) {
385            ++buffer_offset;
386            continue;
387        }
388
389        LOGV("found possible 1st frame at %ld", pos + buffer_offset);
390
391        // We found what looks like a valid frame,
392        // now find its successors.
393
394        off_t test_pos = pos + buffer_offset + frame_size;
395
396        valid = true;
397        for (int j = 0; j < 3; ++j) {
398            uint8_t tmp[4];
399            if (source->readAt(test_pos, tmp, 4) < 4) {
400                valid = false;
401                break;
402            }
403
404            uint32_t test_header = U32_AT(tmp);
405
406            LOGV("subsequent header is %08x", test_header);
407
408            if ((test_header & kMask) != (header & kMask)) {
409                valid = false;
410                break;
411            }
412
413            size_t test_frame_size;
414            if (!get_mp3_frame_size(test_header, &test_frame_size)) {
415                valid = false;
416                break;
417            }
418
419            LOGV("found subsequent frame #%d at %ld", j + 2, test_pos);
420
421            test_pos += test_frame_size;
422        }
423
424        if (valid) {
425            *inout_pos = pos + buffer_offset;
426
427            if (out_header != NULL) {
428                *out_header = header;
429            }
430        } else {
431            LOGV("no dice, no valid sequence of frames found.");
432        }
433
434        ++buffer_offset;
435
436    } while (!valid);
437
438    delete[] buffer;
439    buffer = NULL;
440
441    return valid;
442}
443
444class MP3Source : public MediaSource {
445public:
446    MP3Source(
447            const sp<MetaData> &meta, const sp<DataSource> &source,
448            off_t first_frame_pos, uint32_t fixed_header,
449            int32_t byte_number, const char *table_of_contents);
450
451    virtual status_t start(MetaData *params = NULL);
452    virtual status_t stop();
453
454    virtual sp<MetaData> getFormat();
455
456    virtual status_t read(
457            MediaBuffer **buffer, const ReadOptions *options = NULL);
458
459protected:
460    virtual ~MP3Source();
461
462private:
463    sp<MetaData> mMeta;
464    sp<DataSource> mDataSource;
465    off_t mFirstFramePos;
466    uint32_t mFixedHeader;
467    off_t mCurrentPos;
468    int64_t mCurrentTimeUs;
469    bool mStarted;
470    int32_t mByteNumber; // total number of bytes in this MP3
471    // TOC entries in XING header. Skip the first one since it's always 0.
472    char mTableOfContents[99];
473    MediaBufferGroup *mGroup;
474
475    MP3Source(const MP3Source &);
476    MP3Source &operator=(const MP3Source &);
477};
478
479MP3Extractor::MP3Extractor(const sp<DataSource> &source)
480    : mDataSource(source),
481      mFirstFramePos(-1),
482      mFixedHeader(0),
483      mByteNumber(0) {
484    off_t pos = 0;
485    uint32_t header;
486    bool success = Resync(mDataSource, 0, &pos, &header);
487    CHECK(success);
488
489    if (success) {
490        mFirstFramePos = pos;
491        mFixedHeader = header;
492
493        size_t frame_size;
494        int sample_rate;
495        int num_channels;
496        int bitrate;
497        get_mp3_frame_size(
498                header, &frame_size, &sample_rate, &num_channels, &bitrate);
499
500        mMeta = new MetaData;
501
502        mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
503        mMeta->setInt32(kKeySampleRate, sample_rate);
504        mMeta->setInt32(kKeyBitRate, bitrate * 1000);
505        mMeta->setInt32(kKeyChannelCount, num_channels);
506
507        int64_t duration;
508        parse_xing_header(
509                mDataSource, mFirstFramePos, NULL, &mByteNumber,
510                mTableOfContents, NULL, &duration);
511        if (duration > 0) {
512            mMeta->setInt64(kKeyDuration, duration);
513        } else {
514            off_t fileSize;
515            if (mDataSource->getSize(&fileSize) == OK) {
516                mMeta->setInt64(
517                        kKeyDuration,
518                        8000LL * (fileSize - mFirstFramePos) / bitrate);
519            }
520        }
521    }
522}
523
524MP3Extractor::~MP3Extractor() {
525}
526
527size_t MP3Extractor::countTracks() {
528    return (mFirstFramePos < 0) ? 0 : 1;
529}
530
531sp<MediaSource> MP3Extractor::getTrack(size_t index) {
532    if (mFirstFramePos < 0 || index != 0) {
533        return NULL;
534    }
535
536    return new MP3Source(
537            mMeta, mDataSource, mFirstFramePos, mFixedHeader,
538            mByteNumber, mTableOfContents);
539}
540
541sp<MetaData> MP3Extractor::getTrackMetaData(size_t index, uint32_t flags) {
542    if (mFirstFramePos < 0 || index != 0) {
543        return NULL;
544    }
545
546    return mMeta;
547}
548
549////////////////////////////////////////////////////////////////////////////////
550
551MP3Source::MP3Source(
552        const sp<MetaData> &meta, const sp<DataSource> &source,
553        off_t first_frame_pos, uint32_t fixed_header,
554        int32_t byte_number, const char *table_of_contents)
555    : mMeta(meta),
556      mDataSource(source),
557      mFirstFramePos(first_frame_pos),
558      mFixedHeader(fixed_header),
559      mCurrentPos(0),
560      mCurrentTimeUs(0),
561      mStarted(false),
562      mByteNumber(byte_number),
563      mGroup(NULL) {
564    memcpy (mTableOfContents, table_of_contents, sizeof(mTableOfContents));
565}
566
567MP3Source::~MP3Source() {
568    if (mStarted) {
569        stop();
570    }
571}
572
573status_t MP3Source::start(MetaData *) {
574    CHECK(!mStarted);
575
576    mGroup = new MediaBufferGroup;
577
578    const size_t kMaxFrameSize = 32768;
579    mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
580
581    mCurrentPos = mFirstFramePos;
582    mCurrentTimeUs = 0;
583
584    mStarted = true;
585
586    return OK;
587}
588
589status_t MP3Source::stop() {
590    CHECK(mStarted);
591
592    delete mGroup;
593    mGroup = NULL;
594
595    mStarted = false;
596
597    return OK;
598}
599
600sp<MetaData> MP3Source::getFormat() {
601    return mMeta;
602}
603
604status_t MP3Source::read(
605        MediaBuffer **out, const ReadOptions *options) {
606    *out = NULL;
607
608    int64_t seekTimeUs;
609    if (options != NULL && options->getSeekTo(&seekTimeUs)) {
610        int32_t bitrate;
611        if (!mMeta->findInt32(kKeyBitRate, &bitrate)) {
612            // bitrate is in bits/sec.
613            LOGI("no bitrate");
614
615            return ERROR_UNSUPPORTED;
616        }
617
618        mCurrentTimeUs = seekTimeUs;
619        // interpolate in TOC to get file seek point in bytes
620        int64_t duration;
621        if ((mByteNumber > 0) && (mTableOfContents[0] > 0)
622            && mMeta->findInt64(kKeyDuration, &duration)) {
623            float percent = (float)seekTimeUs * 100 / duration;
624            float fx;
625            if( percent <= 0.0f ) {
626                fx = 0.0f;
627            } else if( percent >= 100.0f ) {
628                fx = 256.0f;
629            } else {
630                int a = (int)percent;
631                float fa, fb;
632                if ( a == 0 ) {
633                    fa = 0.0f;
634                } else {
635                    fa = (float)mTableOfContents[a-1];
636                }
637                if ( a < 99 ) {
638                    fb = (float)mTableOfContents[a];
639                } else {
640                    fb = 256.0f;
641                }
642                fx = fa + (fb-fa)*(percent-a);
643            }
644            mCurrentPos = mFirstFramePos + (int)((1.0f/256.0f)*fx*mByteNumber);
645        } else {
646            mCurrentPos = mFirstFramePos + seekTimeUs * bitrate / 8000000;
647        }
648    }
649
650    MediaBuffer *buffer;
651    status_t err = mGroup->acquire_buffer(&buffer);
652    if (err != OK) {
653        return err;
654    }
655
656    size_t frame_size;
657    for (;;) {
658        ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), 4);
659        if (n < 4) {
660            buffer->release();
661            buffer = NULL;
662
663            return ERROR_END_OF_STREAM;
664        }
665
666        uint32_t header = U32_AT((const uint8_t *)buffer->data());
667
668        if ((header & kMask) == (mFixedHeader & kMask)
669            && get_mp3_frame_size(header, &frame_size)) {
670            break;
671        }
672
673        // Lost sync.
674        LOGV("lost sync! header = 0x%08x, old header = 0x%08x\n", header, mFixedHeader);
675
676        off_t pos = mCurrentPos;
677        if (!Resync(mDataSource, mFixedHeader, &pos, NULL)) {
678            LOGE("Unable to resync. Signalling end of stream.");
679
680            buffer->release();
681            buffer = NULL;
682
683            return ERROR_END_OF_STREAM;
684        }
685
686        mCurrentPos = pos;
687
688        // Try again with the new position.
689    }
690
691    CHECK(frame_size <= buffer->size());
692
693    ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), frame_size);
694    if (n < (ssize_t)frame_size) {
695        buffer->release();
696        buffer = NULL;
697
698        return ERROR_END_OF_STREAM;
699    }
700
701    buffer->set_range(0, frame_size);
702
703    buffer->meta_data()->setInt64(kKeyTime, mCurrentTimeUs);
704
705    mCurrentPos += frame_size;
706    mCurrentTimeUs += 1152 * 1000000 / 44100;
707
708    *out = buffer;
709
710    return OK;
711}
712
713sp<MetaData> MP3Extractor::getMetaData() {
714    sp<MetaData> meta = new MetaData;
715
716    if (mFirstFramePos < 0) {
717        return meta;
718    }
719
720    meta->setCString(kKeyMIMEType, "audio/mpeg");
721
722    ID3 id3(mDataSource);
723
724    if (!id3.isValid()) {
725        return meta;
726    }
727
728    struct Map {
729        int key;
730        const char *tag1;
731        const char *tag2;
732    };
733    static const Map kMap[] = {
734        { kKeyAlbum, "TALB", "TAL" },
735        { kKeyArtist, "TPE1", "TP1" },
736        { kKeyAlbumArtist, "TPE2", "TP2" },
737        { kKeyComposer, "TCOM", "TCM" },
738        { kKeyGenre, "TCON", "TCO" },
739        { kKeyTitle, "TIT2", "TT2" },
740        { kKeyYear, "TYE", "TYER" },
741        { kKeyAuthor, "TXT", "TEXT" },
742        { kKeyCDTrackNumber, "TRK", "TRCK" },
743        { kKeyDiscNumber, "TPA", "TPOS" },
744    };
745    static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
746
747    for (size_t i = 0; i < kNumMapEntries; ++i) {
748        ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
749        if (it->done()) {
750            delete it;
751            it = new ID3::Iterator(id3, kMap[i].tag2);
752        }
753
754        if (it->done()) {
755            delete it;
756            continue;
757        }
758
759        String8 s;
760        it->getString(&s);
761        delete it;
762
763        meta->setCString(kMap[i].key, s);
764    }
765
766    size_t dataSize;
767    String8 mime;
768    const void *data = id3.getAlbumArt(&dataSize, &mime);
769
770    if (data) {
771        meta->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
772        meta->setCString(kKeyAlbumArtMIME, mime.string());
773    }
774
775    return meta;
776}
777
778bool SniffMP3(
779        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
780    off_t pos = 0;
781    uint32_t header;
782    if (!Resync(source, 0, &pos, &header)) {
783        return false;
784    }
785
786    *mimeType = MEDIA_MIMETYPE_AUDIO_MPEG;
787    *confidence = 0.3f;
788
789    return true;
790}
791
792}  // namespace android
793