MP3Extractor.cpp revision 7be6407f2ad7f2b0782d195d9f792072c084d6f5
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MP3Extractor"
19#include <utils/Log.h>
20
21#include "include/MP3Extractor.h"
22
23#include "include/ID3.h"
24
25#include <media/stagefright/DataSource.h>
26#include <media/stagefright/MediaBuffer.h>
27#include <media/stagefright/MediaBufferGroup.h>
28#include <media/stagefright/MediaDebug.h>
29#include <media/stagefright/MediaDefs.h>
30#include <media/stagefright/MediaErrors.h>
31#include <media/stagefright/MediaSource.h>
32#include <media/stagefright/MetaData.h>
33#include <media/stagefright/Utils.h>
34#include <utils/String8.h>
35
36namespace android {
37
// Header bits that have to be identical in two frames of the same stream.
// Left out on purpose: protection, bitrate, padding, private bit and mode
// extension, all of which are allowed to differ between frames.
static const uint32_t kMask =
        0xfffe0000      // sync word, version and layer
        | 0x00000c00    // sampling rate index
        | 0x000000cf;   // channel mode plus the four lowest header bits
41
// Decodes a 32-bit MPEG audio frame header (the four header bytes packed
// big-endian into |header|).
//
//   frame_size        - receives the length of the frame in bytes.
//   out_sampling_rate - optional, receives the sampling rate in Hz.
//   out_channels      - optional, receives 1 for mono and 2 otherwise.
//   out_bitrate       - optional, receives the bitrate in kbit/s.
//
// Returns false, leaving every supplied output at 0, when the sync bits are
// missing, when the version, layer or sampling rate field holds its reserved
// value, or when the bitrate index is 0 ("free") or 15.
static bool get_mp3_frame_size(
        uint32_t header, size_t *frame_size,
        int *out_sampling_rate = NULL, int *out_channels = NULL,
        int *out_bitrate = NULL) {
    *frame_size = 0;

    if (out_sampling_rate) {
        *out_sampling_rate = 0;
    }

    if (out_channels) {
        *out_channels = 0;
    }

    if (out_bitrate) {
        *out_bitrate = 0;
    }

    // The top eleven bits form the frame sync pattern.
    if ((header & 0xffe00000) != 0xffe00000) {
        return false;
    }

    // 3 = MPEG-1, 2 = MPEG-2, 0 = MPEG-2.5, 1 = reserved.
    unsigned version = (header >> 19) & 3;

    if (version == 0x01) {
        return false;
    }

    // 3 = Layer I, 2 = Layer II, 1 = Layer III, 0 = reserved.
    unsigned layer = (header >> 17) & 3;

    if (layer == 0x00) {
        return false;
    }

    unsigned bitrate_index = (header >> 12) & 0x0f;

    if (bitrate_index == 0 || bitrate_index == 0x0f) {
        // Disallow "free" bitrate.
        return false;
    }

    unsigned sampling_rate_index = (header >> 10) & 3;

    if (sampling_rate_index == 3) {
        return false;
    }

    static const int kSamplingRateV1[] = { 44100, 48000, 32000 };
    int sampling_rate = kSamplingRateV1[sampling_rate_index];
    if (version == 2 /* V2 */) {
        sampling_rate /= 2;
    } else if (version == 0 /* V2.5 */) {
        sampling_rate /= 4;
    }

    unsigned padding = (header >> 9) & 1;

    int bitrate;

    if (layer == 3) {
        // layer I

        static const int kBitrateV1[] = {
            32, 64, 96, 128, 160, 192, 224, 256,
            288, 320, 352, 384, 416, 448
        };

        static const int kBitrateV2[] = {
            32, 48, 56, 64, 80, 96, 112, 128,
            144, 160, 176, 192, 224, 256
        };

        bitrate =
            (version == 3 /* V1 */)
                ? kBitrateV1[bitrate_index - 1]
                : kBitrateV2[bitrate_index - 1];

        // Layer I frames are counted in 4-byte slots.
        *frame_size = (12000 * bitrate / sampling_rate + padding) * 4;
    } else {
        // layer II or III

        static const int kBitrateV1L2[] = {
            32, 48, 56, 64, 80, 96, 112, 128,
            160, 192, 224, 256, 320, 384
        };

        static const int kBitrateV1L3[] = {
            32, 40, 48, 56, 64, 80, 96, 112,
            128, 160, 192, 224, 256, 320
        };

        static const int kBitrateV2[] = {
            8, 16, 24, 32, 40, 48, 56, 64,
            80, 96, 112, 128, 144, 160
        };

        if (version == 3 /* V1 */) {
            bitrate = (layer == 2 /* L2 */)
                ? kBitrateV1L2[bitrate_index - 1]
                : kBitrateV1L3[bitrate_index - 1];
        } else {
            // V2 (or 2.5)

            bitrate = kBitrateV2[bitrate_index - 1];
        }

        // A frame holds 1152 samples (coefficient 144000) except for
        // Layer III in MPEG-2/2.5, which holds 576 (coefficient 72000).
        // Layer II keeps 1152 samples in every MPEG version.
        const bool half_length_frame =
            (layer == 1 /* L3 */) && (version != 3 /* not V1 */);
        const int coefficient = half_length_frame ? 72000 : 144000;

        *frame_size = coefficient * bitrate / sampling_rate + padding;
    }

    if (out_bitrate) {
        *out_bitrate = bitrate;
    }

    if (out_sampling_rate) {
        *out_sampling_rate = sampling_rate;
    }

    if (out_channels) {
        int channel_mode = (header >> 6) & 3;

        // Channel mode 3 is single channel; every other mode has two.
        *out_channels = (channel_mode == 3) ? 1 : 2;
    }

    return true;
}
177
178static bool parse_xing_header(
179        const sp<DataSource> &source, off_t first_frame_pos,
180        int32_t *frame_number = NULL, int32_t *byte_number = NULL,
181        char *table_of_contents = NULL, int32_t *quality_indicator = NULL,
182        int64_t *duration = NULL) {
183
184    if (frame_number) {
185        *frame_number = 0;
186    }
187    if (byte_number) {
188        *byte_number = 0;
189    }
190    if (table_of_contents) {
191        table_of_contents[0] = 0;
192    }
193    if (quality_indicator) {
194        *quality_indicator = 0;
195    }
196    if (duration) {
197        *duration = 0;
198    }
199
200    uint8_t buffer[4];
201    int offset = first_frame_pos;
202    if (source->readAt(offset, &buffer, 4) < 4) { // get header
203        return false;
204    }
205    offset += 4;
206
207    uint8_t id, layer, sr_index, mode;
208    layer = (buffer[1] >> 1) & 3;
209    id = (buffer[1] >> 3) & 3;
210    sr_index = (buffer[2] >> 2) & 3;
211    mode = (buffer[3] >> 6) & 3;
212    if (layer == 0) {
213        return false;
214    }
215    if (id == 1) {
216        return false;
217    }
218    if (sr_index == 3) {
219        return false;
220    }
221    // determine offset of XING header
222    if(id&1) { // mpeg1
223        if (mode != 3) offset += 32;
224        else offset += 17;
225    } else { // mpeg2
226        if (mode != 3) offset += 17;
227        else offset += 9;
228    }
229
230    if (source->readAt(offset, &buffer, 4) < 4) { // XING header ID
231        return false;
232    }
233    offset += 4;
234    // Check XING ID
235    if ((buffer[0] != 'X') || (buffer[1] != 'i')
236                || (buffer[2] != 'n') || (buffer[3] != 'g')) {
237        if ((buffer[0] != 'I') || (buffer[1] != 'n')
238                    || (buffer[2] != 'f') || (buffer[3] != 'o')) {
239            return false;
240        }
241    }
242
243    if (source->readAt(offset, &buffer, 4) < 4) { // flags
244        return false;
245    }
246    offset += 4;
247    uint32_t flags = U32_AT(buffer);
248
249    if (flags & 0x0001) {  // Frames field is present
250        if (source->readAt(offset, buffer, 4) < 4) {
251             return false;
252        }
253        if (frame_number) {
254           *frame_number = U32_AT(buffer);
255        }
256        int32_t frame = U32_AT(buffer);
257        // Samples per Frame: 1. index = MPEG Version ID, 2. index = Layer
258        const int samplesPerFrames[2][3] =
259        {
260            { 384, 1152, 576  }, // MPEG 2, 2.5: layer1, layer2, layer3
261            { 384, 1152, 1152 }, // MPEG 1: layer1, layer2, layer3
262        };
263        // sampling rates in hertz: 1. index = MPEG Version ID, 2. index = sampling rate index
264        const int samplingRates[4][3] =
265        {
266            { 11025, 12000, 8000,  },    // MPEG 2.5
267            { 0,     0,     0,     },    // reserved
268            { 22050, 24000, 16000, },    // MPEG 2
269            { 44100, 48000, 32000, }     // MPEG 1
270        };
271        if (duration) {
272            *duration = (int64_t)frame * samplesPerFrames[id&1][3-layer] * 1000000LL
273                / samplingRates[id][sr_index];
274        }
275        offset += 4;
276    }
277    if (flags & 0x0002) {  // Bytes field is present
278        if (byte_number) {
279            if (source->readAt(offset, buffer, 4) < 4) {
280                return false;
281            }
282            *byte_number = U32_AT(buffer);
283        }
284        offset += 4;
285    }
286    if (flags & 0x0004) {  // TOC field is present
287       if (table_of_contents) {
288            if (source->readAt(offset + 1, table_of_contents, 99) < 99) {
289                return false;
290            }
291        }
292        offset += 100;
293    }
294    if (flags & 0x0008) {  // Quality indicator field is present
295        if (quality_indicator) {
296            if (source->readAt(offset, buffer, 4) < 4) {
297                return false;
298            }
299            *quality_indicator = U32_AT(buffer);
300        }
301    }
302    return true;
303}
304
305static bool Resync(
306        const sp<DataSource> &source, uint32_t match_header,
307        off_t *inout_pos, uint32_t *out_header) {
308    if (*inout_pos == 0) {
309        // Skip an optional ID3 header if syncing at the very beginning
310        // of the datasource.
311
312        uint8_t id3header[10];
313        if (source->readAt(0, id3header, sizeof(id3header))
314                < (ssize_t)sizeof(id3header)) {
315            // If we can't even read these 10 bytes, we might as well bail out,
316            // even if there _were_ 10 bytes of valid mp3 audio data...
317            return false;
318        }
319
320        if (id3header[0] == 'I' && id3header[1] == 'D' && id3header[2] == '3') {
321            // Skip the ID3v2 header.
322
323            size_t len =
324                ((id3header[6] & 0x7f) << 21)
325                | ((id3header[7] & 0x7f) << 14)
326                | ((id3header[8] & 0x7f) << 7)
327                | (id3header[9] & 0x7f);
328
329            len += 10;
330
331            *inout_pos += len;
332        }
333    }
334
335    const size_t kMaxFrameSize = 4096;
336    uint8_t *buffer = new uint8_t[kMaxFrameSize];
337
338    off_t pos = *inout_pos - kMaxFrameSize;
339    size_t buffer_offset = kMaxFrameSize;
340    size_t buffer_length = kMaxFrameSize;
341    bool valid = false;
342    do {
343        if (buffer_offset + 3 >= buffer_length) {
344            if (buffer_length < kMaxFrameSize) {
345                break;
346            }
347
348            pos += buffer_offset;
349
350            if (pos >= *inout_pos + 128 * 1024) {
351                // Don't scan forever.
352                LOGV("giving up at offset %ld", pos);
353                break;
354            }
355
356            memmove(buffer, &buffer[buffer_offset], buffer_length - buffer_offset);
357            buffer_length = buffer_length - buffer_offset;
358            buffer_offset = 0;
359
360            ssize_t n = source->readAt(
361                    pos, &buffer[buffer_length], kMaxFrameSize - buffer_length);
362
363            if (n <= 0) {
364                break;
365            }
366
367            buffer_length += (size_t)n;
368
369            continue;
370        }
371
372        uint32_t header = U32_AT(&buffer[buffer_offset]);
373
374        if (match_header != 0 && (header & kMask) != (match_header & kMask)) {
375            ++buffer_offset;
376            continue;
377        }
378
379        size_t frame_size;
380        int sample_rate, num_channels, bitrate;
381        if (!get_mp3_frame_size(header, &frame_size,
382                               &sample_rate, &num_channels, &bitrate)) {
383            ++buffer_offset;
384            continue;
385        }
386
387        LOGV("found possible 1st frame at %ld", pos + buffer_offset);
388
389        // We found what looks like a valid frame,
390        // now find its successors.
391
392        off_t test_pos = pos + buffer_offset + frame_size;
393
394        valid = true;
395        for (int j = 0; j < 3; ++j) {
396            uint8_t tmp[4];
397            if (source->readAt(test_pos, tmp, 4) < 4) {
398                valid = false;
399                break;
400            }
401
402            uint32_t test_header = U32_AT(tmp);
403
404            LOGV("subsequent header is %08x", test_header);
405
406            if ((test_header & kMask) != (header & kMask)) {
407                valid = false;
408                break;
409            }
410
411            size_t test_frame_size;
412            if (!get_mp3_frame_size(test_header, &test_frame_size)) {
413                valid = false;
414                break;
415            }
416
417            LOGV("found subsequent frame #%d at %ld", j + 2, test_pos);
418
419            test_pos += test_frame_size;
420        }
421
422        if (valid) {
423            *inout_pos = pos + buffer_offset;
424
425            if (out_header != NULL) {
426                *out_header = header;
427            }
428        } else {
429            LOGV("no dice, no valid sequence of frames found.");
430        }
431
432        ++buffer_offset;
433
434    } while (!valid);
435
436    delete[] buffer;
437    buffer = NULL;
438
439    return valid;
440}
441
442class MP3Source : public MediaSource {
443public:
444    MP3Source(
445            const sp<MetaData> &meta, const sp<DataSource> &source,
446            off_t first_frame_pos, uint32_t fixed_header,
447            int32_t byte_number, const char *table_of_contents);
448
449    virtual status_t start(MetaData *params = NULL);
450    virtual status_t stop();
451
452    virtual sp<MetaData> getFormat();
453
454    virtual status_t read(
455            MediaBuffer **buffer, const ReadOptions *options = NULL);
456
457protected:
458    virtual ~MP3Source();
459
460private:
461    sp<MetaData> mMeta;
462    sp<DataSource> mDataSource;
463    off_t mFirstFramePos;
464    uint32_t mFixedHeader;
465    off_t mCurrentPos;
466    int64_t mCurrentTimeUs;
467    bool mStarted;
468    int32_t mByteNumber; // total number of bytes in this MP3
469    // TOC entries in XING header. Skip the first one since it's always 0.
470    char mTableOfContents[99];
471    MediaBufferGroup *mGroup;
472
473    MP3Source(const MP3Source &);
474    MP3Source &operator=(const MP3Source &);
475};
476
477MP3Extractor::MP3Extractor(const sp<DataSource> &source)
478    : mDataSource(source),
479      mFirstFramePos(-1),
480      mFixedHeader(0),
481      mByteNumber(0) {
482    off_t pos = 0;
483    uint32_t header;
484    bool success = Resync(mDataSource, 0, &pos, &header);
485    CHECK(success);
486
487    if (success) {
488        mFirstFramePos = pos;
489        mFixedHeader = header;
490
491        size_t frame_size;
492        int sample_rate;
493        int num_channels;
494        int bitrate;
495        get_mp3_frame_size(
496                header, &frame_size, &sample_rate, &num_channels, &bitrate);
497
498        mMeta = new MetaData;
499
500        mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
501        mMeta->setInt32(kKeySampleRate, sample_rate);
502        mMeta->setInt32(kKeyBitRate, bitrate * 1000);
503        mMeta->setInt32(kKeyChannelCount, num_channels);
504
505        int64_t duration;
506        parse_xing_header(
507                mDataSource, mFirstFramePos, NULL, &mByteNumber,
508                mTableOfContents, NULL, &duration);
509        if (duration > 0) {
510            mMeta->setInt64(kKeyDuration, duration);
511        } else {
512            off_t fileSize;
513            if (mDataSource->getSize(&fileSize) == OK) {
514                mMeta->setInt64(
515                        kKeyDuration,
516                        8000LL * (fileSize - mFirstFramePos) / bitrate);
517            }
518        }
519    }
520}
521
522MP3Extractor::~MP3Extractor() {
523}
524
525size_t MP3Extractor::countTracks() {
526    return (mFirstFramePos < 0) ? 0 : 1;
527}
528
529sp<MediaSource> MP3Extractor::getTrack(size_t index) {
530    if (mFirstFramePos < 0 || index != 0) {
531        return NULL;
532    }
533
534    return new MP3Source(
535            mMeta, mDataSource, mFirstFramePos, mFixedHeader,
536            mByteNumber, mTableOfContents);
537}
538
539sp<MetaData> MP3Extractor::getTrackMetaData(size_t index, uint32_t flags) {
540    if (mFirstFramePos < 0 || index != 0) {
541        return NULL;
542    }
543
544    return mMeta;
545}
546
547////////////////////////////////////////////////////////////////////////////////
548
549MP3Source::MP3Source(
550        const sp<MetaData> &meta, const sp<DataSource> &source,
551        off_t first_frame_pos, uint32_t fixed_header,
552        int32_t byte_number, const char *table_of_contents)
553    : mMeta(meta),
554      mDataSource(source),
555      mFirstFramePos(first_frame_pos),
556      mFixedHeader(fixed_header),
557      mCurrentPos(0),
558      mCurrentTimeUs(0),
559      mStarted(false),
560      mByteNumber(byte_number),
561      mGroup(NULL) {
562    memcpy (mTableOfContents, table_of_contents, sizeof(mTableOfContents));
563}
564
565MP3Source::~MP3Source() {
566    if (mStarted) {
567        stop();
568    }
569}
570
571status_t MP3Source::start(MetaData *) {
572    CHECK(!mStarted);
573
574    mGroup = new MediaBufferGroup;
575
576    const size_t kMaxFrameSize = 32768;
577    mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
578
579    mCurrentPos = mFirstFramePos;
580    mCurrentTimeUs = 0;
581
582    mStarted = true;
583
584    return OK;
585}
586
587status_t MP3Source::stop() {
588    CHECK(mStarted);
589
590    delete mGroup;
591    mGroup = NULL;
592
593    mStarted = false;
594
595    return OK;
596}
597
598sp<MetaData> MP3Source::getFormat() {
599    return mMeta;
600}
601
602status_t MP3Source::read(
603        MediaBuffer **out, const ReadOptions *options) {
604    *out = NULL;
605
606    int64_t seekTimeUs;
607    if (options != NULL && options->getSeekTo(&seekTimeUs)) {
608        int32_t bitrate;
609        if (!mMeta->findInt32(kKeyBitRate, &bitrate)) {
610            // bitrate is in bits/sec.
611            LOGI("no bitrate");
612
613            return ERROR_UNSUPPORTED;
614        }
615
616        mCurrentTimeUs = seekTimeUs;
617        // interpolate in TOC to get file seek point in bytes
618        int64_t duration;
619        if ((mByteNumber > 0) && (mTableOfContents[0] > 0)
620            && mMeta->findInt64(kKeyDuration, &duration)) {
621            float percent = (float)seekTimeUs * 100 / duration;
622            float fx;
623            if( percent <= 0.0f ) {
624                fx = 0.0f;
625            } else if( percent >= 100.0f ) {
626                fx = 256.0f;
627            } else {
628                int a = (int)percent;
629                float fa, fb;
630                if ( a == 0 ) {
631                    fa = 0.0f;
632                } else {
633                    fa = (float)mTableOfContents[a-1];
634                }
635                if ( a < 99 ) {
636                    fb = (float)mTableOfContents[a];
637                } else {
638                    fb = 256.0f;
639                }
640                fx = fa + (fb-fa)*(percent-a);
641            }
642            mCurrentPos = mFirstFramePos + (int)((1.0f/256.0f)*fx*mByteNumber);
643        } else {
644            mCurrentPos = mFirstFramePos + seekTimeUs * bitrate / 8000000;
645        }
646    }
647
648    MediaBuffer *buffer;
649    status_t err = mGroup->acquire_buffer(&buffer);
650    if (err != OK) {
651        return err;
652    }
653
654    size_t frame_size;
655    for (;;) {
656        ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), 4);
657        if (n < 4) {
658            buffer->release();
659            buffer = NULL;
660
661            return ERROR_END_OF_STREAM;
662        }
663
664        uint32_t header = U32_AT((const uint8_t *)buffer->data());
665
666        if ((header & kMask) == (mFixedHeader & kMask)
667            && get_mp3_frame_size(header, &frame_size)) {
668            break;
669        }
670
671        // Lost sync.
672        LOGV("lost sync!\n");
673
674        off_t pos = mCurrentPos;
675        if (!Resync(mDataSource, mFixedHeader, &pos, NULL)) {
676            LOGE("Unable to resync. Signalling end of stream.");
677
678            buffer->release();
679            buffer = NULL;
680
681            return ERROR_END_OF_STREAM;
682        }
683
684        mCurrentPos = pos;
685
686        // Try again with the new position.
687    }
688
689    CHECK(frame_size <= buffer->size());
690
691    ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), frame_size);
692    if (n < (ssize_t)frame_size) {
693        buffer->release();
694        buffer = NULL;
695
696        return ERROR_END_OF_STREAM;
697    }
698
699    buffer->set_range(0, frame_size);
700
701    buffer->meta_data()->setInt64(kKeyTime, mCurrentTimeUs);
702
703    mCurrentPos += frame_size;
704    mCurrentTimeUs += 1152 * 1000000 / 44100;
705
706    *out = buffer;
707
708    return OK;
709}
710
711sp<MetaData> MP3Extractor::getMetaData() {
712    sp<MetaData> meta = new MetaData;
713
714    if (mFirstFramePos < 0) {
715        return meta;
716    }
717
718    meta->setCString(kKeyMIMEType, "audio/mpeg");
719
720    ID3 id3(mDataSource);
721
722    if (!id3.isValid()) {
723        return meta;
724    }
725
726    struct Map {
727        int key;
728        const char *tag1;
729        const char *tag2;
730    };
731    static const Map kMap[] = {
732        { kKeyAlbum, "TALB", "TAL" },
733        { kKeyArtist, "TPE1", "TP1" },
734        { kKeyComposer, "TCOM", "TCM" },
735        { kKeyGenre, "TCON", "TCO" },
736        { kKeyTitle, "TIT2", "TT2" },
737        { kKeyYear, "TYE", "TYER" },
738        { kKeyAuthor, "TXT", "TEXT" },
739        { kKeyCDTrackNumber, "TRK", "TRCK" },
740    };
741    static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
742
743    for (size_t i = 0; i < kNumMapEntries; ++i) {
744        ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
745        if (it->done()) {
746            delete it;
747            it = new ID3::Iterator(id3, kMap[i].tag2);
748        }
749
750        if (it->done()) {
751            delete it;
752            continue;
753        }
754
755        String8 s;
756        it->getString(&s);
757        delete it;
758
759        meta->setCString(kMap[i].key, s);
760    }
761
762    size_t dataSize;
763    String8 mime;
764    const void *data = id3.getAlbumArt(&dataSize, &mime);
765
766    if (data) {
767        meta->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
768        meta->setCString(kKeyAlbumArtMIME, mime.string());
769    }
770
771    return meta;
772}
773
774bool SniffMP3(
775        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
776    off_t pos = 0;
777    uint32_t header;
778    if (!Resync(source, 0, &pos, &header)) {
779        return false;
780    }
781
782    *mimeType = MEDIA_MIMETYPE_AUDIO_MPEG;
783    *confidence = 0.3f;
784
785    return true;
786}
787
788}  // namespace android
789