MP3Extractor.cpp revision db74495dbf653a72018396607fae63946bed44ec
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MP3Extractor"
19#include <utils/Log.h>
20
21#include "include/MP3Extractor.h"
22
23#include <media/stagefright/DataSource.h>
24#include <media/stagefright/MediaBuffer.h>
25#include <media/stagefright/MediaBufferGroup.h>
26#include <media/stagefright/MediaDebug.h>
27#include <media/stagefright/MediaDefs.h>
28#include <media/stagefright/MediaErrors.h>
29#include <media/stagefright/MediaSource.h>
30#include <media/stagefright/MetaData.h>
31#include <media/stagefright/Utils.h>
32#include <utils/String8.h>
33
34namespace android {
35
// Header bits that have to be identical for two frames to be considered part
// of the same stream. The protection bit, bitrate index, padding bit,
// private bit and mode extension are left out of the mask.
static const uint32_t kMask =
    0xffe00000      // frame sync
    | 0x00180000    // MPEG version ID
    | 0x00060000    // layer
    | 0x00000c00    // sampling rate index
    | 0x000000c0    // channel mode
    | 0x0000000f;   // remaining low bits (copyright/original/emphasis in the
                    // MPEG audio header layout)
39
// Decodes a 32-bit MPEG audio frame header.
//
// header            - the four header bytes as a big-endian 32-bit value.
// frame_size        - receives the length of the frame in bytes (always
//                     written; 0 when the header is rejected).
// out_sampling_rate - optional, receives the sampling rate in Hz.
// out_channels      - optional, receives 1 (mono) or 2.
// out_bitrate       - optional, receives the bitrate in kbit/s.
//
// Returns false if the header lacks the sync pattern, uses a reserved
// version/layer/sampling-rate value, or uses the "free"/invalid bitrate index.
// All outputs are zeroed before any validation takes place.
static bool get_mp3_frame_size(
        uint32_t header, size_t *frame_size,
        int *out_sampling_rate = NULL, int *out_channels = NULL,
        int *out_bitrate = NULL) {
    *frame_size = 0;

    if (out_sampling_rate) {
        *out_sampling_rate = 0;
    }

    if (out_channels) {
        *out_channels = 0;
    }

    if (out_bitrate) {
        *out_bitrate = 0;
    }

    // The top 11 bits are the frame sync and must all be set.
    if ((header & 0xffe00000) != 0xffe00000) {
        return false;
    }

    // 3 = MPEG 1, 2 = MPEG 2, 0 = MPEG 2.5, 1 = reserved.
    unsigned version = (header >> 19) & 3;

    if (version == 0x01) {
        return false;
    }

    // 3 = layer I, 2 = layer II, 1 = layer III, 0 = reserved.
    unsigned layer = (header >> 17) & 3;

    if (layer == 0x00) {
        return false;
    }

    unsigned bitrate_index = (header >> 12) & 0x0f;

    if (bitrate_index == 0 || bitrate_index == 0x0f) {
        // Disallow "free" bitrate.
        return false;
    }

    unsigned sampling_rate_index = (header >> 10) & 3;

    if (sampling_rate_index == 3) {
        return false;
    }

    static const int kSamplingRateV1[] = { 44100, 48000, 32000 };
    int sampling_rate = kSamplingRateV1[sampling_rate_index];
    if (version == 2 /* V2 */) {
        sampling_rate /= 2;
    } else if (version == 0 /* V2.5 */) {
        sampling_rate /= 4;
    }

    unsigned padding = (header >> 9) & 1;

    if (layer == 3) {
        // layer I

        static const int kBitrateV1[] = {
            32, 64, 96, 128, 160, 192, 224, 256,
            288, 320, 352, 384, 416, 448
        };

        static const int kBitrateV2[] = {
            32, 48, 56, 64, 80, 96, 112, 128,
            144, 160, 176, 192, 224, 256
        };

        int bitrate =
            (version == 3 /* V1 */)
                ? kBitrateV1[bitrate_index - 1]
                : kBitrateV2[bitrate_index - 1];

        if (out_bitrate) {
            *out_bitrate = bitrate;
        }

        // Layer I frames are measured in 4-byte slots.
        *frame_size = (12000 * bitrate / sampling_rate + padding) * 4;
    } else {
        // layer II or III

        static const int kBitrateV1L2[] = {
            32, 48, 56, 64, 80, 96, 112, 128,
            160, 192, 224, 256, 320, 384
        };

        static const int kBitrateV1L3[] = {
            32, 40, 48, 56, 64, 80, 96, 112,
            128, 160, 192, 224, 256, 320
        };

        static const int kBitrateV2[] = {
            8, 16, 24, 32, 40, 48, 56, 64,
            80, 96, 112, 128, 144, 160
        };

        int bitrate;
        if (version == 3 /* V1 */) {
            bitrate = (layer == 2 /* L2 */)
                ? kBitrateV1L2[bitrate_index - 1]
                : kBitrateV1L3[bitrate_index - 1];
        } else {
            // V2 (or 2.5)

            bitrate = kBitrateV2[bitrate_index - 1];
        }

        if (out_bitrate) {
            *out_bitrate = bitrate;
        }

        // A frame holds 1152 samples (144000 = 1152 / 8 * 1000) except for
        // layer III in MPEG 2 / 2.5, which holds 576 samples (72000).
        // Layer II keeps 1152 samples in every MPEG version, so the halved
        // coefficient must only be applied to V2/V2.5 layer III.
        const bool half_length_frame =
            (version != 3 /* V1 */) && (layer == 1 /* L3 */);
        const int coefficient = half_length_frame ? 72000 : 144000;

        *frame_size = coefficient * bitrate / sampling_rate + padding;
    }

    if (out_sampling_rate) {
        *out_sampling_rate = sampling_rate;
    }

    if (out_channels) {
        int channel_mode = (header >> 6) & 3;

        // Channel mode 3 is single channel; every other mode has two.
        *out_channels = (channel_mode == 3) ? 1 : 2;
    }

    return true;
}
175
176static bool parse_xing_header(
177        const sp<DataSource> &source, off_t first_frame_pos,
178        int32_t *frame_number = NULL, int32_t *byte_number = NULL,
179        char *table_of_contents = NULL, int32_t *quality_indicator = NULL,
180        int64_t *duration = NULL) {
181
182    if (frame_number) {
183        *frame_number = 0;
184    }
185    if (byte_number) {
186        *byte_number = 0;
187    }
188    if (table_of_contents) {
189        table_of_contents[0] = 0;
190    }
191    if (quality_indicator) {
192        *quality_indicator = 0;
193    }
194    if (duration) {
195        *duration = 0;
196    }
197
198    uint8_t buffer[4];
199    int offset = first_frame_pos;
200    if (source->readAt(offset, &buffer, 4) < 4) { // get header
201        return false;
202    }
203    offset += 4;
204
205    uint8_t id, layer, sr_index, mode;
206    layer = (buffer[1] >> 1) & 3;
207    id = (buffer[1] >> 3) & 3;
208    sr_index = (buffer[2] >> 2) & 3;
209    mode = (buffer[3] >> 6) & 3;
210    if (layer == 0) {
211        return false;
212    }
213    if (id == 1) {
214        return false;
215    }
216    if (sr_index == 3) {
217        return false;
218    }
219    // determine offset of XING header
220    if(id&1) { // mpeg1
221        if (mode != 3) offset += 32;
222        else offset += 17;
223    } else { // mpeg2
224        if (mode != 3) offset += 17;
225        else offset += 9;
226    }
227
228    if (source->readAt(offset, &buffer, 4) < 4) { // XING header ID
229        return false;
230    }
231    offset += 4;
232    // Check XING ID
233    if ((buffer[0] != 'X') || (buffer[1] != 'i')
234                || (buffer[2] != 'n') || (buffer[3] != 'g')) {
235        if ((buffer[0] != 'I') || (buffer[1] != 'n')
236                    || (buffer[2] != 'f') || (buffer[3] != 'o')) {
237            return false;
238        }
239    }
240
241    if (source->readAt(offset, &buffer, 4) < 4) { // flags
242        return false;
243    }
244    offset += 4;
245    uint32_t flags = U32_AT(buffer);
246
247    if (flags & 0x0001) {  // Frames field is present
248        if (source->readAt(offset, buffer, 4) < 4) {
249             return false;
250        }
251        if (frame_number) {
252           *frame_number = U32_AT(buffer);
253        }
254        int32_t frame = U32_AT(buffer);
255        // Samples per Frame: 1. index = MPEG Version ID, 2. index = Layer
256        const int samplesPerFrames[2][3] =
257        {
258            { 384, 1152, 576  }, // MPEG 2, 2.5: layer1, layer2, layer3
259            { 384, 1152, 1152 }, // MPEG 1: layer1, layer2, layer3
260        };
261        // sampling rates in hertz: 1. index = MPEG Version ID, 2. index = sampling rate index
262        const int samplingRates[4][3] =
263        {
264            { 11025, 12000, 8000,  },    // MPEG 2.5
265            { 0,     0,     0,     },    // reserved
266            { 22050, 24000, 16000, },    // MPEG 2
267            { 44100, 48000, 32000, }     // MPEG 1
268        };
269        if (duration) {
270            *duration = (int64_t)frame * samplesPerFrames[id&1][3-layer] * 1000000LL
271                / samplingRates[id][sr_index];
272        }
273        offset += 4;
274    }
275    if (flags & 0x0002) {  // Bytes field is present
276        if (byte_number) {
277            if (source->readAt(offset, buffer, 4) < 4) {
278                return false;
279            }
280            *byte_number = U32_AT(buffer);
281        }
282        offset += 4;
283    }
284    if (flags & 0x0004) {  // TOC field is present
285       if (table_of_contents) {
286            if (source->readAt(offset + 1, table_of_contents, 99) < 99) {
287                return false;
288            }
289        }
290        offset += 100;
291    }
292    if (flags & 0x0008) {  // Quality indicator field is present
293        if (quality_indicator) {
294            if (source->readAt(offset, buffer, 4) < 4) {
295                return false;
296            }
297            *quality_indicator = U32_AT(buffer);
298        }
299    }
300    return true;
301}
302
303static bool Resync(
304        const sp<DataSource> &source, uint32_t match_header,
305        off_t *inout_pos, uint32_t *out_header) {
306    if (*inout_pos == 0) {
307        // Skip an optional ID3 header if syncing at the very beginning
308        // of the datasource.
309
310        uint8_t id3header[10];
311        if (source->readAt(0, id3header, sizeof(id3header))
312                < (ssize_t)sizeof(id3header)) {
313            // If we can't even read these 10 bytes, we might as well bail out,
314            // even if there _were_ 10 bytes of valid mp3 audio data...
315            return false;
316        }
317
318        if (id3header[0] == 'I' && id3header[1] == 'D' && id3header[2] == '3') {
319            // Skip the ID3v2 header.
320
321            size_t len =
322                ((id3header[6] & 0x7f) << 21)
323                | ((id3header[7] & 0x7f) << 14)
324                | ((id3header[8] & 0x7f) << 7)
325                | (id3header[9] & 0x7f);
326
327            len += 10;
328
329            *inout_pos += len;
330        }
331    }
332
333    const size_t kMaxFrameSize = 4096;
334    uint8_t *buffer = new uint8_t[kMaxFrameSize];
335
336    off_t pos = *inout_pos - kMaxFrameSize;
337    size_t buffer_offset = kMaxFrameSize;
338    size_t buffer_length = kMaxFrameSize;
339    bool valid = false;
340    do {
341        if (buffer_offset + 3 >= buffer_length) {
342            if (buffer_length < kMaxFrameSize) {
343                break;
344            }
345
346            pos += buffer_offset;
347
348            if (pos >= *inout_pos + 128 * 1024) {
349                // Don't scan forever.
350                LOGV("giving up at offset %ld", pos);
351                break;
352            }
353
354            memmove(buffer, &buffer[buffer_offset], buffer_length - buffer_offset);
355            buffer_length = buffer_length - buffer_offset;
356            buffer_offset = 0;
357
358            ssize_t n = source->readAt(
359                    pos, &buffer[buffer_length], kMaxFrameSize - buffer_length);
360
361            if (n <= 0) {
362                break;
363            }
364
365            buffer_length += (size_t)n;
366
367            continue;
368        }
369
370        uint32_t header = U32_AT(&buffer[buffer_offset]);
371
372        if (match_header != 0 && (header & kMask) != (match_header & kMask)) {
373            ++buffer_offset;
374            continue;
375        }
376
377        size_t frame_size;
378        int sample_rate, num_channels, bitrate;
379        if (!get_mp3_frame_size(header, &frame_size,
380                               &sample_rate, &num_channels, &bitrate)) {
381            ++buffer_offset;
382            continue;
383        }
384
385        LOGV("found possible 1st frame at %ld", pos + buffer_offset);
386
387        // We found what looks like a valid frame,
388        // now find its successors.
389
390        off_t test_pos = pos + buffer_offset + frame_size;
391
392        valid = true;
393        for (int j = 0; j < 3; ++j) {
394            uint8_t tmp[4];
395            if (source->readAt(test_pos, tmp, 4) < 4) {
396                valid = false;
397                break;
398            }
399
400            uint32_t test_header = U32_AT(tmp);
401
402            LOGV("subsequent header is %08x", test_header);
403
404            if ((test_header & kMask) != (header & kMask)) {
405                valid = false;
406                break;
407            }
408
409            size_t test_frame_size;
410            if (!get_mp3_frame_size(test_header, &test_frame_size)) {
411                valid = false;
412                break;
413            }
414
415            LOGV("found subsequent frame #%d at %ld", j + 2, test_pos);
416
417            test_pos += test_frame_size;
418        }
419
420        if (valid) {
421            *inout_pos = pos + buffer_offset;
422
423            if (out_header != NULL) {
424                *out_header = header;
425            }
426        } else {
427            LOGV("no dice, no valid sequence of frames found.");
428        }
429
430        ++buffer_offset;
431
432    } while (!valid);
433
434    delete[] buffer;
435    buffer = NULL;
436
437    return valid;
438}
439
440class MP3Source : public MediaSource {
441public:
442    MP3Source(
443            const sp<MetaData> &meta, const sp<DataSource> &source,
444            off_t first_frame_pos, uint32_t fixed_header,
445            int32_t byte_number, const char *table_of_contents);
446
447    virtual status_t start(MetaData *params = NULL);
448    virtual status_t stop();
449
450    virtual sp<MetaData> getFormat();
451
452    virtual status_t read(
453            MediaBuffer **buffer, const ReadOptions *options = NULL);
454
455protected:
456    virtual ~MP3Source();
457
458private:
459    sp<MetaData> mMeta;
460    sp<DataSource> mDataSource;
461    off_t mFirstFramePos;
462    uint32_t mFixedHeader;
463    off_t mCurrentPos;
464    int64_t mCurrentTimeUs;
465    bool mStarted;
466    int32_t mByteNumber; // total number of bytes in this MP3
467    // TOC entries in XING header. Skip the first one since it's always 0.
468    char mTableOfContents[99];
469    MediaBufferGroup *mGroup;
470
471    MP3Source(const MP3Source &);
472    MP3Source &operator=(const MP3Source &);
473};
474
475MP3Extractor::MP3Extractor(const sp<DataSource> &source)
476    : mDataSource(source),
477      mFirstFramePos(-1),
478      mFixedHeader(0),
479      mByteNumber(0) {
480    off_t pos = 0;
481    uint32_t header;
482    bool success = Resync(mDataSource, 0, &pos, &header);
483    CHECK(success);
484
485    if (success) {
486        mFirstFramePos = pos;
487        mFixedHeader = header;
488
489        size_t frame_size;
490        int sample_rate;
491        int num_channels;
492        int bitrate;
493        get_mp3_frame_size(
494                header, &frame_size, &sample_rate, &num_channels, &bitrate);
495
496        mMeta = new MetaData;
497
498        mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
499        mMeta->setInt32(kKeySampleRate, sample_rate);
500        mMeta->setInt32(kKeyBitRate, bitrate * 1000);
501        mMeta->setInt32(kKeyChannelCount, num_channels);
502
503        int64_t duration;
504        parse_xing_header(
505                mDataSource, mFirstFramePos, NULL, &mByteNumber,
506                mTableOfContents, NULL, &duration);
507        if (duration > 0) {
508            mMeta->setInt64(kKeyDuration, duration);
509        } else {
510            off_t fileSize;
511            if (mDataSource->getSize(&fileSize) == OK) {
512                mMeta->setInt64(
513                        kKeyDuration,
514                        8000LL * (fileSize - mFirstFramePos) / bitrate);
515            }
516        }
517    }
518}
519
520MP3Extractor::~MP3Extractor() {
521}
522
523size_t MP3Extractor::countTracks() {
524    return (mFirstFramePos < 0) ? 0 : 1;
525}
526
527sp<MediaSource> MP3Extractor::getTrack(size_t index) {
528    if (mFirstFramePos < 0 || index != 0) {
529        return NULL;
530    }
531
532    return new MP3Source(
533            mMeta, mDataSource, mFirstFramePos, mFixedHeader,
534            mByteNumber, mTableOfContents);
535}
536
537sp<MetaData> MP3Extractor::getTrackMetaData(size_t index, uint32_t flags) {
538    if (mFirstFramePos < 0 || index != 0) {
539        return NULL;
540    }
541
542    return mMeta;
543}
544
545////////////////////////////////////////////////////////////////////////////////
546
547MP3Source::MP3Source(
548        const sp<MetaData> &meta, const sp<DataSource> &source,
549        off_t first_frame_pos, uint32_t fixed_header,
550        int32_t byte_number, const char *table_of_contents)
551    : mMeta(meta),
552      mDataSource(source),
553      mFirstFramePos(first_frame_pos),
554      mFixedHeader(fixed_header),
555      mCurrentPos(0),
556      mCurrentTimeUs(0),
557      mStarted(false),
558      mByteNumber(byte_number),
559      mGroup(NULL) {
560    memcpy (mTableOfContents, table_of_contents, sizeof(mTableOfContents));
561}
562
563MP3Source::~MP3Source() {
564    if (mStarted) {
565        stop();
566    }
567}
568
569status_t MP3Source::start(MetaData *) {
570    CHECK(!mStarted);
571
572    mGroup = new MediaBufferGroup;
573
574    const size_t kMaxFrameSize = 32768;
575    mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
576
577    mCurrentPos = mFirstFramePos;
578    mCurrentTimeUs = 0;
579
580    mStarted = true;
581
582    return OK;
583}
584
585status_t MP3Source::stop() {
586    CHECK(mStarted);
587
588    delete mGroup;
589    mGroup = NULL;
590
591    mStarted = false;
592
593    return OK;
594}
595
596sp<MetaData> MP3Source::getFormat() {
597    return mMeta;
598}
599
600status_t MP3Source::read(
601        MediaBuffer **out, const ReadOptions *options) {
602    *out = NULL;
603
604    int64_t seekTimeUs;
605    if (options != NULL && options->getSeekTo(&seekTimeUs)) {
606        int32_t bitrate;
607        if (!mMeta->findInt32(kKeyBitRate, &bitrate)) {
608            // bitrate is in bits/sec.
609            LOGI("no bitrate");
610
611            return ERROR_UNSUPPORTED;
612        }
613
614        mCurrentTimeUs = seekTimeUs;
615        // interpolate in TOC to get file seek point in bytes
616        int64_t duration;
617        if ((mByteNumber > 0) && (mTableOfContents[0] > 0)
618            && mMeta->findInt64(kKeyDuration, &duration)) {
619            float percent = (float)seekTimeUs * 100 / duration;
620            float fx;
621            if( percent <= 0.0f ) {
622                fx = 0.0f;
623            } else if( percent >= 100.0f ) {
624                fx = 256.0f;
625            } else {
626                int a = (int)percent;
627                float fa, fb;
628                if ( a == 0 ) {
629                    fa = 0.0f;
630                } else {
631                    fa = (float)mTableOfContents[a-1];
632                }
633                if ( a < 99 ) {
634                    fb = (float)mTableOfContents[a];
635                } else {
636                    fb = 256.0f;
637                }
638                fx = fa + (fb-fa)*(percent-a);
639            }
640            mCurrentPos = mFirstFramePos + (int)((1.0f/256.0f)*fx*mByteNumber);
641        } else {
642            mCurrentPos = mFirstFramePos + seekTimeUs * bitrate / 8000000;
643        }
644    }
645
646    MediaBuffer *buffer;
647    status_t err = mGroup->acquire_buffer(&buffer);
648    if (err != OK) {
649        return err;
650    }
651
652    size_t frame_size;
653    for (;;) {
654        ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), 4);
655        if (n < 4) {
656            buffer->release();
657            buffer = NULL;
658
659            return ERROR_END_OF_STREAM;
660        }
661
662        uint32_t header = U32_AT((const uint8_t *)buffer->data());
663
664        if ((header & kMask) == (mFixedHeader & kMask)
665            && get_mp3_frame_size(header, &frame_size)) {
666            break;
667        }
668
669        // Lost sync.
670        LOGW("lost sync!\n");
671
672        off_t pos = mCurrentPos;
673        if (!Resync(mDataSource, mFixedHeader, &pos, NULL)) {
674            LOGE("Unable to resync. Signalling end of stream.");
675
676            buffer->release();
677            buffer = NULL;
678
679            return ERROR_END_OF_STREAM;
680        }
681
682        mCurrentPos = pos;
683
684        // Try again with the new position.
685    }
686
687    CHECK(frame_size <= buffer->size());
688
689    ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), frame_size);
690    if (n < (ssize_t)frame_size) {
691        buffer->release();
692        buffer = NULL;
693
694        return ERROR_END_OF_STREAM;
695    }
696
697    buffer->set_range(0, frame_size);
698
699    buffer->meta_data()->setInt64(kKeyTime, mCurrentTimeUs);
700
701    mCurrentPos += frame_size;
702    mCurrentTimeUs += 1152 * 1000000 / 44100;
703
704    *out = buffer;
705
706    return OK;
707}
708
709bool SniffMP3(
710        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
711    off_t pos = 0;
712    uint32_t header;
713    if (!Resync(source, 0, &pos, &header)) {
714        return false;
715    }
716
717    *mimeType = MEDIA_MIMETYPE_AUDIO_MPEG;
718    *confidence = 0.3f;
719
720    return true;
721}
722
723}  // namespace android
724