MP3Extractor.cpp revision 0bf3921522461b1c7d321d0c667c1020ab3110bc
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MP3Extractor"
19#include <utils/Log.h>
20
21#include <media/stagefright/DataSource.h>
22#include <media/stagefright/MP3Extractor.h>
23#include <media/stagefright/MediaBuffer.h>
24#include <media/stagefright/MediaBufferGroup.h>
25#include <media/stagefright/MediaDebug.h>
26#include <media/stagefright/MediaDefs.h>
27#include <media/stagefright/MediaErrors.h>
28#include <media/stagefright/MediaSource.h>
29#include <media/stagefright/MetaData.h>
30#include <media/stagefright/Utils.h>
31#include <utils/String8.h>
32
33namespace android {
34
// Parses a 32-bit MPEG audio frame header.
//
// header             the four header bytes, most significant byte first.
// frame_size         receives the length in bytes of the whole frame
//                    (header included); set to 0 on failure.
// out_sampling_rate  optional, receives the sampling rate in Hz.
// out_channels       optional, receives 1 (mono) or 2 (any other mode).
// out_bitrate        optional, receives the bitrate in kbits/sec.
//
// Returns false, with every supplied output zeroed, if the sync word is
// missing, if the version, layer or sampling rate field holds a reserved
// value, or if the bitrate index is 0 ("free") or 15.
static bool get_mp3_frame_size(
        uint32_t header, size_t *frame_size,
        int *out_sampling_rate = NULL, int *out_channels = NULL,
        int *out_bitrate = NULL) {
    *frame_size = 0;

    if (out_sampling_rate) {
        *out_sampling_rate = 0;
    }

    if (out_channels) {
        *out_channels = 0;
    }

    if (out_bitrate) {
        *out_bitrate = 0;
    }

    // The top 11 bits form the frame sync and must all be set.
    if ((header & 0xffe00000) != 0xffe00000) {
        return false;
    }

    // 3 = MPEG-1, 2 = MPEG-2, 0 = MPEG-2.5, 1 = reserved.
    unsigned version = (header >> 19) & 3;

    if (version == 0x01) {
        return false;
    }

    // 3 = Layer I, 2 = Layer II, 1 = Layer III, 0 = reserved.
    unsigned layer = (header >> 17) & 3;

    if (layer == 0x00) {
        return false;
    }

    unsigned bitrate_index = (header >> 12) & 0x0f;

    if (bitrate_index == 0 || bitrate_index == 0x0f) {
        // Disallow "free" bitrate.
        return false;
    }

    unsigned sampling_rate_index = (header >> 10) & 3;

    if (sampling_rate_index == 3) {
        return false;
    }

    // V2 halves and V2.5 quarters the V1 sampling rates.
    static const int kSamplingRateV1[] = { 44100, 48000, 32000 };
    int sampling_rate = kSamplingRateV1[sampling_rate_index];
    if (version == 2 /* V2 */) {
        sampling_rate /= 2;
    } else if (version == 0 /* V2.5 */) {
        sampling_rate /= 4;
    }

    unsigned padding = (header >> 9) & 1;

    // All bitrate tables are in kbits/sec and are indexed by
    // (bitrate_index - 1) since index 0 was rejected above.
    int bitrate;

    if (layer == 3) {
        // layer I

        static const int kBitrateV1[] = {
            32, 64, 96, 128, 160, 192, 224, 256,
            288, 320, 352, 384, 416, 448
        };

        static const int kBitrateV2[] = {
            32, 48, 56, 64, 80, 96, 112, 128,
            144, 160, 176, 192, 224, 256
        };

        bitrate =
            (version == 3 /* V1 */)
                ? kBitrateV1[bitrate_index - 1]
                : kBitrateV2[bitrate_index - 1];

        // Layer I frames are made of 4-byte slots.
        *frame_size = (12000 * bitrate / sampling_rate + padding) * 4;
    } else {
        // layer II or III

        static const int kBitrateV1L2[] = {
            32, 48, 56, 64, 80, 96, 112, 128,
            160, 192, 224, 256, 320, 384
        };

        static const int kBitrateV1L3[] = {
            32, 40, 48, 56, 64, 80, 96, 112,
            128, 160, 192, 224, 256, 320
        };

        static const int kBitrateV2[] = {
            8, 16, 24, 32, 40, 48, 56, 64,
            80, 96, 112, 128, 144, 160
        };

        if (version == 3 /* V1 */) {
            bitrate = (layer == 2 /* L2 */)
                ? kBitrateV1L2[bitrate_index - 1]
                : kBitrateV1L3[bitrate_index - 1];
        } else {
            // V2 (or 2.5)

            bitrate = kBitrateV2[bitrate_index - 1];
        }

        // Layer II frames always carry 1152 samples. Layer III frames
        // carry 1152 samples in V1 but only 576 in V2/V2.5, so only
        // V2/V2.5 Layer III uses the halved coefficient.
        const int coefficient =
            (version == 3 /* V1 */ || layer == 2 /* L2 */) ? 144000 : 72000;

        *frame_size = coefficient * bitrate / sampling_rate + padding;
    }

    if (out_bitrate) {
        *out_bitrate = bitrate;
    }

    if (out_sampling_rate) {
        *out_sampling_rate = sampling_rate;
    }

    if (out_channels) {
        int channel_mode = (header >> 6) & 3;

        // Mode 3 is single channel; every other mode has two channels.
        *out_channels = (channel_mode == 3) ? 1 : 2;
    }

    return true;
}
170
171static bool Resync(
172        const sp<DataSource> &source, uint32_t match_header,
173        off_t *inout_pos, uint32_t *out_header) {
174    if (*inout_pos == 0) {
175        // Skip an optional ID3 header if syncing at the very beginning
176        // of the datasource.
177
178        uint8_t id3header[10];
179        if (source->read_at(0, id3header, sizeof(id3header))
180                < (ssize_t)sizeof(id3header)) {
181            // If we can't even read these 10 bytes, we might as well bail out,
182            // even if there _were_ 10 bytes of valid mp3 audio data...
183            return false;
184        }
185
186        if (id3header[0] == 'I' && id3header[1] == 'D' && id3header[2] == '3') {
187            // Skip the ID3v2 header.
188
189            size_t len =
190                ((id3header[6] & 0x7f) << 21)
191                | ((id3header[7] & 0x7f) << 14)
192                | ((id3header[8] & 0x7f) << 7)
193                | (id3header[9] & 0x7f);
194
195            len += 10;
196
197            *inout_pos += len;
198        }
199    }
200
201    // Everything must match except for
202    // protection, bitrate, padding, private bits and mode extension.
203    const uint32_t kMask = 0xfffe0ccf;
204
205    const size_t kMaxFrameSize = 4096;
206    uint8_t *buffer = new uint8_t[kMaxFrameSize];
207
208    off_t pos = *inout_pos - kMaxFrameSize;
209    size_t buffer_offset = kMaxFrameSize;
210    size_t buffer_length = kMaxFrameSize;
211    bool valid = false;
212    do {
213        if (buffer_offset + 3 >= buffer_length) {
214            if (buffer_length < kMaxFrameSize) {
215                break;
216            }
217
218            pos += buffer_offset;
219
220            if (pos >= *inout_pos + 128 * 1024) {
221                // Don't scan forever.
222                LOGV("giving up at offset %ld", pos);
223                break;
224            }
225
226            memmove(buffer, &buffer[buffer_offset], buffer_length - buffer_offset);
227            buffer_length = buffer_length - buffer_offset;
228            buffer_offset = 0;
229
230            ssize_t n = source->read_at(
231                    pos, &buffer[buffer_length], kMaxFrameSize - buffer_length);
232
233            if (n <= 0) {
234                break;
235            }
236
237            buffer_length += (size_t)n;
238
239            continue;
240        }
241
242        uint32_t header = U32_AT(&buffer[buffer_offset]);
243
244        if (match_header != 0 && (header & kMask) != (match_header & kMask)) {
245            ++buffer_offset;
246            continue;
247        }
248
249        size_t frame_size;
250        int sample_rate, num_channels, bitrate;
251        if (!get_mp3_frame_size(header, &frame_size,
252                               &sample_rate, &num_channels, &bitrate)) {
253            ++buffer_offset;
254            continue;
255        }
256
257        LOGV("found possible 1st frame at %ld", pos + buffer_offset);
258
259        // We found what looks like a valid frame,
260        // now find its successors.
261
262        off_t test_pos = pos + buffer_offset + frame_size;
263
264        valid = true;
265        for (int j = 0; j < 3; ++j) {
266            uint8_t tmp[4];
267            if (source->read_at(test_pos, tmp, 4) < 4) {
268                valid = false;
269                break;
270            }
271
272            uint32_t test_header = U32_AT(tmp);
273
274            LOGV("subsequent header is %08x", test_header);
275
276            if ((test_header & kMask) != (header & kMask)) {
277                valid = false;
278                break;
279            }
280
281            size_t test_frame_size;
282            if (!get_mp3_frame_size(test_header, &test_frame_size)) {
283                valid = false;
284                break;
285            }
286
287            LOGV("found subsequent frame #%d at %ld", j + 2, test_pos);
288
289            test_pos += test_frame_size;
290        }
291
292        if (valid) {
293            *inout_pos = pos + buffer_offset;
294
295            if (out_header != NULL) {
296                *out_header = header;
297            }
298        } else {
299            LOGV("no dice, no valid sequence of frames found.");
300        }
301
302        ++buffer_offset;
303
304    } while (!valid);
305
306    delete[] buffer;
307    buffer = NULL;
308
309    return valid;
310}
311
312class MP3Source : public MediaSource {
313public:
314    MP3Source(
315            const sp<MetaData> &meta, const sp<DataSource> &source,
316            off_t first_frame_pos, uint32_t fixed_header);
317
318    virtual status_t start(MetaData *params = NULL);
319    virtual status_t stop();
320
321    virtual sp<MetaData> getFormat();
322
323    virtual status_t read(
324            MediaBuffer **buffer, const ReadOptions *options = NULL);
325
326protected:
327    virtual ~MP3Source();
328
329private:
330    sp<MetaData> mMeta;
331    sp<DataSource> mDataSource;
332    off_t mFirstFramePos;
333    uint32_t mFixedHeader;
334    off_t mCurrentPos;
335    int64_t mCurrentTimeUs;
336    bool mStarted;
337
338    MediaBufferGroup *mGroup;
339
340    MP3Source(const MP3Source &);
341    MP3Source &operator=(const MP3Source &);
342};
343
344MP3Extractor::MP3Extractor(const sp<DataSource> &source)
345    : mDataSource(source),
346      mFirstFramePos(-1),
347      mFixedHeader(0) {
348    off_t pos = 0;
349    uint32_t header;
350    bool success = Resync(mDataSource, 0, &pos, &header);
351    CHECK(success);
352
353    if (success) {
354        mFirstFramePos = pos;
355        mFixedHeader = header;
356
357        size_t frame_size;
358        int sample_rate;
359        int num_channels;
360        int bitrate;
361        get_mp3_frame_size(
362                header, &frame_size, &sample_rate, &num_channels, &bitrate);
363
364        mMeta = new MetaData;
365
366        mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
367        mMeta->setInt32(kKeySampleRate, sample_rate);
368        mMeta->setInt32(kKeyBitRate, bitrate);
369        mMeta->setInt32(kKeyChannelCount, num_channels);
370
371        off_t fileSize;
372        if (mDataSource->getSize(&fileSize) == OK) {
373            mMeta->setInt64(
374                    kKeyDuration,
375                    8000 * (fileSize - mFirstFramePos) / bitrate);
376        }
377    }
378}
379
380MP3Extractor::~MP3Extractor() {
381}
382
383size_t MP3Extractor::countTracks() {
384    return (mFirstFramePos < 0) ? 0 : 1;
385}
386
387sp<MediaSource> MP3Extractor::getTrack(size_t index) {
388    if (mFirstFramePos < 0 || index != 0) {
389        return NULL;
390    }
391
392    return new MP3Source(
393            mMeta, mDataSource, mFirstFramePos, mFixedHeader);
394}
395
396sp<MetaData> MP3Extractor::getTrackMetaData(size_t index) {
397    if (mFirstFramePos < 0 || index != 0) {
398        return NULL;
399    }
400
401    return mMeta;
402}
403
404////////////////////////////////////////////////////////////////////////////////
405
406MP3Source::MP3Source(
407        const sp<MetaData> &meta, const sp<DataSource> &source,
408        off_t first_frame_pos, uint32_t fixed_header)
409    : mMeta(meta),
410      mDataSource(source),
411      mFirstFramePos(first_frame_pos),
412      mFixedHeader(fixed_header),
413      mCurrentPos(0),
414      mCurrentTimeUs(0),
415      mStarted(false),
416      mGroup(NULL) {
417}
418
419MP3Source::~MP3Source() {
420    if (mStarted) {
421        stop();
422    }
423}
424
425status_t MP3Source::start(MetaData *) {
426    CHECK(!mStarted);
427
428    mGroup = new MediaBufferGroup;
429
430    const size_t kMaxFrameSize = 32768;
431    mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
432
433    mCurrentPos = mFirstFramePos;
434    mCurrentTimeUs = 0;
435
436    mStarted = true;
437
438    return OK;
439}
440
441status_t MP3Source::stop() {
442    CHECK(mStarted);
443
444    delete mGroup;
445    mGroup = NULL;
446
447    mStarted = false;
448
449    return OK;
450}
451
452sp<MetaData> MP3Source::getFormat() {
453    return mMeta;
454}
455
456status_t MP3Source::read(
457        MediaBuffer **out, const ReadOptions *options) {
458    *out = NULL;
459
460    int64_t seekTimeUs;
461    if (options != NULL && options->getSeekTo(&seekTimeUs)) {
462        int32_t bitrate;
463        if (!mMeta->findInt32(kKeyBitRate, &bitrate)) {
464            // bitrate is in kbits/sec.
465            LOGI("no bitrate");
466
467            return ERROR_UNSUPPORTED;
468        }
469
470        mCurrentTimeUs = seekTimeUs;
471        mCurrentPos = mFirstFramePos + seekTimeUs * bitrate / 1000000 * 125;
472    }
473
474    MediaBuffer *buffer;
475    status_t err = mGroup->acquire_buffer(&buffer);
476    if (err != OK) {
477        return err;
478    }
479
480    size_t frame_size;
481    for (;;) {
482        ssize_t n = mDataSource->read_at(mCurrentPos, buffer->data(), 4);
483        if (n < 4) {
484            buffer->release();
485            buffer = NULL;
486
487            return ERROR_END_OF_STREAM;
488        }
489
490        uint32_t header = U32_AT((const uint8_t *)buffer->data());
491
492        if (get_mp3_frame_size(header, &frame_size)) {
493            break;
494        }
495
496        // Lost sync.
497        LOGW("lost sync!\n");
498
499        off_t pos = mCurrentPos;
500        if (!Resync(mDataSource, mFixedHeader, &pos, NULL)) {
501            LOGE("Unable to resync. Signalling end of stream.");
502
503            buffer->release();
504            buffer = NULL;
505
506            return ERROR_END_OF_STREAM;
507        }
508
509        mCurrentPos = pos;
510
511        // Try again with the new position.
512    }
513
514    CHECK(frame_size <= buffer->size());
515
516    ssize_t n = mDataSource->read_at(mCurrentPos, buffer->data(), frame_size);
517    if (n < (ssize_t)frame_size) {
518        buffer->release();
519        buffer = NULL;
520
521        return ERROR_END_OF_STREAM;
522    }
523
524    buffer->set_range(0, frame_size);
525
526    buffer->meta_data()->setInt64(kKeyTime, mCurrentTimeUs);
527
528    mCurrentPos += frame_size;
529    mCurrentTimeUs += 1152 * 1000000 / 44100;
530
531    *out = buffer;
532
533    return OK;
534}
535
536bool SniffMP3(
537        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
538    off_t pos = 0;
539    uint32_t header;
540    if (!Resync(source, 0, &pos, &header)) {
541        return false;
542    }
543
544    *mimeType = MEDIA_MIMETYPE_AUDIO_MPEG;
545    *confidence = 0.3f;
546
547    return true;
548}
549
550}  // namespace android
551