MP3Extractor.cpp revision b841f14f8e51f2365945281fbfa54ef6a1b1b5a6
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MP3Extractor"
19#include <utils/Log.h>
20
21#include "include/MP3Extractor.h"
22
23#include <media/stagefright/DataSource.h>
24#include <media/stagefright/MediaBuffer.h>
25#include <media/stagefright/MediaBufferGroup.h>
26#include <media/stagefright/MediaDebug.h>
27#include <media/stagefright/MediaDefs.h>
28#include <media/stagefright/MediaErrors.h>
29#include <media/stagefright/MediaSource.h>
30#include <media/stagefright/MetaData.h>
31#include <media/stagefright/Utils.h>
32#include <utils/String8.h>
33
34namespace android {
35
// Header bits that have to be identical in every frame of one stream.
// Protection, bitrate, padding, the private bit and the mode extension are
// allowed to vary from frame to frame and are therefore not part of the mask.
static const uint32_t kMask =
      0xffe00000u   // frame sync
    | 0x00180000u   // version
    | 0x00060000u   // layer
    | 0x00000c00u   // sampling rate index
    | 0x000000c0u   // channel mode
    | 0x0000000fu;  // remaining low bits (copyright/original/emphasis in the
                    // MPEG audio header layout)
39
// Decodes a 32-bit MPEG audio frame header.
//
//   header            - the four header bytes as a big-endian value.
//   frame_size        - receives the length of the whole frame in bytes
//                       (header included); set to 0 on failure.
//   out_sampling_rate - optional; receives the sampling rate in Hz.
//   out_channels      - optional; receives 1 for mono, 2 otherwise.
//   out_bitrate       - optional; receives the bitrate in kbit/s.
//
// Returns false (with all outputs zeroed) if the value lacks the frame sync,
// uses a version/layer/sampling-rate code this parser does not handle, or
// specifies a "free" or invalid bitrate index.
static bool get_mp3_frame_size(
        uint32_t header, size_t *frame_size,
        int *out_sampling_rate = NULL, int *out_channels = NULL,
        int *out_bitrate = NULL) {
    *frame_size = 0;

    if (out_sampling_rate) {
        *out_sampling_rate = 0;
    }

    if (out_channels) {
        *out_channels = 0;
    }

    if (out_bitrate) {
        *out_bitrate = 0;
    }

    // The top 11 bits are the frame sync and must all be set.
    if ((header & 0xffe00000) != 0xffe00000) {
        return false;
    }

    // Supported version codes: 3 = V1, 2 = V2, 0 = V2.5.
    unsigned version = (header >> 19) & 3;

    if (version == 0x01) {
        return false;
    }

    // Layer codes: 3 = layer I, 2 = layer II, 1 = layer III.
    unsigned layer = (header >> 17) & 3;

    if (layer == 0x00) {
        return false;
    }

    unsigned bitrate_index = (header >> 12) & 0x0f;

    if (bitrate_index == 0 || bitrate_index == 0x0f) {
        // Disallow "free" bitrate.
        return false;
    }

    unsigned sampling_rate_index = (header >> 10) & 3;

    if (sampling_rate_index == 3) {
        return false;
    }

    // V2 and V2.5 use the V1 rates divided by 2 and 4 respectively.
    static const int kSamplingRateV1[] = { 44100, 48000, 32000 };
    int sampling_rate = kSamplingRateV1[sampling_rate_index];
    if (version == 2 /* V2 */) {
        sampling_rate /= 2;
    } else if (version == 0 /* V2.5 */) {
        sampling_rate /= 4;
    }

    unsigned padding = (header >> 9) & 1;

    // All bitrate tables are in kbit/s and indexed by (bitrate_index - 1),
    // which is in [0, 13] after the check above.
    if (layer == 3) {
        // layer I

        static const int kBitrateV1[] = {
            32, 64, 96, 128, 160, 192, 224, 256,
            288, 320, 352, 384, 416, 448
        };

        static const int kBitrateV2[] = {
            32, 48, 56, 64, 80, 96, 112, 128,
            144, 160, 176, 192, 224, 256
        };

        int bitrate =
            (version == 3 /* V1 */)
                ? kBitrateV1[bitrate_index - 1]
                : kBitrateV2[bitrate_index - 1];

        if (out_bitrate) {
            *out_bitrate = bitrate;
        }

        // Layer I frames are made of 4-byte slots; padding adds one slot.
        *frame_size = (12000 * bitrate / sampling_rate + padding) * 4;
    } else {
        // layer II or III

        static const int kBitrateV1L2[] = {
            32, 48, 56, 64, 80, 96, 112, 128,
            160, 192, 224, 256, 320, 384
        };

        static const int kBitrateV1L3[] = {
            32, 40, 48, 56, 64, 80, 96, 112,
            128, 160, 192, 224, 256, 320
        };

        static const int kBitrateV2[] = {
            8, 16, 24, 32, 40, 48, 56, 64,
            80, 96, 112, 128, 144, 160
        };

        int bitrate;
        if (version == 3 /* V1 */) {
            bitrate = (layer == 2 /* L2 */)
                ? kBitrateV1L2[bitrate_index - 1]
                : kBitrateV1L3[bitrate_index - 1];
        } else {
            // V2 (or 2.5)

            bitrate = kBitrateV2[bitrate_index - 1];
        }

        if (out_bitrate) {
            *out_bitrate = bitrate;
        }

        // 144000 corresponds to 1152 samples per frame, 72000 to 576.
        // Only layer III halves its frame length in V2/V2.5; layer II keeps
        // 1152 samples per frame in every version.
        int factor = 144000;
        if (version != 3 /* V2 or V2.5 */ && layer == 1 /* L3 */) {
            factor = 72000;
        }

        *frame_size = factor * bitrate / sampling_rate + padding;
    }

    if (out_sampling_rate) {
        *out_sampling_rate = sampling_rate;
    }

    if (out_channels) {
        int channel_mode = (header >> 6) & 3;

        // Channel mode 3 is single channel; every other mode carries two.
        *out_channels = (channel_mode == 3) ? 1 : 2;
    }

    return true;
}
175
176static bool Resync(
177        const sp<DataSource> &source, uint32_t match_header,
178        off_t *inout_pos, uint32_t *out_header) {
179    if (*inout_pos == 0) {
180        // Skip an optional ID3 header if syncing at the very beginning
181        // of the datasource.
182
183        uint8_t id3header[10];
184        if (source->readAt(0, id3header, sizeof(id3header))
185                < (ssize_t)sizeof(id3header)) {
186            // If we can't even read these 10 bytes, we might as well bail out,
187            // even if there _were_ 10 bytes of valid mp3 audio data...
188            return false;
189        }
190
191        if (id3header[0] == 'I' && id3header[1] == 'D' && id3header[2] == '3') {
192            // Skip the ID3v2 header.
193
194            size_t len =
195                ((id3header[6] & 0x7f) << 21)
196                | ((id3header[7] & 0x7f) << 14)
197                | ((id3header[8] & 0x7f) << 7)
198                | (id3header[9] & 0x7f);
199
200            len += 10;
201
202            *inout_pos += len;
203        }
204    }
205
206    const size_t kMaxFrameSize = 4096;
207    uint8_t *buffer = new uint8_t[kMaxFrameSize];
208
209    off_t pos = *inout_pos - kMaxFrameSize;
210    size_t buffer_offset = kMaxFrameSize;
211    size_t buffer_length = kMaxFrameSize;
212    bool valid = false;
213    do {
214        if (buffer_offset + 3 >= buffer_length) {
215            if (buffer_length < kMaxFrameSize) {
216                break;
217            }
218
219            pos += buffer_offset;
220
221            if (pos >= *inout_pos + 128 * 1024) {
222                // Don't scan forever.
223                LOGV("giving up at offset %ld", pos);
224                break;
225            }
226
227            memmove(buffer, &buffer[buffer_offset], buffer_length - buffer_offset);
228            buffer_length = buffer_length - buffer_offset;
229            buffer_offset = 0;
230
231            ssize_t n = source->readAt(
232                    pos, &buffer[buffer_length], kMaxFrameSize - buffer_length);
233
234            if (n <= 0) {
235                break;
236            }
237
238            buffer_length += (size_t)n;
239
240            continue;
241        }
242
243        uint32_t header = U32_AT(&buffer[buffer_offset]);
244
245        if (match_header != 0 && (header & kMask) != (match_header & kMask)) {
246            ++buffer_offset;
247            continue;
248        }
249
250        size_t frame_size;
251        int sample_rate, num_channels, bitrate;
252        if (!get_mp3_frame_size(header, &frame_size,
253                               &sample_rate, &num_channels, &bitrate)) {
254            ++buffer_offset;
255            continue;
256        }
257
258        LOGV("found possible 1st frame at %ld", pos + buffer_offset);
259
260        // We found what looks like a valid frame,
261        // now find its successors.
262
263        off_t test_pos = pos + buffer_offset + frame_size;
264
265        valid = true;
266        for (int j = 0; j < 3; ++j) {
267            uint8_t tmp[4];
268            if (source->readAt(test_pos, tmp, 4) < 4) {
269                valid = false;
270                break;
271            }
272
273            uint32_t test_header = U32_AT(tmp);
274
275            LOGV("subsequent header is %08x", test_header);
276
277            if ((test_header & kMask) != (header & kMask)) {
278                valid = false;
279                break;
280            }
281
282            size_t test_frame_size;
283            if (!get_mp3_frame_size(test_header, &test_frame_size)) {
284                valid = false;
285                break;
286            }
287
288            LOGV("found subsequent frame #%d at %ld", j + 2, test_pos);
289
290            test_pos += test_frame_size;
291        }
292
293        if (valid) {
294            *inout_pos = pos + buffer_offset;
295
296            if (out_header != NULL) {
297                *out_header = header;
298            }
299        } else {
300            LOGV("no dice, no valid sequence of frames found.");
301        }
302
303        ++buffer_offset;
304
305    } while (!valid);
306
307    delete[] buffer;
308    buffer = NULL;
309
310    return valid;
311}
312
313class MP3Source : public MediaSource {
314public:
315    MP3Source(
316            const sp<MetaData> &meta, const sp<DataSource> &source,
317            off_t first_frame_pos, uint32_t fixed_header);
318
319    virtual status_t start(MetaData *params = NULL);
320    virtual status_t stop();
321
322    virtual sp<MetaData> getFormat();
323
324    virtual status_t read(
325            MediaBuffer **buffer, const ReadOptions *options = NULL);
326
327protected:
328    virtual ~MP3Source();
329
330private:
331    sp<MetaData> mMeta;
332    sp<DataSource> mDataSource;
333    off_t mFirstFramePos;
334    uint32_t mFixedHeader;
335    off_t mCurrentPos;
336    int64_t mCurrentTimeUs;
337    bool mStarted;
338
339    MediaBufferGroup *mGroup;
340
341    MP3Source(const MP3Source &);
342    MP3Source &operator=(const MP3Source &);
343};
344
345MP3Extractor::MP3Extractor(const sp<DataSource> &source)
346    : mDataSource(source),
347      mFirstFramePos(-1),
348      mFixedHeader(0) {
349    off_t pos = 0;
350    uint32_t header;
351    bool success = Resync(mDataSource, 0, &pos, &header);
352    CHECK(success);
353
354    if (success) {
355        mFirstFramePos = pos;
356        mFixedHeader = header;
357
358        size_t frame_size;
359        int sample_rate;
360        int num_channels;
361        int bitrate;
362        get_mp3_frame_size(
363                header, &frame_size, &sample_rate, &num_channels, &bitrate);
364
365        mMeta = new MetaData;
366
367        mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
368        mMeta->setInt32(kKeySampleRate, sample_rate);
369        mMeta->setInt32(kKeyBitRate, bitrate * 1000);
370        mMeta->setInt32(kKeyChannelCount, num_channels);
371
372        off_t fileSize;
373        if (mDataSource->getSize(&fileSize) == OK) {
374            mMeta->setInt64(
375                    kKeyDuration,
376                    8000LL * (fileSize - mFirstFramePos) / bitrate);
377        }
378    }
379}
380
381MP3Extractor::~MP3Extractor() {
382}
383
384size_t MP3Extractor::countTracks() {
385    return (mFirstFramePos < 0) ? 0 : 1;
386}
387
388sp<MediaSource> MP3Extractor::getTrack(size_t index) {
389    if (mFirstFramePos < 0 || index != 0) {
390        return NULL;
391    }
392
393    return new MP3Source(
394            mMeta, mDataSource, mFirstFramePos, mFixedHeader);
395}
396
397sp<MetaData> MP3Extractor::getTrackMetaData(size_t index, uint32_t flags) {
398    if (mFirstFramePos < 0 || index != 0) {
399        return NULL;
400    }
401
402    return mMeta;
403}
404
405////////////////////////////////////////////////////////////////////////////////
406
407MP3Source::MP3Source(
408        const sp<MetaData> &meta, const sp<DataSource> &source,
409        off_t first_frame_pos, uint32_t fixed_header)
410    : mMeta(meta),
411      mDataSource(source),
412      mFirstFramePos(first_frame_pos),
413      mFixedHeader(fixed_header),
414      mCurrentPos(0),
415      mCurrentTimeUs(0),
416      mStarted(false),
417      mGroup(NULL) {
418}
419
420MP3Source::~MP3Source() {
421    if (mStarted) {
422        stop();
423    }
424}
425
426status_t MP3Source::start(MetaData *) {
427    CHECK(!mStarted);
428
429    mGroup = new MediaBufferGroup;
430
431    const size_t kMaxFrameSize = 32768;
432    mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
433
434    mCurrentPos = mFirstFramePos;
435    mCurrentTimeUs = 0;
436
437    mStarted = true;
438
439    return OK;
440}
441
442status_t MP3Source::stop() {
443    CHECK(mStarted);
444
445    delete mGroup;
446    mGroup = NULL;
447
448    mStarted = false;
449
450    return OK;
451}
452
453sp<MetaData> MP3Source::getFormat() {
454    return mMeta;
455}
456
457status_t MP3Source::read(
458        MediaBuffer **out, const ReadOptions *options) {
459    *out = NULL;
460
461    int64_t seekTimeUs;
462    if (options != NULL && options->getSeekTo(&seekTimeUs)) {
463        int32_t bitrate;
464        if (!mMeta->findInt32(kKeyBitRate, &bitrate)) {
465            // bitrate is in bits/sec.
466            LOGI("no bitrate");
467
468            return ERROR_UNSUPPORTED;
469        }
470
471        mCurrentTimeUs = seekTimeUs;
472        mCurrentPos = mFirstFramePos + seekTimeUs * bitrate / 8000000;
473    }
474
475    MediaBuffer *buffer;
476    status_t err = mGroup->acquire_buffer(&buffer);
477    if (err != OK) {
478        return err;
479    }
480
481    size_t frame_size;
482    for (;;) {
483        ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), 4);
484        if (n < 4) {
485            buffer->release();
486            buffer = NULL;
487
488            return ERROR_END_OF_STREAM;
489        }
490
491        uint32_t header = U32_AT((const uint8_t *)buffer->data());
492
493        if ((header & kMask) == (mFixedHeader & kMask)
494            && get_mp3_frame_size(header, &frame_size)) {
495            break;
496        }
497
498        // Lost sync.
499        LOGW("lost sync!\n");
500
501        off_t pos = mCurrentPos;
502        if (!Resync(mDataSource, mFixedHeader, &pos, NULL)) {
503            LOGE("Unable to resync. Signalling end of stream.");
504
505            buffer->release();
506            buffer = NULL;
507
508            return ERROR_END_OF_STREAM;
509        }
510
511        mCurrentPos = pos;
512
513        // Try again with the new position.
514    }
515
516    CHECK(frame_size <= buffer->size());
517
518    ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), frame_size);
519    if (n < (ssize_t)frame_size) {
520        buffer->release();
521        buffer = NULL;
522
523        return ERROR_END_OF_STREAM;
524    }
525
526    buffer->set_range(0, frame_size);
527
528    buffer->meta_data()->setInt64(kKeyTime, mCurrentTimeUs);
529
530    mCurrentPos += frame_size;
531    mCurrentTimeUs += 1152 * 1000000 / 44100;
532
533    *out = buffer;
534
535    return OK;
536}
537
538bool SniffMP3(
539        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
540    off_t pos = 0;
541    uint32_t header;
542    if (!Resync(source, 0, &pos, &header)) {
543        return false;
544    }
545
546    *mimeType = MEDIA_MIMETYPE_AUDIO_MPEG;
547    *confidence = 0.3f;
548
549    return true;
550}
551
552}  // namespace android
553