1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "MP3Extractor"
19#include <utils/Log.h>
20
21#include "include/MP3Extractor.h"
22
23#include "include/avc_utils.h"
24#include "include/ID3.h"
25#include "include/VBRISeeker.h"
26#include "include/XINGSeeker.h"
27
28#include <media/stagefright/foundation/ADebug.h>
29#include <media/stagefright/foundation/AMessage.h>
30#include <media/stagefright/DataSource.h>
31#include <media/stagefright/MediaBuffer.h>
32#include <media/stagefright/MediaBufferGroup.h>
33#include <media/stagefright/MediaDefs.h>
34#include <media/stagefright/MediaErrors.h>
35#include <media/stagefright/MediaSource.h>
36#include <media/stagefright/MetaData.h>
37#include <media/stagefright/Utils.h>
38#include <utils/String8.h>
39
40namespace android {
41
// Mask applied to two 32-bit MPEG audio frame headers before comparing them
// to decide whether the frames belong to the same stream.
// Everything must match except for
// protection, bitrate, padding, private bits, mode, mode extension,
// copyright bit, original bit and emphasis.
// Yes ... there are things that must indeed match...
// The mask keeps bits 31..17 and bits 11..10 of the header, i.e. the fields
// left over once the ones listed above are excluded (sync word, version,
// layer and sampling rate in the standard MPEG audio header layout).
static const uint32_t kMask = 0xfffe0c00;
47
48static bool Resync(
49        const sp<DataSource> &source, uint32_t match_header,
50        off64_t *inout_pos, off64_t *post_id3_pos, uint32_t *out_header) {
51    if (post_id3_pos != NULL) {
52        *post_id3_pos = 0;
53    }
54
55    if (*inout_pos == 0) {
56        // Skip an optional ID3 header if syncing at the very beginning
57        // of the datasource.
58
59        for (;;) {
60            uint8_t id3header[10];
61            if (source->readAt(*inout_pos, id3header, sizeof(id3header))
62                    < (ssize_t)sizeof(id3header)) {
63                // If we can't even read these 10 bytes, we might as well bail
64                // out, even if there _were_ 10 bytes of valid mp3 audio data...
65                return false;
66            }
67
68            if (memcmp("ID3", id3header, 3)) {
69                break;
70            }
71
72            // Skip the ID3v2 header.
73
74            size_t len =
75                ((id3header[6] & 0x7f) << 21)
76                | ((id3header[7] & 0x7f) << 14)
77                | ((id3header[8] & 0x7f) << 7)
78                | (id3header[9] & 0x7f);
79
80            len += 10;
81
82            *inout_pos += len;
83
84            ALOGV("skipped ID3 tag, new starting offset is %lld (0x%016llx)",
85                    (long long)*inout_pos, (long long)*inout_pos);
86        }
87
88        if (post_id3_pos != NULL) {
89            *post_id3_pos = *inout_pos;
90        }
91    }
92
93    off64_t pos = *inout_pos;
94    bool valid = false;
95
96    const size_t kMaxReadBytes = 1024;
97    const size_t kMaxBytesChecked = 128 * 1024;
98    uint8_t buf[kMaxReadBytes];
99    ssize_t bytesToRead = kMaxReadBytes;
100    ssize_t totalBytesRead = 0;
101    ssize_t remainingBytes = 0;
102    bool reachEOS = false;
103    uint8_t *tmp = buf;
104
105    do {
106        if (pos >= (off64_t)(*inout_pos + kMaxBytesChecked)) {
107            // Don't scan forever.
108            ALOGV("giving up at offset %lld", (long long)pos);
109            break;
110        }
111
112        if (remainingBytes < 4) {
113            if (reachEOS) {
114                break;
115            } else {
116                memcpy(buf, tmp, remainingBytes);
117                bytesToRead = kMaxReadBytes - remainingBytes;
118
119                /*
120                 * The next read position should start from the end of
121                 * the last buffer, and thus should include the remaining
122                 * bytes in the buffer.
123                 */
124                totalBytesRead = source->readAt(pos + remainingBytes,
125                                                buf + remainingBytes,
126                                                bytesToRead);
127                if (totalBytesRead <= 0) {
128                    break;
129                }
130                reachEOS = (totalBytesRead != bytesToRead);
131                totalBytesRead += remainingBytes;
132                remainingBytes = totalBytesRead;
133                tmp = buf;
134                continue;
135            }
136        }
137
138        uint32_t header = U32_AT(tmp);
139
140        if (match_header != 0 && (header & kMask) != (match_header & kMask)) {
141            ++pos;
142            ++tmp;
143            --remainingBytes;
144            continue;
145        }
146
147        size_t frame_size;
148        int sample_rate, num_channels, bitrate;
149        if (!GetMPEGAudioFrameSize(
150                    header, &frame_size,
151                    &sample_rate, &num_channels, &bitrate)) {
152            ++pos;
153            ++tmp;
154            --remainingBytes;
155            continue;
156        }
157
158        ALOGV("found possible 1st frame at %lld (header = 0x%08x)", (long long)pos, header);
159
160        // We found what looks like a valid frame,
161        // now find its successors.
162
163        off64_t test_pos = pos + frame_size;
164
165        valid = true;
166        for (int j = 0; j < 3; ++j) {
167            uint8_t tmp[4];
168            if (source->readAt(test_pos, tmp, 4) < 4) {
169                valid = false;
170                break;
171            }
172
173            uint32_t test_header = U32_AT(tmp);
174
175            ALOGV("subsequent header is %08x", test_header);
176
177            if ((test_header & kMask) != (header & kMask)) {
178                valid = false;
179                break;
180            }
181
182            size_t test_frame_size;
183            if (!GetMPEGAudioFrameSize(
184                        test_header, &test_frame_size)) {
185                valid = false;
186                break;
187            }
188
189            ALOGV("found subsequent frame #%d at %lld", j + 2, (long long)test_pos);
190
191            test_pos += test_frame_size;
192        }
193
194        if (valid) {
195            *inout_pos = pos;
196
197            if (out_header != NULL) {
198                *out_header = header;
199            }
200        } else {
201            ALOGV("no dice, no valid sequence of frames found.");
202        }
203
204        ++pos;
205        ++tmp;
206        --remainingBytes;
207    } while (!valid);
208
209    return valid;
210}
211
// MediaSource that reads MPEG audio frames from a DataSource; each successful
// read() returns one buffer holding a single frame.
class MP3Source : public MediaSource {
public:
    // meta            - format metadata, returned as-is by getFormat().
    // source          - data source the frames are read from.
    // first_frame_pos - offset reading begins at after start().
    // fixed_header    - reference frame header; frames read later must agree
    //                   with it under kMask.
    // seeker          - optional time-to-offset mapper used for seeking; may
    //                   be NULL.
    MP3Source(
            const sp<MetaData> &meta, const sp<DataSource> &source,
            off64_t first_frame_pos, uint32_t fixed_header,
            const sp<MP3Seeker> &seeker);

    // Allocates the frame buffer and rewinds to the first frame.
    virtual status_t start(MetaData *params = NULL);
    // Releases the frame buffer; must only be called while started.
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    // Performs an optional seek, then reads the next frame into *buffer.
    virtual status_t read(
            MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    virtual ~MP3Source();

private:
    // Size in bytes of the single buffer allocated by start().
    static const size_t kMaxFrameSize;
    sp<MetaData> mMeta;
    sp<DataSource> mDataSource;
    // Offset of the first frame; also the base for bitrate-based seeking.
    off64_t mFirstFramePos;
    // Header every frame is compared against (under kMask) to detect sync loss.
    uint32_t mFixedHeader;
    // Offset of the next frame to read.
    off64_t mCurrentPos;
    // Timestamp that will be attached to the next buffer returned by read().
    int64_t mCurrentTimeUs;
    bool mStarted;
    sp<MP3Seeker> mSeeker;
    // Created in start(), destroyed in stop().
    MediaBufferGroup *mGroup;

    // Timestamps are computed as mBasisTimeUs plus the duration of
    // mSamplesRead samples; both are reset by start() and by every seek.
    int64_t mBasisTimeUs;
    int64_t mSamplesRead;

    // Declared private with no definition visible here — presumably to
    // disallow copying.
    MP3Source(const MP3Source &);
    MP3Source &operator=(const MP3Source &);
};
248
249MP3Extractor::MP3Extractor(
250        const sp<DataSource> &source, const sp<AMessage> &meta)
251    : mInitCheck(NO_INIT),
252      mDataSource(source),
253      mFirstFramePos(-1),
254      mFixedHeader(0) {
255
256    off64_t pos = 0;
257    off64_t post_id3_pos;
258    uint32_t header;
259    bool success;
260
261    int64_t meta_offset;
262    uint32_t meta_header;
263    int64_t meta_post_id3_offset;
264    if (meta != NULL
265            && meta->findInt64("offset", &meta_offset)
266            && meta->findInt32("header", (int32_t *)&meta_header)
267            && meta->findInt64("post-id3-offset", &meta_post_id3_offset)) {
268        // The sniffer has already done all the hard work for us, simply
269        // accept its judgement.
270        pos = (off64_t)meta_offset;
271        header = meta_header;
272        post_id3_pos = (off64_t)meta_post_id3_offset;
273
274        success = true;
275    } else {
276        success = Resync(mDataSource, 0, &pos, &post_id3_pos, &header);
277    }
278
279    if (!success) {
280        // mInitCheck will remain NO_INIT
281        return;
282    }
283
284    mFirstFramePos = pos;
285    mFixedHeader = header;
286    mMeta = new MetaData;
287    sp<XINGSeeker> seeker = XINGSeeker::CreateFromSource(mDataSource, mFirstFramePos);
288
289    if (seeker == NULL) {
290        mSeeker = VBRISeeker::CreateFromSource(mDataSource, post_id3_pos);
291    } else {
292        mSeeker = seeker;
293        int encd = seeker->getEncoderDelay();
294        int encp = seeker->getEncoderPadding();
295        if (encd != 0 || encp != 0) {
296            mMeta->setInt32(kKeyEncoderDelay, encd);
297            mMeta->setInt32(kKeyEncoderPadding, encp);
298        }
299    }
300
301    if (mSeeker != NULL) {
302        // While it is safe to send the XING/VBRI frame to the decoder, this will
303        // result in an extra 1152 samples being output. In addition, the bitrate
304        // of the Xing header might not match the rest of the file, which could
305        // lead to problems when seeking. The real first frame to decode is after
306        // the XING/VBRI frame, so skip there.
307        size_t frame_size;
308        int sample_rate;
309        int num_channels;
310        int bitrate;
311        GetMPEGAudioFrameSize(
312                header, &frame_size, &sample_rate, &num_channels, &bitrate);
313        pos += frame_size;
314        if (!Resync(mDataSource, 0, &pos, &post_id3_pos, &header)) {
315            // mInitCheck will remain NO_INIT
316            return;
317        }
318        mFirstFramePos = pos;
319        mFixedHeader = header;
320    }
321
322    size_t frame_size;
323    int sample_rate;
324    int num_channels;
325    int bitrate;
326    GetMPEGAudioFrameSize(
327            header, &frame_size, &sample_rate, &num_channels, &bitrate);
328
329    unsigned layer = 4 - ((header >> 17) & 3);
330
331    switch (layer) {
332        case 1:
333            mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_I);
334            break;
335        case 2:
336            mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_II);
337            break;
338        case 3:
339            mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
340            break;
341        default:
342            TRESPASS();
343    }
344
345    mMeta->setInt32(kKeySampleRate, sample_rate);
346    mMeta->setInt32(kKeyBitRate, bitrate * 1000);
347    mMeta->setInt32(kKeyChannelCount, num_channels);
348
349    int64_t durationUs;
350
351    if (mSeeker == NULL || !mSeeker->getDuration(&durationUs)) {
352        off64_t fileSize;
353        if (mDataSource->getSize(&fileSize) == OK) {
354            off64_t dataLength = fileSize - mFirstFramePos;
355            if (dataLength > INT64_MAX / 8000LL) {
356                // duration would overflow
357                durationUs = INT64_MAX;
358            } else {
359                durationUs = 8000LL * dataLength / bitrate;
360            }
361        } else {
362            durationUs = -1;
363        }
364    }
365
366    if (durationUs >= 0) {
367        mMeta->setInt64(kKeyDuration, durationUs);
368    }
369
370    mInitCheck = OK;
371
372    // Get iTunes-style gapless info if present.
373    // When getting the id3 tag, skip the V1 tags to prevent the source cache
374    // from being iterated to the end of the file.
375    ID3 id3(mDataSource, true);
376    if (id3.isValid()) {
377        ID3::Iterator *com = new ID3::Iterator(id3, "COM");
378        if (com->done()) {
379            delete com;
380            com = new ID3::Iterator(id3, "COMM");
381        }
382        while(!com->done()) {
383            String8 commentdesc;
384            String8 commentvalue;
385            com->getString(&commentdesc, &commentvalue);
386            const char * desc = commentdesc.string();
387            const char * value = commentvalue.string();
388
389            // first 3 characters are the language, which we don't care about
390            if(strlen(desc) > 3 && strcmp(desc + 3, "iTunSMPB") == 0) {
391
392                int32_t delay, padding;
393                if (sscanf(value, " %*x %x %x %*x", &delay, &padding) == 2) {
394                    mMeta->setInt32(kKeyEncoderDelay, delay);
395                    mMeta->setInt32(kKeyEncoderPadding, padding);
396                }
397                break;
398            }
399            com->next();
400        }
401        delete com;
402        com = NULL;
403    }
404}
405
406size_t MP3Extractor::countTracks() {
407    return mInitCheck != OK ? 0 : 1;
408}
409
410sp<IMediaSource> MP3Extractor::getTrack(size_t index) {
411    if (mInitCheck != OK || index != 0) {
412        return NULL;
413    }
414
415    return new MP3Source(
416            mMeta, mDataSource, mFirstFramePos, mFixedHeader,
417            mSeeker);
418}
419
420sp<MetaData> MP3Extractor::getTrackMetaData(
421        size_t index, uint32_t /* flags */) {
422    if (mInitCheck != OK || index != 0) {
423        return NULL;
424    }
425
426    return mMeta;
427}
428
429////////////////////////////////////////////////////////////////////////////////
430
// Size of the buffer that read() fills with one frame.
// The theoretical maximum frame size for an MPEG audio stream should occur
// while playing a Layer 2, MPEGv2.5 audio stream at 160kbps (with padding).
// The size of this frame should be...
// ((1152 samples/frame * 160000 bits/sec) /
//  (8000 samples/sec * 8 bits/byte)) + 1 padding byte/frame = 2881 bytes/frame.
// Set our max frame size to the nearest power of 2 above this size (i.e. 4 KiB).
const size_t MP3Source::kMaxFrameSize = (1 << 12); /* 4096 bytes */
438MP3Source::MP3Source(
439        const sp<MetaData> &meta, const sp<DataSource> &source,
440        off64_t first_frame_pos, uint32_t fixed_header,
441        const sp<MP3Seeker> &seeker)
442    : mMeta(meta),
443      mDataSource(source),
444      mFirstFramePos(first_frame_pos),
445      mFixedHeader(fixed_header),
446      mCurrentPos(0),
447      mCurrentTimeUs(0),
448      mStarted(false),
449      mSeeker(seeker),
450      mGroup(NULL),
451      mBasisTimeUs(0),
452      mSamplesRead(0) {
453}
454
455MP3Source::~MP3Source() {
456    if (mStarted) {
457        stop();
458    }
459}
460
461status_t MP3Source::start(MetaData *) {
462    CHECK(!mStarted);
463
464    mGroup = new MediaBufferGroup;
465
466    mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
467
468    mCurrentPos = mFirstFramePos;
469    mCurrentTimeUs = 0;
470
471    mBasisTimeUs = mCurrentTimeUs;
472    mSamplesRead = 0;
473
474    mStarted = true;
475
476    return OK;
477}
478
479status_t MP3Source::stop() {
480    CHECK(mStarted);
481
482    delete mGroup;
483    mGroup = NULL;
484
485    mStarted = false;
486
487    return OK;
488}
489
490sp<MetaData> MP3Source::getFormat() {
491    return mMeta;
492}
493
494status_t MP3Source::read(
495        MediaBuffer **out, const ReadOptions *options) {
496    *out = NULL;
497
498    int64_t seekTimeUs;
499    ReadOptions::SeekMode mode;
500    bool seekCBR = false;
501
502    if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
503        int64_t actualSeekTimeUs = seekTimeUs;
504        if (mSeeker == NULL
505                || !mSeeker->getOffsetForTime(&actualSeekTimeUs, &mCurrentPos)) {
506            int32_t bitrate;
507            if (!mMeta->findInt32(kKeyBitRate, &bitrate)) {
508                // bitrate is in bits/sec.
509                ALOGI("no bitrate");
510
511                return ERROR_UNSUPPORTED;
512            }
513
514            mCurrentTimeUs = seekTimeUs;
515            mCurrentPos = mFirstFramePos + seekTimeUs * bitrate / 8000000;
516            seekCBR = true;
517        } else {
518            mCurrentTimeUs = actualSeekTimeUs;
519        }
520
521        mBasisTimeUs = mCurrentTimeUs;
522        mSamplesRead = 0;
523    }
524
525    MediaBuffer *buffer;
526    status_t err = mGroup->acquire_buffer(&buffer);
527    if (err != OK) {
528        return err;
529    }
530
531    size_t frame_size;
532    int bitrate;
533    int num_samples;
534    int sample_rate;
535    for (;;) {
536        ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), 4);
537        if (n < 4) {
538            buffer->release();
539            buffer = NULL;
540
541            return (n < 0 ? n : ERROR_END_OF_STREAM);
542        }
543
544        uint32_t header = U32_AT((const uint8_t *)buffer->data());
545
546        if ((header & kMask) == (mFixedHeader & kMask)
547            && GetMPEGAudioFrameSize(
548                header, &frame_size, &sample_rate, NULL,
549                &bitrate, &num_samples)) {
550
551            // re-calculate mCurrentTimeUs because we might have called Resync()
552            if (seekCBR) {
553                mCurrentTimeUs = (mCurrentPos - mFirstFramePos) * 8000 / bitrate;
554                mBasisTimeUs = mCurrentTimeUs;
555            }
556
557            break;
558        }
559
560        // Lost sync.
561        ALOGV("lost sync! header = 0x%08x, old header = 0x%08x\n", header, mFixedHeader);
562
563        off64_t pos = mCurrentPos;
564        if (!Resync(mDataSource, mFixedHeader, &pos, NULL, NULL)) {
565            ALOGE("Unable to resync. Signalling end of stream.");
566
567            buffer->release();
568            buffer = NULL;
569
570            return ERROR_END_OF_STREAM;
571        }
572
573        mCurrentPos = pos;
574
575        // Try again with the new position.
576    }
577
578    CHECK(frame_size <= buffer->size());
579
580    ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), frame_size);
581    if (n < (ssize_t)frame_size) {
582        buffer->release();
583        buffer = NULL;
584
585        return (n < 0 ? n : ERROR_END_OF_STREAM);
586    }
587
588    buffer->set_range(0, frame_size);
589
590    buffer->meta_data()->setInt64(kKeyTime, mCurrentTimeUs);
591    buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
592
593    mCurrentPos += frame_size;
594
595    mSamplesRead += num_samples;
596    mCurrentTimeUs = mBasisTimeUs + ((mSamplesRead * 1000000) / sample_rate);
597
598    *out = buffer;
599
600    return OK;
601}
602
603sp<MetaData> MP3Extractor::getMetaData() {
604    sp<MetaData> meta = new MetaData;
605
606    if (mInitCheck != OK) {
607        return meta;
608    }
609
610    meta->setCString(kKeyMIMEType, "audio/mpeg");
611
612    ID3 id3(mDataSource);
613
614    if (!id3.isValid()) {
615        return meta;
616    }
617
618    struct Map {
619        int key;
620        const char *tag1;
621        const char *tag2;
622    };
623    static const Map kMap[] = {
624        { kKeyAlbum, "TALB", "TAL" },
625        { kKeyArtist, "TPE1", "TP1" },
626        { kKeyAlbumArtist, "TPE2", "TP2" },
627        { kKeyComposer, "TCOM", "TCM" },
628        { kKeyGenre, "TCON", "TCO" },
629        { kKeyTitle, "TIT2", "TT2" },
630        { kKeyYear, "TYE", "TYER" },
631        { kKeyAuthor, "TXT", "TEXT" },
632        { kKeyCDTrackNumber, "TRK", "TRCK" },
633        { kKeyDiscNumber, "TPA", "TPOS" },
634        { kKeyCompilation, "TCP", "TCMP" },
635    };
636    static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
637
638    for (size_t i = 0; i < kNumMapEntries; ++i) {
639        ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
640        if (it->done()) {
641            delete it;
642            it = new ID3::Iterator(id3, kMap[i].tag2);
643        }
644
645        if (it->done()) {
646            delete it;
647            continue;
648        }
649
650        String8 s;
651        it->getString(&s);
652        delete it;
653
654        meta->setCString(kMap[i].key, s);
655    }
656
657    size_t dataSize;
658    String8 mime;
659    const void *data = id3.getAlbumArt(&dataSize, &mime);
660
661    if (data) {
662        meta->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
663        meta->setCString(kKeyAlbumArtMIME, mime.string());
664    }
665
666    return meta;
667}
668
669bool SniffMP3(
670        const sp<DataSource> &source, String8 *mimeType,
671        float *confidence, sp<AMessage> *meta) {
672    off64_t pos = 0;
673    off64_t post_id3_pos;
674    uint32_t header;
675    if (!Resync(source, 0, &pos, &post_id3_pos, &header)) {
676        return false;
677    }
678
679    *meta = new AMessage;
680    (*meta)->setInt64("offset", pos);
681    (*meta)->setInt32("header", header);
682    (*meta)->setInt64("post-id3-offset", post_id3_pos);
683
684    *mimeType = MEDIA_MIMETYPE_AUDIO_MPEG;
685    *confidence = 0.2f;
686
687    return true;
688}
689
690}  // namespace android
691