MP3Extractor.cpp revision a9e05b911f978dc3f25d4b1e35e51383dc4f9fc7
1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17//#define LOG_NDEBUG 0 18#define LOG_TAG "MP3Extractor" 19#include <utils/Log.h> 20 21#include "include/MP3Extractor.h" 22 23#include "include/ID3.h" 24#include "include/VBRISeeker.h" 25#include "include/XINGSeeker.h" 26 27#include <media/stagefright/foundation/AMessage.h> 28#include <media/stagefright/DataSource.h> 29#include <media/stagefright/MediaBuffer.h> 30#include <media/stagefright/MediaBufferGroup.h> 31#include <media/stagefright/MediaDebug.h> 32#include <media/stagefright/MediaDefs.h> 33#include <media/stagefright/MediaErrors.h> 34#include <media/stagefright/MediaSource.h> 35#include <media/stagefright/MetaData.h> 36#include <media/stagefright/Utils.h> 37#include <utils/String8.h> 38 39namespace android { 40 41// Everything must match except for 42// protection, bitrate, padding, private bits, mode, mode extension, 43// copyright bit, original bit and emphasis. 44// Yes ... there are things that must indeed match... 45static const uint32_t kMask = 0xfffe0c00; 46 47// static 48bool MP3Extractor::get_mp3_frame_size( 49 uint32_t header, size_t *frame_size, 50 int *out_sampling_rate, int *out_channels, 51 int *out_bitrate, int *out_num_samples) { 52 *frame_size = 0; 53 54 if (out_sampling_rate) { 55 *out_sampling_rate = 0; 56 } 57 58 if (out_channels) { 59 *out_channels = 0; 60 } 61 62 if (out_bitrate) { 63 *out_bitrate = 0; 64 } 65 66 if (out_num_samples) { 67 *out_num_samples = 1152; 68 } 69 70 if ((header & 0xffe00000) != 0xffe00000) { 71 return false; 72 } 73 74 unsigned version = (header >> 19) & 3; 75 76 if (version == 0x01) { 77 return false; 78 } 79 80 unsigned layer = (header >> 17) & 3; 81 82 if (layer == 0x00) { 83 return false; 84 } 85 86 unsigned protection = (header >> 16) & 1; 87 88 unsigned bitrate_index = (header >> 12) & 0x0f; 89 90 if (bitrate_index == 0 || bitrate_index == 0x0f) { 91 // Disallow "free" bitrate. 92 return false; 93 } 94 95 unsigned sampling_rate_index = (header >> 10) & 3; 96 97 if (sampling_rate_index == 3) { 98 return false; 99 } 100 101 static const int kSamplingRateV1[] = { 44100, 48000, 32000 }; 102 int sampling_rate = kSamplingRateV1[sampling_rate_index]; 103 if (version == 2 /* V2 */) { 104 sampling_rate /= 2; 105 } else if (version == 0 /* V2.5 */) { 106 sampling_rate /= 4; 107 } 108 109 unsigned padding = (header >> 9) & 1; 110 111 if (layer == 3) { 112 // layer I 113 114 static const int kBitrateV1[] = { 115 32, 64, 96, 128, 160, 192, 224, 256, 116 288, 320, 352, 384, 416, 448 117 }; 118 119 static const int kBitrateV2[] = { 120 32, 48, 56, 64, 80, 96, 112, 128, 121 144, 160, 176, 192, 224, 256 122 }; 123 124 int bitrate = 125 (version == 3 /* V1 */) 126 ? kBitrateV1[bitrate_index - 1] 127 : kBitrateV2[bitrate_index - 1]; 128 129 if (out_bitrate) { 130 *out_bitrate = bitrate; 131 } 132 133 *frame_size = (12000 * bitrate / sampling_rate + padding) * 4; 134 135 if (out_num_samples) { 136 *out_num_samples = 384; 137 } 138 } else { 139 // layer II or III 140 141 static const int kBitrateV1L2[] = { 142 32, 48, 56, 64, 80, 96, 112, 128, 143 160, 192, 224, 256, 320, 384 144 }; 145 146 static const int kBitrateV1L3[] = { 147 32, 40, 48, 56, 64, 80, 96, 112, 148 128, 160, 192, 224, 256, 320 149 }; 150 151 static const int kBitrateV2[] = { 152 8, 16, 24, 32, 40, 48, 56, 64, 153 80, 96, 112, 128, 144, 160 154 }; 155 156 int bitrate; 157 if (version == 3 /* V1 */) { 158 bitrate = (layer == 2 /* L2 */) 159 ? kBitrateV1L2[bitrate_index - 1] 160 : kBitrateV1L3[bitrate_index - 1]; 161 162 if (out_num_samples) { 163 *out_num_samples = 1152; 164 } 165 } else { 166 // V2 (or 2.5) 167 168 bitrate = kBitrateV2[bitrate_index - 1]; 169 if (out_num_samples) { 170 *out_num_samples = 576; 171 } 172 } 173 174 if (out_bitrate) { 175 *out_bitrate = bitrate; 176 } 177 178 if (version == 3 /* V1 */) { 179 *frame_size = 144000 * bitrate / sampling_rate + padding; 180 } else { 181 // V2 or V2.5 182 *frame_size = 72000 * bitrate / sampling_rate + padding; 183 } 184 } 185 186 if (out_sampling_rate) { 187 *out_sampling_rate = sampling_rate; 188 } 189 190 if (out_channels) { 191 int channel_mode = (header >> 6) & 3; 192 193 *out_channels = (channel_mode == 3) ? 1 : 2; 194 } 195 196 return true; 197} 198 199static bool Resync( 200 const sp<DataSource> &source, uint32_t match_header, 201 off64_t *inout_pos, off64_t *post_id3_pos, uint32_t *out_header) { 202 if (post_id3_pos != NULL) { 203 *post_id3_pos = 0; 204 } 205 206 if (*inout_pos == 0) { 207 // Skip an optional ID3 header if syncing at the very beginning 208 // of the datasource. 209 210 for (;;) { 211 uint8_t id3header[10]; 212 if (source->readAt(*inout_pos, id3header, sizeof(id3header)) 213 < (ssize_t)sizeof(id3header)) { 214 // If we can't even read these 10 bytes, we might as well bail 215 // out, even if there _were_ 10 bytes of valid mp3 audio data... 216 return false; 217 } 218 219 if (memcmp("ID3", id3header, 3)) { 220 break; 221 } 222 223 // Skip the ID3v2 header. 224 225 size_t len = 226 ((id3header[6] & 0x7f) << 21) 227 | ((id3header[7] & 0x7f) << 14) 228 | ((id3header[8] & 0x7f) << 7) 229 | (id3header[9] & 0x7f); 230 231 len += 10; 232 233 *inout_pos += len; 234 235 LOGV("skipped ID3 tag, new starting offset is %lld (0x%016llx)", 236 *inout_pos, *inout_pos); 237 } 238 239 if (post_id3_pos != NULL) { 240 *post_id3_pos = *inout_pos; 241 } 242 } 243 244 off64_t pos = *inout_pos; 245 bool valid = false; 246 247 const size_t kMaxReadBytes = 1024; 248 const size_t kMaxBytesChecked = 128 * 1024; 249 uint8_t buf[kMaxReadBytes]; 250 ssize_t bytesToRead = kMaxReadBytes; 251 ssize_t totalBytesRead = 0; 252 ssize_t remainingBytes = 0; 253 bool reachEOS = false; 254 uint8_t *tmp = buf; 255 256 do { 257 if (pos >= *inout_pos + kMaxBytesChecked) { 258 // Don't scan forever. 259 LOGV("giving up at offset %lld", pos); 260 break; 261 } 262 263 if (remainingBytes < 4) { 264 if (reachEOS) { 265 break; 266 } else { 267 memcpy(buf, tmp, remainingBytes); 268 bytesToRead = kMaxReadBytes - remainingBytes; 269 270 /* 271 * The next read position should start from the end of 272 * the last buffer, and thus should include the remaining 273 * bytes in the buffer. 274 */ 275 totalBytesRead = source->readAt(pos + remainingBytes, 276 buf + remainingBytes, 277 bytesToRead); 278 if (totalBytesRead <= 0) { 279 break; 280 } 281 reachEOS = (totalBytesRead != bytesToRead); 282 totalBytesRead += remainingBytes; 283 remainingBytes = totalBytesRead; 284 tmp = buf; 285 continue; 286 } 287 } 288 289 uint32_t header = U32_AT(tmp); 290 291 if (match_header != 0 && (header & kMask) != (match_header & kMask)) { 292 ++pos; 293 ++tmp; 294 --remainingBytes; 295 continue; 296 } 297 298 size_t frame_size; 299 int sample_rate, num_channels, bitrate; 300 if (!MP3Extractor::get_mp3_frame_size( 301 header, &frame_size, 302 &sample_rate, &num_channels, &bitrate)) { 303 ++pos; 304 ++tmp; 305 --remainingBytes; 306 continue; 307 } 308 309 LOGV("found possible 1st frame at %lld (header = 0x%08x)", pos, header); 310 311 // We found what looks like a valid frame, 312 // now find its successors. 313 314 off64_t test_pos = pos + frame_size; 315 316 valid = true; 317 for (int j = 0; j < 3; ++j) { 318 uint8_t tmp[4]; 319 if (source->readAt(test_pos, tmp, 4) < 4) { 320 valid = false; 321 break; 322 } 323 324 uint32_t test_header = U32_AT(tmp); 325 326 LOGV("subsequent header is %08x", test_header); 327 328 if ((test_header & kMask) != (header & kMask)) { 329 valid = false; 330 break; 331 } 332 333 size_t test_frame_size; 334 if (!MP3Extractor::get_mp3_frame_size( 335 test_header, &test_frame_size)) { 336 valid = false; 337 break; 338 } 339 340 LOGV("found subsequent frame #%d at %lld", j + 2, test_pos); 341 342 test_pos += test_frame_size; 343 } 344 345 if (valid) { 346 *inout_pos = pos; 347 348 if (out_header != NULL) { 349 *out_header = header; 350 } 351 } else { 352 LOGV("no dice, no valid sequence of frames found."); 353 } 354 355 ++pos; 356 ++tmp; 357 --remainingBytes; 358 } while (!valid); 359 360 return valid; 361} 362 363class MP3Source : public MediaSource { 364public: 365 MP3Source( 366 const sp<MetaData> &meta, const sp<DataSource> &source, 367 off64_t first_frame_pos, uint32_t fixed_header, 368 const sp<MP3Seeker> &seeker); 369 370 virtual status_t start(MetaData *params = NULL); 371 virtual status_t stop(); 372 373 virtual sp<MetaData> getFormat(); 374 375 virtual status_t read( 376 MediaBuffer **buffer, const ReadOptions *options = NULL); 377 378protected: 379 virtual ~MP3Source(); 380 381private: 382 sp<MetaData> mMeta; 383 sp<DataSource> mDataSource; 384 off64_t mFirstFramePos; 385 uint32_t mFixedHeader; 386 off64_t mCurrentPos; 387 int64_t mCurrentTimeUs; 388 bool mStarted; 389 sp<MP3Seeker> mSeeker; 390 MediaBufferGroup *mGroup; 391 392 int64_t mBasisTimeUs; 393 int64_t mSamplesRead; 394 395 MP3Source(const MP3Source &); 396 MP3Source &operator=(const MP3Source &); 397}; 398 399MP3Extractor::MP3Extractor( 400 const sp<DataSource> &source, const sp<AMessage> &meta) 401 : mInitCheck(NO_INIT), 402 mDataSource(source), 403 mFirstFramePos(-1), 404 mFixedHeader(0) { 405 off64_t pos = 0; 406 off64_t post_id3_pos; 407 uint32_t header; 408 bool success; 409 410 int64_t meta_offset; 411 uint32_t meta_header; 412 int64_t meta_post_id3_offset; 413 if (meta != NULL 414 && meta->findInt64("offset", &meta_offset) 415 && meta->findInt32("header", (int32_t *)&meta_header) 416 && meta->findInt64("post-id3-offset", &meta_post_id3_offset)) { 417 // The sniffer has already done all the hard work for us, simply 418 // accept its judgement. 419 pos = (off64_t)meta_offset; 420 header = meta_header; 421 post_id3_pos = (off64_t)meta_post_id3_offset; 422 423 success = true; 424 } else { 425 success = Resync(mDataSource, 0, &pos, &post_id3_pos, &header); 426 } 427 428 if (!success) { 429 // mInitCheck will remain NO_INIT 430 return; 431 } 432 433 mFirstFramePos = pos; 434 mFixedHeader = header; 435 436 size_t frame_size; 437 int sample_rate; 438 int num_channels; 439 int bitrate; 440 get_mp3_frame_size( 441 header, &frame_size, &sample_rate, &num_channels, &bitrate); 442 443 mMeta = new MetaData; 444 445 mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG); 446 mMeta->setInt32(kKeySampleRate, sample_rate); 447 mMeta->setInt32(kKeyBitRate, bitrate * 1000); 448 mMeta->setInt32(kKeyChannelCount, num_channels); 449 450 mSeeker = XINGSeeker::CreateFromSource(mDataSource, mFirstFramePos); 451 452 if (mSeeker == NULL) { 453 mSeeker = VBRISeeker::CreateFromSource(mDataSource, post_id3_pos); 454 } 455 456 int64_t durationUs; 457 458 if (mSeeker == NULL || !mSeeker->getDuration(&durationUs)) { 459 off64_t fileSize; 460 if (mDataSource->getSize(&fileSize) == OK) { 461 durationUs = 8000LL * (fileSize - mFirstFramePos) / bitrate; 462 } else { 463 durationUs = -1; 464 } 465 } 466 467 if (durationUs >= 0) { 468 mMeta->setInt64(kKeyDuration, durationUs); 469 } 470 471 mInitCheck = OK; 472} 473 474size_t MP3Extractor::countTracks() { 475 return mInitCheck != OK ? 0 : 1; 476} 477 478sp<MediaSource> MP3Extractor::getTrack(size_t index) { 479 if (mInitCheck != OK || index != 0) { 480 return NULL; 481 } 482 483 return new MP3Source( 484 mMeta, mDataSource, mFirstFramePos, mFixedHeader, 485 mSeeker); 486} 487 488sp<MetaData> MP3Extractor::getTrackMetaData(size_t index, uint32_t flags) { 489 if (mInitCheck != OK || index != 0) { 490 return NULL; 491 } 492 493 return mMeta; 494} 495 496//////////////////////////////////////////////////////////////////////////////// 497 498MP3Source::MP3Source( 499 const sp<MetaData> &meta, const sp<DataSource> &source, 500 off64_t first_frame_pos, uint32_t fixed_header, 501 const sp<MP3Seeker> &seeker) 502 : mMeta(meta), 503 mDataSource(source), 504 mFirstFramePos(first_frame_pos), 505 mFixedHeader(fixed_header), 506 mCurrentPos(0), 507 mCurrentTimeUs(0), 508 mStarted(false), 509 mSeeker(seeker), 510 mGroup(NULL), 511 mBasisTimeUs(0), 512 mSamplesRead(0) { 513} 514 515MP3Source::~MP3Source() { 516 if (mStarted) { 517 stop(); 518 } 519} 520 521status_t MP3Source::start(MetaData *) { 522 CHECK(!mStarted); 523 524 mGroup = new MediaBufferGroup; 525 526 const size_t kMaxFrameSize = 32768; 527 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 528 529 mCurrentPos = mFirstFramePos; 530 mCurrentTimeUs = 0; 531 532 mBasisTimeUs = mCurrentTimeUs; 533 mSamplesRead = 0; 534 535 mStarted = true; 536 537 return OK; 538} 539 540status_t MP3Source::stop() { 541 CHECK(mStarted); 542 543 delete mGroup; 544 mGroup = NULL; 545 546 mStarted = false; 547 548 return OK; 549} 550 551sp<MetaData> MP3Source::getFormat() { 552 return mMeta; 553} 554 555status_t MP3Source::read( 556 MediaBuffer **out, const ReadOptions *options) { 557 *out = NULL; 558 559 int64_t seekTimeUs; 560 ReadOptions::SeekMode mode; 561 if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) { 562 int64_t actualSeekTimeUs = seekTimeUs; 563 if (mSeeker == NULL 564 || !mSeeker->getOffsetForTime(&actualSeekTimeUs, &mCurrentPos)) { 565 int32_t bitrate; 566 if (!mMeta->findInt32(kKeyBitRate, &bitrate)) { 567 // bitrate is in bits/sec. 568 LOGI("no bitrate"); 569 570 return ERROR_UNSUPPORTED; 571 } 572 573 mCurrentTimeUs = seekTimeUs; 574 mCurrentPos = mFirstFramePos + seekTimeUs * bitrate / 8000000; 575 } else { 576 mCurrentTimeUs = actualSeekTimeUs; 577 } 578 579 mBasisTimeUs = mCurrentTimeUs; 580 mSamplesRead = 0; 581 } 582 583 MediaBuffer *buffer; 584 status_t err = mGroup->acquire_buffer(&buffer); 585 if (err != OK) { 586 return err; 587 } 588 589 size_t frame_size; 590 int bitrate; 591 int num_samples; 592 int sample_rate; 593 for (;;) { 594 ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), 4); 595 if (n < 4) { 596 buffer->release(); 597 buffer = NULL; 598 599 return ERROR_END_OF_STREAM; 600 } 601 602 uint32_t header = U32_AT((const uint8_t *)buffer->data()); 603 604 if ((header & kMask) == (mFixedHeader & kMask) 605 && MP3Extractor::get_mp3_frame_size( 606 header, &frame_size, &sample_rate, NULL, &bitrate, &num_samples)) { 607 break; 608 } 609 610 // Lost sync. 611 LOGV("lost sync! header = 0x%08x, old header = 0x%08x\n", header, mFixedHeader); 612 613 off64_t pos = mCurrentPos; 614 if (!Resync(mDataSource, mFixedHeader, &pos, NULL, NULL)) { 615 LOGE("Unable to resync. Signalling end of stream."); 616 617 buffer->release(); 618 buffer = NULL; 619 620 return ERROR_END_OF_STREAM; 621 } 622 623 mCurrentPos = pos; 624 625 // Try again with the new position. 626 } 627 628 CHECK(frame_size <= buffer->size()); 629 630 ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), frame_size); 631 if (n < (ssize_t)frame_size) { 632 buffer->release(); 633 buffer = NULL; 634 635 return ERROR_END_OF_STREAM; 636 } 637 638 buffer->set_range(0, frame_size); 639 640 buffer->meta_data()->setInt64(kKeyTime, mCurrentTimeUs); 641 buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 642 643 mCurrentPos += frame_size; 644 645 mSamplesRead += num_samples; 646 mCurrentTimeUs = mBasisTimeUs + ((mSamplesRead * 1000000) / sample_rate); 647 648 *out = buffer; 649 650 return OK; 651} 652 653sp<MetaData> MP3Extractor::getMetaData() { 654 sp<MetaData> meta = new MetaData; 655 656 if (mInitCheck != OK) { 657 return meta; 658 } 659 660 meta->setCString(kKeyMIMEType, "audio/mpeg"); 661 662 ID3 id3(mDataSource); 663 664 if (!id3.isValid()) { 665 return meta; 666 } 667 668 struct Map { 669 int key; 670 const char *tag1; 671 const char *tag2; 672 }; 673 static const Map kMap[] = { 674 { kKeyAlbum, "TALB", "TAL" }, 675 { kKeyArtist, "TPE1", "TP1" }, 676 { kKeyAlbumArtist, "TPE2", "TP2" }, 677 { kKeyComposer, "TCOM", "TCM" }, 678 { kKeyGenre, "TCON", "TCO" }, 679 { kKeyTitle, "TIT2", "TT2" }, 680 { kKeyYear, "TYE", "TYER" }, 681 { kKeyAuthor, "TXT", "TEXT" }, 682 { kKeyCDTrackNumber, "TRK", "TRCK" }, 683 { kKeyDiscNumber, "TPA", "TPOS" }, 684 { kKeyCompilation, "TCP", "TCMP" }, 685 }; 686 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 687 688 for (size_t i = 0; i < kNumMapEntries; ++i) { 689 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 690 if (it->done()) { 691 delete it; 692 it = new ID3::Iterator(id3, kMap[i].tag2); 693 } 694 695 if (it->done()) { 696 delete it; 697 continue; 698 } 699 700 String8 s; 701 it->getString(&s); 702 delete it; 703 704 meta->setCString(kMap[i].key, s); 705 } 706 707 size_t dataSize; 708 String8 mime; 709 const void *data = id3.getAlbumArt(&dataSize, &mime); 710 711 if (data) { 712 meta->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 713 meta->setCString(kKeyAlbumArtMIME, mime.string()); 714 } 715 716 return meta; 717} 718 719bool SniffMP3( 720 const sp<DataSource> &source, String8 *mimeType, 721 float *confidence, sp<AMessage> *meta) { 722 off64_t pos = 0; 723 off64_t post_id3_pos; 724 uint32_t header; 725 if (!Resync(source, 0, &pos, &post_id3_pos, &header)) { 726 return false; 727 } 728 729 *meta = new AMessage; 730 (*meta)->setInt64("offset", pos); 731 (*meta)->setInt32("header", header); 732 (*meta)->setInt64("post-id3-offset", post_id3_pos); 733 734 *mimeType = MEDIA_MIMETYPE_AUDIO_MPEG; 735 *confidence = 0.2f; 736 737 return true; 738} 739 740} // namespace android 741