// MP3Extractor.cpp revision 7be6407f2ad7f2b0782d195d9f792072c084d6f5
1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17//#define LOG_NDEBUG 0 18#define LOG_TAG "MP3Extractor" 19#include <utils/Log.h> 20 21#include "include/MP3Extractor.h" 22 23#include "include/ID3.h" 24 25#include <media/stagefright/DataSource.h> 26#include <media/stagefright/MediaBuffer.h> 27#include <media/stagefright/MediaBufferGroup.h> 28#include <media/stagefright/MediaDebug.h> 29#include <media/stagefright/MediaDefs.h> 30#include <media/stagefright/MediaErrors.h> 31#include <media/stagefright/MediaSource.h> 32#include <media/stagefright/MetaData.h> 33#include <media/stagefright/Utils.h> 34#include <utils/String8.h> 35 36namespace android { 37 38// Everything must match except for 39// protection, bitrate, padding, private bits and mode extension. 
40static const uint32_t kMask = 0xfffe0ccf; 41 42static bool get_mp3_frame_size( 43 uint32_t header, size_t *frame_size, 44 int *out_sampling_rate = NULL, int *out_channels = NULL, 45 int *out_bitrate = NULL) { 46 *frame_size = 0; 47 48 if (out_sampling_rate) { 49 *out_sampling_rate = 0; 50 } 51 52 if (out_channels) { 53 *out_channels = 0; 54 } 55 56 if (out_bitrate) { 57 *out_bitrate = 0; 58 } 59 60 if ((header & 0xffe00000) != 0xffe00000) { 61 return false; 62 } 63 64 unsigned version = (header >> 19) & 3; 65 66 if (version == 0x01) { 67 return false; 68 } 69 70 unsigned layer = (header >> 17) & 3; 71 72 if (layer == 0x00) { 73 return false; 74 } 75 76 unsigned protection = (header >> 16) & 1; 77 78 unsigned bitrate_index = (header >> 12) & 0x0f; 79 80 if (bitrate_index == 0 || bitrate_index == 0x0f) { 81 // Disallow "free" bitrate. 82 return false; 83 } 84 85 unsigned sampling_rate_index = (header >> 10) & 3; 86 87 if (sampling_rate_index == 3) { 88 return false; 89 } 90 91 static const int kSamplingRateV1[] = { 44100, 48000, 32000 }; 92 int sampling_rate = kSamplingRateV1[sampling_rate_index]; 93 if (version == 2 /* V2 */) { 94 sampling_rate /= 2; 95 } else if (version == 0 /* V2.5 */) { 96 sampling_rate /= 4; 97 } 98 99 unsigned padding = (header >> 9) & 1; 100 101 if (layer == 3) { 102 // layer I 103 104 static const int kBitrateV1[] = { 105 32, 64, 96, 128, 160, 192, 224, 256, 106 288, 320, 352, 384, 416, 448 107 }; 108 109 static const int kBitrateV2[] = { 110 32, 48, 56, 64, 80, 96, 112, 128, 111 144, 160, 176, 192, 224, 256 112 }; 113 114 int bitrate = 115 (version == 3 /* V1 */) 116 ? 
kBitrateV1[bitrate_index - 1] 117 : kBitrateV2[bitrate_index - 1]; 118 119 if (out_bitrate) { 120 *out_bitrate = bitrate; 121 } 122 123 *frame_size = (12000 * bitrate / sampling_rate + padding) * 4; 124 } else { 125 // layer II or III 126 127 static const int kBitrateV1L2[] = { 128 32, 48, 56, 64, 80, 96, 112, 128, 129 160, 192, 224, 256, 320, 384 130 }; 131 132 static const int kBitrateV1L3[] = { 133 32, 40, 48, 56, 64, 80, 96, 112, 134 128, 160, 192, 224, 256, 320 135 }; 136 137 static const int kBitrateV2[] = { 138 8, 16, 24, 32, 40, 48, 56, 64, 139 80, 96, 112, 128, 144, 160 140 }; 141 142 int bitrate; 143 if (version == 3 /* V1 */) { 144 bitrate = (layer == 2 /* L2 */) 145 ? kBitrateV1L2[bitrate_index - 1] 146 : kBitrateV1L3[bitrate_index - 1]; 147 } else { 148 // V2 (or 2.5) 149 150 bitrate = kBitrateV2[bitrate_index - 1]; 151 } 152 153 if (out_bitrate) { 154 *out_bitrate = bitrate; 155 } 156 157 if (version == 3 /* V1 */) { 158 *frame_size = 144000 * bitrate / sampling_rate + padding; 159 } else { 160 // V2 or V2.5 161 *frame_size = 72000 * bitrate / sampling_rate + padding; 162 } 163 } 164 165 if (out_sampling_rate) { 166 *out_sampling_rate = sampling_rate; 167 } 168 169 if (out_channels) { 170 int channel_mode = (header >> 6) & 3; 171 172 *out_channels = (channel_mode == 3) ? 
1 : 2; 173 } 174 175 return true; 176} 177 178static bool parse_xing_header( 179 const sp<DataSource> &source, off_t first_frame_pos, 180 int32_t *frame_number = NULL, int32_t *byte_number = NULL, 181 char *table_of_contents = NULL, int32_t *quality_indicator = NULL, 182 int64_t *duration = NULL) { 183 184 if (frame_number) { 185 *frame_number = 0; 186 } 187 if (byte_number) { 188 *byte_number = 0; 189 } 190 if (table_of_contents) { 191 table_of_contents[0] = 0; 192 } 193 if (quality_indicator) { 194 *quality_indicator = 0; 195 } 196 if (duration) { 197 *duration = 0; 198 } 199 200 uint8_t buffer[4]; 201 int offset = first_frame_pos; 202 if (source->readAt(offset, &buffer, 4) < 4) { // get header 203 return false; 204 } 205 offset += 4; 206 207 uint8_t id, layer, sr_index, mode; 208 layer = (buffer[1] >> 1) & 3; 209 id = (buffer[1] >> 3) & 3; 210 sr_index = (buffer[2] >> 2) & 3; 211 mode = (buffer[3] >> 6) & 3; 212 if (layer == 0) { 213 return false; 214 } 215 if (id == 1) { 216 return false; 217 } 218 if (sr_index == 3) { 219 return false; 220 } 221 // determine offset of XING header 222 if(id&1) { // mpeg1 223 if (mode != 3) offset += 32; 224 else offset += 17; 225 } else { // mpeg2 226 if (mode != 3) offset += 17; 227 else offset += 9; 228 } 229 230 if (source->readAt(offset, &buffer, 4) < 4) { // XING header ID 231 return false; 232 } 233 offset += 4; 234 // Check XING ID 235 if ((buffer[0] != 'X') || (buffer[1] != 'i') 236 || (buffer[2] != 'n') || (buffer[3] != 'g')) { 237 if ((buffer[0] != 'I') || (buffer[1] != 'n') 238 || (buffer[2] != 'f') || (buffer[3] != 'o')) { 239 return false; 240 } 241 } 242 243 if (source->readAt(offset, &buffer, 4) < 4) { // flags 244 return false; 245 } 246 offset += 4; 247 uint32_t flags = U32_AT(buffer); 248 249 if (flags & 0x0001) { // Frames field is present 250 if (source->readAt(offset, buffer, 4) < 4) { 251 return false; 252 } 253 if (frame_number) { 254 *frame_number = U32_AT(buffer); 255 } 256 int32_t frame = 
U32_AT(buffer); 257 // Samples per Frame: 1. index = MPEG Version ID, 2. index = Layer 258 const int samplesPerFrames[2][3] = 259 { 260 { 384, 1152, 576 }, // MPEG 2, 2.5: layer1, layer2, layer3 261 { 384, 1152, 1152 }, // MPEG 1: layer1, layer2, layer3 262 }; 263 // sampling rates in hertz: 1. index = MPEG Version ID, 2. index = sampling rate index 264 const int samplingRates[4][3] = 265 { 266 { 11025, 12000, 8000, }, // MPEG 2.5 267 { 0, 0, 0, }, // reserved 268 { 22050, 24000, 16000, }, // MPEG 2 269 { 44100, 48000, 32000, } // MPEG 1 270 }; 271 if (duration) { 272 *duration = (int64_t)frame * samplesPerFrames[id&1][3-layer] * 1000000LL 273 / samplingRates[id][sr_index]; 274 } 275 offset += 4; 276 } 277 if (flags & 0x0002) { // Bytes field is present 278 if (byte_number) { 279 if (source->readAt(offset, buffer, 4) < 4) { 280 return false; 281 } 282 *byte_number = U32_AT(buffer); 283 } 284 offset += 4; 285 } 286 if (flags & 0x0004) { // TOC field is present 287 if (table_of_contents) { 288 if (source->readAt(offset + 1, table_of_contents, 99) < 99) { 289 return false; 290 } 291 } 292 offset += 100; 293 } 294 if (flags & 0x0008) { // Quality indicator field is present 295 if (quality_indicator) { 296 if (source->readAt(offset, buffer, 4) < 4) { 297 return false; 298 } 299 *quality_indicator = U32_AT(buffer); 300 } 301 } 302 return true; 303} 304 305static bool Resync( 306 const sp<DataSource> &source, uint32_t match_header, 307 off_t *inout_pos, uint32_t *out_header) { 308 if (*inout_pos == 0) { 309 // Skip an optional ID3 header if syncing at the very beginning 310 // of the datasource. 311 312 uint8_t id3header[10]; 313 if (source->readAt(0, id3header, sizeof(id3header)) 314 < (ssize_t)sizeof(id3header)) { 315 // If we can't even read these 10 bytes, we might as well bail out, 316 // even if there _were_ 10 bytes of valid mp3 audio data... 
317 return false; 318 } 319 320 if (id3header[0] == 'I' && id3header[1] == 'D' && id3header[2] == '3') { 321 // Skip the ID3v2 header. 322 323 size_t len = 324 ((id3header[6] & 0x7f) << 21) 325 | ((id3header[7] & 0x7f) << 14) 326 | ((id3header[8] & 0x7f) << 7) 327 | (id3header[9] & 0x7f); 328 329 len += 10; 330 331 *inout_pos += len; 332 } 333 } 334 335 const size_t kMaxFrameSize = 4096; 336 uint8_t *buffer = new uint8_t[kMaxFrameSize]; 337 338 off_t pos = *inout_pos - kMaxFrameSize; 339 size_t buffer_offset = kMaxFrameSize; 340 size_t buffer_length = kMaxFrameSize; 341 bool valid = false; 342 do { 343 if (buffer_offset + 3 >= buffer_length) { 344 if (buffer_length < kMaxFrameSize) { 345 break; 346 } 347 348 pos += buffer_offset; 349 350 if (pos >= *inout_pos + 128 * 1024) { 351 // Don't scan forever. 352 LOGV("giving up at offset %ld", pos); 353 break; 354 } 355 356 memmove(buffer, &buffer[buffer_offset], buffer_length - buffer_offset); 357 buffer_length = buffer_length - buffer_offset; 358 buffer_offset = 0; 359 360 ssize_t n = source->readAt( 361 pos, &buffer[buffer_length], kMaxFrameSize - buffer_length); 362 363 if (n <= 0) { 364 break; 365 } 366 367 buffer_length += (size_t)n; 368 369 continue; 370 } 371 372 uint32_t header = U32_AT(&buffer[buffer_offset]); 373 374 if (match_header != 0 && (header & kMask) != (match_header & kMask)) { 375 ++buffer_offset; 376 continue; 377 } 378 379 size_t frame_size; 380 int sample_rate, num_channels, bitrate; 381 if (!get_mp3_frame_size(header, &frame_size, 382 &sample_rate, &num_channels, &bitrate)) { 383 ++buffer_offset; 384 continue; 385 } 386 387 LOGV("found possible 1st frame at %ld", pos + buffer_offset); 388 389 // We found what looks like a valid frame, 390 // now find its successors. 
391 392 off_t test_pos = pos + buffer_offset + frame_size; 393 394 valid = true; 395 for (int j = 0; j < 3; ++j) { 396 uint8_t tmp[4]; 397 if (source->readAt(test_pos, tmp, 4) < 4) { 398 valid = false; 399 break; 400 } 401 402 uint32_t test_header = U32_AT(tmp); 403 404 LOGV("subsequent header is %08x", test_header); 405 406 if ((test_header & kMask) != (header & kMask)) { 407 valid = false; 408 break; 409 } 410 411 size_t test_frame_size; 412 if (!get_mp3_frame_size(test_header, &test_frame_size)) { 413 valid = false; 414 break; 415 } 416 417 LOGV("found subsequent frame #%d at %ld", j + 2, test_pos); 418 419 test_pos += test_frame_size; 420 } 421 422 if (valid) { 423 *inout_pos = pos + buffer_offset; 424 425 if (out_header != NULL) { 426 *out_header = header; 427 } 428 } else { 429 LOGV("no dice, no valid sequence of frames found."); 430 } 431 432 ++buffer_offset; 433 434 } while (!valid); 435 436 delete[] buffer; 437 buffer = NULL; 438 439 return valid; 440} 441 442class MP3Source : public MediaSource { 443public: 444 MP3Source( 445 const sp<MetaData> &meta, const sp<DataSource> &source, 446 off_t first_frame_pos, uint32_t fixed_header, 447 int32_t byte_number, const char *table_of_contents); 448 449 virtual status_t start(MetaData *params = NULL); 450 virtual status_t stop(); 451 452 virtual sp<MetaData> getFormat(); 453 454 virtual status_t read( 455 MediaBuffer **buffer, const ReadOptions *options = NULL); 456 457protected: 458 virtual ~MP3Source(); 459 460private: 461 sp<MetaData> mMeta; 462 sp<DataSource> mDataSource; 463 off_t mFirstFramePos; 464 uint32_t mFixedHeader; 465 off_t mCurrentPos; 466 int64_t mCurrentTimeUs; 467 bool mStarted; 468 int32_t mByteNumber; // total number of bytes in this MP3 469 // TOC entries in XING header. Skip the first one since it's always 0. 
470 char mTableOfContents[99]; 471 MediaBufferGroup *mGroup; 472 473 MP3Source(const MP3Source &); 474 MP3Source &operator=(const MP3Source &); 475}; 476 477MP3Extractor::MP3Extractor(const sp<DataSource> &source) 478 : mDataSource(source), 479 mFirstFramePos(-1), 480 mFixedHeader(0), 481 mByteNumber(0) { 482 off_t pos = 0; 483 uint32_t header; 484 bool success = Resync(mDataSource, 0, &pos, &header); 485 CHECK(success); 486 487 if (success) { 488 mFirstFramePos = pos; 489 mFixedHeader = header; 490 491 size_t frame_size; 492 int sample_rate; 493 int num_channels; 494 int bitrate; 495 get_mp3_frame_size( 496 header, &frame_size, &sample_rate, &num_channels, &bitrate); 497 498 mMeta = new MetaData; 499 500 mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG); 501 mMeta->setInt32(kKeySampleRate, sample_rate); 502 mMeta->setInt32(kKeyBitRate, bitrate * 1000); 503 mMeta->setInt32(kKeyChannelCount, num_channels); 504 505 int64_t duration; 506 parse_xing_header( 507 mDataSource, mFirstFramePos, NULL, &mByteNumber, 508 mTableOfContents, NULL, &duration); 509 if (duration > 0) { 510 mMeta->setInt64(kKeyDuration, duration); 511 } else { 512 off_t fileSize; 513 if (mDataSource->getSize(&fileSize) == OK) { 514 mMeta->setInt64( 515 kKeyDuration, 516 8000LL * (fileSize - mFirstFramePos) / bitrate); 517 } 518 } 519 } 520} 521 522MP3Extractor::~MP3Extractor() { 523} 524 525size_t MP3Extractor::countTracks() { 526 return (mFirstFramePos < 0) ? 
0 : 1; 527} 528 529sp<MediaSource> MP3Extractor::getTrack(size_t index) { 530 if (mFirstFramePos < 0 || index != 0) { 531 return NULL; 532 } 533 534 return new MP3Source( 535 mMeta, mDataSource, mFirstFramePos, mFixedHeader, 536 mByteNumber, mTableOfContents); 537} 538 539sp<MetaData> MP3Extractor::getTrackMetaData(size_t index, uint32_t flags) { 540 if (mFirstFramePos < 0 || index != 0) { 541 return NULL; 542 } 543 544 return mMeta; 545} 546 547//////////////////////////////////////////////////////////////////////////////// 548 549MP3Source::MP3Source( 550 const sp<MetaData> &meta, const sp<DataSource> &source, 551 off_t first_frame_pos, uint32_t fixed_header, 552 int32_t byte_number, const char *table_of_contents) 553 : mMeta(meta), 554 mDataSource(source), 555 mFirstFramePos(first_frame_pos), 556 mFixedHeader(fixed_header), 557 mCurrentPos(0), 558 mCurrentTimeUs(0), 559 mStarted(false), 560 mByteNumber(byte_number), 561 mGroup(NULL) { 562 memcpy (mTableOfContents, table_of_contents, sizeof(mTableOfContents)); 563} 564 565MP3Source::~MP3Source() { 566 if (mStarted) { 567 stop(); 568 } 569} 570 571status_t MP3Source::start(MetaData *) { 572 CHECK(!mStarted); 573 574 mGroup = new MediaBufferGroup; 575 576 const size_t kMaxFrameSize = 32768; 577 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 578 579 mCurrentPos = mFirstFramePos; 580 mCurrentTimeUs = 0; 581 582 mStarted = true; 583 584 return OK; 585} 586 587status_t MP3Source::stop() { 588 CHECK(mStarted); 589 590 delete mGroup; 591 mGroup = NULL; 592 593 mStarted = false; 594 595 return OK; 596} 597 598sp<MetaData> MP3Source::getFormat() { 599 return mMeta; 600} 601 602status_t MP3Source::read( 603 MediaBuffer **out, const ReadOptions *options) { 604 *out = NULL; 605 606 int64_t seekTimeUs; 607 if (options != NULL && options->getSeekTo(&seekTimeUs)) { 608 int32_t bitrate; 609 if (!mMeta->findInt32(kKeyBitRate, &bitrate)) { 610 // bitrate is in bits/sec. 
611 LOGI("no bitrate"); 612 613 return ERROR_UNSUPPORTED; 614 } 615 616 mCurrentTimeUs = seekTimeUs; 617 // interpolate in TOC to get file seek point in bytes 618 int64_t duration; 619 if ((mByteNumber > 0) && (mTableOfContents[0] > 0) 620 && mMeta->findInt64(kKeyDuration, &duration)) { 621 float percent = (float)seekTimeUs * 100 / duration; 622 float fx; 623 if( percent <= 0.0f ) { 624 fx = 0.0f; 625 } else if( percent >= 100.0f ) { 626 fx = 256.0f; 627 } else { 628 int a = (int)percent; 629 float fa, fb; 630 if ( a == 0 ) { 631 fa = 0.0f; 632 } else { 633 fa = (float)mTableOfContents[a-1]; 634 } 635 if ( a < 99 ) { 636 fb = (float)mTableOfContents[a]; 637 } else { 638 fb = 256.0f; 639 } 640 fx = fa + (fb-fa)*(percent-a); 641 } 642 mCurrentPos = mFirstFramePos + (int)((1.0f/256.0f)*fx*mByteNumber); 643 } else { 644 mCurrentPos = mFirstFramePos + seekTimeUs * bitrate / 8000000; 645 } 646 } 647 648 MediaBuffer *buffer; 649 status_t err = mGroup->acquire_buffer(&buffer); 650 if (err != OK) { 651 return err; 652 } 653 654 size_t frame_size; 655 for (;;) { 656 ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), 4); 657 if (n < 4) { 658 buffer->release(); 659 buffer = NULL; 660 661 return ERROR_END_OF_STREAM; 662 } 663 664 uint32_t header = U32_AT((const uint8_t *)buffer->data()); 665 666 if ((header & kMask) == (mFixedHeader & kMask) 667 && get_mp3_frame_size(header, &frame_size)) { 668 break; 669 } 670 671 // Lost sync. 672 LOGV("lost sync!\n"); 673 674 off_t pos = mCurrentPos; 675 if (!Resync(mDataSource, mFixedHeader, &pos, NULL)) { 676 LOGE("Unable to resync. Signalling end of stream."); 677 678 buffer->release(); 679 buffer = NULL; 680 681 return ERROR_END_OF_STREAM; 682 } 683 684 mCurrentPos = pos; 685 686 // Try again with the new position. 
687 } 688 689 CHECK(frame_size <= buffer->size()); 690 691 ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), frame_size); 692 if (n < (ssize_t)frame_size) { 693 buffer->release(); 694 buffer = NULL; 695 696 return ERROR_END_OF_STREAM; 697 } 698 699 buffer->set_range(0, frame_size); 700 701 buffer->meta_data()->setInt64(kKeyTime, mCurrentTimeUs); 702 703 mCurrentPos += frame_size; 704 mCurrentTimeUs += 1152 * 1000000 / 44100; 705 706 *out = buffer; 707 708 return OK; 709} 710 711sp<MetaData> MP3Extractor::getMetaData() { 712 sp<MetaData> meta = new MetaData; 713 714 if (mFirstFramePos < 0) { 715 return meta; 716 } 717 718 meta->setCString(kKeyMIMEType, "audio/mpeg"); 719 720 ID3 id3(mDataSource); 721 722 if (!id3.isValid()) { 723 return meta; 724 } 725 726 struct Map { 727 int key; 728 const char *tag1; 729 const char *tag2; 730 }; 731 static const Map kMap[] = { 732 { kKeyAlbum, "TALB", "TAL" }, 733 { kKeyArtist, "TPE1", "TP1" }, 734 { kKeyComposer, "TCOM", "TCM" }, 735 { kKeyGenre, "TCON", "TCO" }, 736 { kKeyTitle, "TIT2", "TT2" }, 737 { kKeyYear, "TYE", "TYER" }, 738 { kKeyAuthor, "TXT", "TEXT" }, 739 { kKeyCDTrackNumber, "TRK", "TRCK" }, 740 }; 741 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 742 743 for (size_t i = 0; i < kNumMapEntries; ++i) { 744 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 745 if (it->done()) { 746 delete it; 747 it = new ID3::Iterator(id3, kMap[i].tag2); 748 } 749 750 if (it->done()) { 751 delete it; 752 continue; 753 } 754 755 String8 s; 756 it->getString(&s); 757 delete it; 758 759 meta->setCString(kMap[i].key, s); 760 } 761 762 size_t dataSize; 763 String8 mime; 764 const void *data = id3.getAlbumArt(&dataSize, &mime); 765 766 if (data) { 767 meta->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 768 meta->setCString(kKeyAlbumArtMIME, mime.string()); 769 } 770 771 return meta; 772} 773 774bool SniffMP3( 775 const sp<DataSource> &source, String8 *mimeType, float *confidence) { 776 
off_t pos = 0; 777 uint32_t header; 778 if (!Resync(source, 0, &pos, &header)) { 779 return false; 780 } 781 782 *mimeType = MEDIA_MIMETYPE_AUDIO_MPEG; 783 *confidence = 0.3f; 784 785 return true; 786} 787 788} // namespace android 789