// MP3Extractor.cpp revision ba1f481614b4a4dd290e3b75e0f3f1879a383a44
1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17//#define LOG_NDEBUG 0 18#define LOG_TAG "MP3Extractor" 19#include <utils/Log.h> 20 21#include "include/MP3Extractor.h" 22 23#include "include/ID3.h" 24 25#include <media/stagefright/DataSource.h> 26#include <media/stagefright/MediaBuffer.h> 27#include <media/stagefright/MediaBufferGroup.h> 28#include <media/stagefright/MediaDebug.h> 29#include <media/stagefright/MediaDefs.h> 30#include <media/stagefright/MediaErrors.h> 31#include <media/stagefright/MediaSource.h> 32#include <media/stagefright/MetaData.h> 33#include <media/stagefright/Utils.h> 34#include <utils/String8.h> 35 36namespace android { 37 38// Everything must match except for 39// protection, bitrate, padding, private bits, mode extension, 40// copyright bit, original bit and emphasis. 41// Yes ... there are things that must indeed match... 
42static const uint32_t kMask = 0xfffe0cc0; 43 44static bool get_mp3_frame_size( 45 uint32_t header, size_t *frame_size, 46 int *out_sampling_rate = NULL, int *out_channels = NULL, 47 int *out_bitrate = NULL) { 48 *frame_size = 0; 49 50 if (out_sampling_rate) { 51 *out_sampling_rate = 0; 52 } 53 54 if (out_channels) { 55 *out_channels = 0; 56 } 57 58 if (out_bitrate) { 59 *out_bitrate = 0; 60 } 61 62 if ((header & 0xffe00000) != 0xffe00000) { 63 return false; 64 } 65 66 unsigned version = (header >> 19) & 3; 67 68 if (version == 0x01) { 69 return false; 70 } 71 72 unsigned layer = (header >> 17) & 3; 73 74 if (layer == 0x00) { 75 return false; 76 } 77 78 unsigned protection = (header >> 16) & 1; 79 80 unsigned bitrate_index = (header >> 12) & 0x0f; 81 82 if (bitrate_index == 0 || bitrate_index == 0x0f) { 83 // Disallow "free" bitrate. 84 return false; 85 } 86 87 unsigned sampling_rate_index = (header >> 10) & 3; 88 89 if (sampling_rate_index == 3) { 90 return false; 91 } 92 93 static const int kSamplingRateV1[] = { 44100, 48000, 32000 }; 94 int sampling_rate = kSamplingRateV1[sampling_rate_index]; 95 if (version == 2 /* V2 */) { 96 sampling_rate /= 2; 97 } else if (version == 0 /* V2.5 */) { 98 sampling_rate /= 4; 99 } 100 101 unsigned padding = (header >> 9) & 1; 102 103 if (layer == 3) { 104 // layer I 105 106 static const int kBitrateV1[] = { 107 32, 64, 96, 128, 160, 192, 224, 256, 108 288, 320, 352, 384, 416, 448 109 }; 110 111 static const int kBitrateV2[] = { 112 32, 48, 56, 64, 80, 96, 112, 128, 113 144, 160, 176, 192, 224, 256 114 }; 115 116 int bitrate = 117 (version == 3 /* V1 */) 118 ? 
kBitrateV1[bitrate_index - 1] 119 : kBitrateV2[bitrate_index - 1]; 120 121 if (out_bitrate) { 122 *out_bitrate = bitrate; 123 } 124 125 *frame_size = (12000 * bitrate / sampling_rate + padding) * 4; 126 } else { 127 // layer II or III 128 129 static const int kBitrateV1L2[] = { 130 32, 48, 56, 64, 80, 96, 112, 128, 131 160, 192, 224, 256, 320, 384 132 }; 133 134 static const int kBitrateV1L3[] = { 135 32, 40, 48, 56, 64, 80, 96, 112, 136 128, 160, 192, 224, 256, 320 137 }; 138 139 static const int kBitrateV2[] = { 140 8, 16, 24, 32, 40, 48, 56, 64, 141 80, 96, 112, 128, 144, 160 142 }; 143 144 int bitrate; 145 if (version == 3 /* V1 */) { 146 bitrate = (layer == 2 /* L2 */) 147 ? kBitrateV1L2[bitrate_index - 1] 148 : kBitrateV1L3[bitrate_index - 1]; 149 } else { 150 // V2 (or 2.5) 151 152 bitrate = kBitrateV2[bitrate_index - 1]; 153 } 154 155 if (out_bitrate) { 156 *out_bitrate = bitrate; 157 } 158 159 if (version == 3 /* V1 */) { 160 *frame_size = 144000 * bitrate / sampling_rate + padding; 161 } else { 162 // V2 or V2.5 163 *frame_size = 72000 * bitrate / sampling_rate + padding; 164 } 165 } 166 167 if (out_sampling_rate) { 168 *out_sampling_rate = sampling_rate; 169 } 170 171 if (out_channels) { 172 int channel_mode = (header >> 6) & 3; 173 174 *out_channels = (channel_mode == 3) ? 
1 : 2; 175 } 176 177 return true; 178} 179 180static bool parse_xing_header( 181 const sp<DataSource> &source, off_t first_frame_pos, 182 int32_t *frame_number = NULL, int32_t *byte_number = NULL, 183 char *table_of_contents = NULL, int32_t *quality_indicator = NULL, 184 int64_t *duration = NULL) { 185 186 if (frame_number) { 187 *frame_number = 0; 188 } 189 if (byte_number) { 190 *byte_number = 0; 191 } 192 if (table_of_contents) { 193 table_of_contents[0] = 0; 194 } 195 if (quality_indicator) { 196 *quality_indicator = 0; 197 } 198 if (duration) { 199 *duration = 0; 200 } 201 202 uint8_t buffer[4]; 203 int offset = first_frame_pos; 204 if (source->readAt(offset, &buffer, 4) < 4) { // get header 205 return false; 206 } 207 offset += 4; 208 209 uint8_t id, layer, sr_index, mode; 210 layer = (buffer[1] >> 1) & 3; 211 id = (buffer[1] >> 3) & 3; 212 sr_index = (buffer[2] >> 2) & 3; 213 mode = (buffer[3] >> 6) & 3; 214 if (layer == 0) { 215 return false; 216 } 217 if (id == 1) { 218 return false; 219 } 220 if (sr_index == 3) { 221 return false; 222 } 223 // determine offset of XING header 224 if(id&1) { // mpeg1 225 if (mode != 3) offset += 32; 226 else offset += 17; 227 } else { // mpeg2 228 if (mode != 3) offset += 17; 229 else offset += 9; 230 } 231 232 if (source->readAt(offset, &buffer, 4) < 4) { // XING header ID 233 return false; 234 } 235 offset += 4; 236 // Check XING ID 237 if ((buffer[0] != 'X') || (buffer[1] != 'i') 238 || (buffer[2] != 'n') || (buffer[3] != 'g')) { 239 if ((buffer[0] != 'I') || (buffer[1] != 'n') 240 || (buffer[2] != 'f') || (buffer[3] != 'o')) { 241 return false; 242 } 243 } 244 245 if (source->readAt(offset, &buffer, 4) < 4) { // flags 246 return false; 247 } 248 offset += 4; 249 uint32_t flags = U32_AT(buffer); 250 251 if (flags & 0x0001) { // Frames field is present 252 if (source->readAt(offset, buffer, 4) < 4) { 253 return false; 254 } 255 if (frame_number) { 256 *frame_number = U32_AT(buffer); 257 } 258 int32_t frame = 
U32_AT(buffer); 259 // Samples per Frame: 1. index = MPEG Version ID, 2. index = Layer 260 const int samplesPerFrames[2][3] = 261 { 262 { 384, 1152, 576 }, // MPEG 2, 2.5: layer1, layer2, layer3 263 { 384, 1152, 1152 }, // MPEG 1: layer1, layer2, layer3 264 }; 265 // sampling rates in hertz: 1. index = MPEG Version ID, 2. index = sampling rate index 266 const int samplingRates[4][3] = 267 { 268 { 11025, 12000, 8000, }, // MPEG 2.5 269 { 0, 0, 0, }, // reserved 270 { 22050, 24000, 16000, }, // MPEG 2 271 { 44100, 48000, 32000, } // MPEG 1 272 }; 273 if (duration) { 274 *duration = (int64_t)frame * samplesPerFrames[id&1][3-layer] * 1000000LL 275 / samplingRates[id][sr_index]; 276 } 277 offset += 4; 278 } 279 if (flags & 0x0002) { // Bytes field is present 280 if (byte_number) { 281 if (source->readAt(offset, buffer, 4) < 4) { 282 return false; 283 } 284 *byte_number = U32_AT(buffer); 285 } 286 offset += 4; 287 } 288 if (flags & 0x0004) { // TOC field is present 289 if (table_of_contents) { 290 if (source->readAt(offset + 1, table_of_contents, 99) < 99) { 291 return false; 292 } 293 } 294 offset += 100; 295 } 296 if (flags & 0x0008) { // Quality indicator field is present 297 if (quality_indicator) { 298 if (source->readAt(offset, buffer, 4) < 4) { 299 return false; 300 } 301 *quality_indicator = U32_AT(buffer); 302 } 303 } 304 return true; 305} 306 307static bool Resync( 308 const sp<DataSource> &source, uint32_t match_header, 309 off_t *inout_pos, uint32_t *out_header) { 310 if (*inout_pos == 0) { 311 // Skip an optional ID3 header if syncing at the very beginning 312 // of the datasource. 313 314 uint8_t id3header[10]; 315 if (source->readAt(0, id3header, sizeof(id3header)) 316 < (ssize_t)sizeof(id3header)) { 317 // If we can't even read these 10 bytes, we might as well bail out, 318 // even if there _were_ 10 bytes of valid mp3 audio data... 
319 return false; 320 } 321 322 if (id3header[0] == 'I' && id3header[1] == 'D' && id3header[2] == '3') { 323 // Skip the ID3v2 header. 324 325 size_t len = 326 ((id3header[6] & 0x7f) << 21) 327 | ((id3header[7] & 0x7f) << 14) 328 | ((id3header[8] & 0x7f) << 7) 329 | (id3header[9] & 0x7f); 330 331 len += 10; 332 333 *inout_pos += len; 334 } 335 } 336 337 const size_t kMaxFrameSize = 4096; 338 uint8_t *buffer = new uint8_t[kMaxFrameSize]; 339 340 off_t pos = *inout_pos - kMaxFrameSize; 341 size_t buffer_offset = kMaxFrameSize; 342 size_t buffer_length = kMaxFrameSize; 343 bool valid = false; 344 do { 345 if (buffer_offset + 3 >= buffer_length) { 346 if (buffer_length < kMaxFrameSize) { 347 break; 348 } 349 350 pos += buffer_offset; 351 352 if (pos >= *inout_pos + 128 * 1024) { 353 // Don't scan forever. 354 LOGV("giving up at offset %ld", pos); 355 break; 356 } 357 358 memmove(buffer, &buffer[buffer_offset], buffer_length - buffer_offset); 359 buffer_length = buffer_length - buffer_offset; 360 buffer_offset = 0; 361 362 ssize_t n = source->readAt( 363 pos, &buffer[buffer_length], kMaxFrameSize - buffer_length); 364 365 if (n <= 0) { 366 break; 367 } 368 369 buffer_length += (size_t)n; 370 371 continue; 372 } 373 374 uint32_t header = U32_AT(&buffer[buffer_offset]); 375 376 if (match_header != 0 && (header & kMask) != (match_header & kMask)) { 377 ++buffer_offset; 378 continue; 379 } 380 381 size_t frame_size; 382 int sample_rate, num_channels, bitrate; 383 if (!get_mp3_frame_size(header, &frame_size, 384 &sample_rate, &num_channels, &bitrate)) { 385 ++buffer_offset; 386 continue; 387 } 388 389 LOGV("found possible 1st frame at %ld", pos + buffer_offset); 390 391 // We found what looks like a valid frame, 392 // now find its successors. 
393 394 off_t test_pos = pos + buffer_offset + frame_size; 395 396 valid = true; 397 for (int j = 0; j < 3; ++j) { 398 uint8_t tmp[4]; 399 if (source->readAt(test_pos, tmp, 4) < 4) { 400 valid = false; 401 break; 402 } 403 404 uint32_t test_header = U32_AT(tmp); 405 406 LOGV("subsequent header is %08x", test_header); 407 408 if ((test_header & kMask) != (header & kMask)) { 409 valid = false; 410 break; 411 } 412 413 size_t test_frame_size; 414 if (!get_mp3_frame_size(test_header, &test_frame_size)) { 415 valid = false; 416 break; 417 } 418 419 LOGV("found subsequent frame #%d at %ld", j + 2, test_pos); 420 421 test_pos += test_frame_size; 422 } 423 424 if (valid) { 425 *inout_pos = pos + buffer_offset; 426 427 if (out_header != NULL) { 428 *out_header = header; 429 } 430 } else { 431 LOGV("no dice, no valid sequence of frames found."); 432 } 433 434 ++buffer_offset; 435 436 } while (!valid); 437 438 delete[] buffer; 439 buffer = NULL; 440 441 return valid; 442} 443 444class MP3Source : public MediaSource { 445public: 446 MP3Source( 447 const sp<MetaData> &meta, const sp<DataSource> &source, 448 off_t first_frame_pos, uint32_t fixed_header, 449 int32_t byte_number, const char *table_of_contents); 450 451 virtual status_t start(MetaData *params = NULL); 452 virtual status_t stop(); 453 454 virtual sp<MetaData> getFormat(); 455 456 virtual status_t read( 457 MediaBuffer **buffer, const ReadOptions *options = NULL); 458 459protected: 460 virtual ~MP3Source(); 461 462private: 463 sp<MetaData> mMeta; 464 sp<DataSource> mDataSource; 465 off_t mFirstFramePos; 466 uint32_t mFixedHeader; 467 off_t mCurrentPos; 468 int64_t mCurrentTimeUs; 469 bool mStarted; 470 int32_t mByteNumber; // total number of bytes in this MP3 471 // TOC entries in XING header. Skip the first one since it's always 0. 
472 char mTableOfContents[99]; 473 MediaBufferGroup *mGroup; 474 475 MP3Source(const MP3Source &); 476 MP3Source &operator=(const MP3Source &); 477}; 478 479MP3Extractor::MP3Extractor(const sp<DataSource> &source) 480 : mDataSource(source), 481 mFirstFramePos(-1), 482 mFixedHeader(0), 483 mByteNumber(0) { 484 off_t pos = 0; 485 uint32_t header; 486 bool success = Resync(mDataSource, 0, &pos, &header); 487 CHECK(success); 488 489 if (success) { 490 mFirstFramePos = pos; 491 mFixedHeader = header; 492 493 size_t frame_size; 494 int sample_rate; 495 int num_channels; 496 int bitrate; 497 get_mp3_frame_size( 498 header, &frame_size, &sample_rate, &num_channels, &bitrate); 499 500 mMeta = new MetaData; 501 502 mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG); 503 mMeta->setInt32(kKeySampleRate, sample_rate); 504 mMeta->setInt32(kKeyBitRate, bitrate * 1000); 505 mMeta->setInt32(kKeyChannelCount, num_channels); 506 507 int64_t duration; 508 parse_xing_header( 509 mDataSource, mFirstFramePos, NULL, &mByteNumber, 510 mTableOfContents, NULL, &duration); 511 if (duration > 0) { 512 mMeta->setInt64(kKeyDuration, duration); 513 } else { 514 off_t fileSize; 515 if (mDataSource->getSize(&fileSize) == OK) { 516 mMeta->setInt64( 517 kKeyDuration, 518 8000LL * (fileSize - mFirstFramePos) / bitrate); 519 } 520 } 521 } 522} 523 524MP3Extractor::~MP3Extractor() { 525} 526 527size_t MP3Extractor::countTracks() { 528 return (mFirstFramePos < 0) ? 
0 : 1; 529} 530 531sp<MediaSource> MP3Extractor::getTrack(size_t index) { 532 if (mFirstFramePos < 0 || index != 0) { 533 return NULL; 534 } 535 536 return new MP3Source( 537 mMeta, mDataSource, mFirstFramePos, mFixedHeader, 538 mByteNumber, mTableOfContents); 539} 540 541sp<MetaData> MP3Extractor::getTrackMetaData(size_t index, uint32_t flags) { 542 if (mFirstFramePos < 0 || index != 0) { 543 return NULL; 544 } 545 546 return mMeta; 547} 548 549//////////////////////////////////////////////////////////////////////////////// 550 551MP3Source::MP3Source( 552 const sp<MetaData> &meta, const sp<DataSource> &source, 553 off_t first_frame_pos, uint32_t fixed_header, 554 int32_t byte_number, const char *table_of_contents) 555 : mMeta(meta), 556 mDataSource(source), 557 mFirstFramePos(first_frame_pos), 558 mFixedHeader(fixed_header), 559 mCurrentPos(0), 560 mCurrentTimeUs(0), 561 mStarted(false), 562 mByteNumber(byte_number), 563 mGroup(NULL) { 564 memcpy (mTableOfContents, table_of_contents, sizeof(mTableOfContents)); 565} 566 567MP3Source::~MP3Source() { 568 if (mStarted) { 569 stop(); 570 } 571} 572 573status_t MP3Source::start(MetaData *) { 574 CHECK(!mStarted); 575 576 mGroup = new MediaBufferGroup; 577 578 const size_t kMaxFrameSize = 32768; 579 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 580 581 mCurrentPos = mFirstFramePos; 582 mCurrentTimeUs = 0; 583 584 mStarted = true; 585 586 return OK; 587} 588 589status_t MP3Source::stop() { 590 CHECK(mStarted); 591 592 delete mGroup; 593 mGroup = NULL; 594 595 mStarted = false; 596 597 return OK; 598} 599 600sp<MetaData> MP3Source::getFormat() { 601 return mMeta; 602} 603 604status_t MP3Source::read( 605 MediaBuffer **out, const ReadOptions *options) { 606 *out = NULL; 607 608 int64_t seekTimeUs; 609 if (options != NULL && options->getSeekTo(&seekTimeUs)) { 610 int32_t bitrate; 611 if (!mMeta->findInt32(kKeyBitRate, &bitrate)) { 612 // bitrate is in bits/sec. 
613 LOGI("no bitrate"); 614 615 return ERROR_UNSUPPORTED; 616 } 617 618 mCurrentTimeUs = seekTimeUs; 619 // interpolate in TOC to get file seek point in bytes 620 int64_t duration; 621 if ((mByteNumber > 0) && (mTableOfContents[0] > 0) 622 && mMeta->findInt64(kKeyDuration, &duration)) { 623 float percent = (float)seekTimeUs * 100 / duration; 624 float fx; 625 if( percent <= 0.0f ) { 626 fx = 0.0f; 627 } else if( percent >= 100.0f ) { 628 fx = 256.0f; 629 } else { 630 int a = (int)percent; 631 float fa, fb; 632 if ( a == 0 ) { 633 fa = 0.0f; 634 } else { 635 fa = (float)mTableOfContents[a-1]; 636 } 637 if ( a < 99 ) { 638 fb = (float)mTableOfContents[a]; 639 } else { 640 fb = 256.0f; 641 } 642 fx = fa + (fb-fa)*(percent-a); 643 } 644 mCurrentPos = mFirstFramePos + (int)((1.0f/256.0f)*fx*mByteNumber); 645 } else { 646 mCurrentPos = mFirstFramePos + seekTimeUs * bitrate / 8000000; 647 } 648 } 649 650 MediaBuffer *buffer; 651 status_t err = mGroup->acquire_buffer(&buffer); 652 if (err != OK) { 653 return err; 654 } 655 656 size_t frame_size; 657 for (;;) { 658 ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), 4); 659 if (n < 4) { 660 buffer->release(); 661 buffer = NULL; 662 663 return ERROR_END_OF_STREAM; 664 } 665 666 uint32_t header = U32_AT((const uint8_t *)buffer->data()); 667 668 if ((header & kMask) == (mFixedHeader & kMask) 669 && get_mp3_frame_size(header, &frame_size)) { 670 break; 671 } 672 673 // Lost sync. 674 LOGV("lost sync! header = 0x%08x, old header = 0x%08x\n", header, mFixedHeader); 675 676 off_t pos = mCurrentPos; 677 if (!Resync(mDataSource, mFixedHeader, &pos, NULL)) { 678 LOGE("Unable to resync. Signalling end of stream."); 679 680 buffer->release(); 681 buffer = NULL; 682 683 return ERROR_END_OF_STREAM; 684 } 685 686 mCurrentPos = pos; 687 688 // Try again with the new position. 
689 } 690 691 CHECK(frame_size <= buffer->size()); 692 693 ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), frame_size); 694 if (n < (ssize_t)frame_size) { 695 buffer->release(); 696 buffer = NULL; 697 698 return ERROR_END_OF_STREAM; 699 } 700 701 buffer->set_range(0, frame_size); 702 703 buffer->meta_data()->setInt64(kKeyTime, mCurrentTimeUs); 704 705 mCurrentPos += frame_size; 706 mCurrentTimeUs += 1152 * 1000000 / 44100; 707 708 *out = buffer; 709 710 return OK; 711} 712 713sp<MetaData> MP3Extractor::getMetaData() { 714 sp<MetaData> meta = new MetaData; 715 716 if (mFirstFramePos < 0) { 717 return meta; 718 } 719 720 meta->setCString(kKeyMIMEType, "audio/mpeg"); 721 722 ID3 id3(mDataSource); 723 724 if (!id3.isValid()) { 725 return meta; 726 } 727 728 struct Map { 729 int key; 730 const char *tag1; 731 const char *tag2; 732 }; 733 static const Map kMap[] = { 734 { kKeyAlbum, "TALB", "TAL" }, 735 { kKeyArtist, "TPE1", "TP1" }, 736 { kKeyAlbumArtist, "TPE2", "TP2" }, 737 { kKeyComposer, "TCOM", "TCM" }, 738 { kKeyGenre, "TCON", "TCO" }, 739 { kKeyTitle, "TIT2", "TT2" }, 740 { kKeyYear, "TYE", "TYER" }, 741 { kKeyAuthor, "TXT", "TEXT" }, 742 { kKeyCDTrackNumber, "TRK", "TRCK" }, 743 { kKeyDiscNumber, "TPA", "TPOS" }, 744 }; 745 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 746 747 for (size_t i = 0; i < kNumMapEntries; ++i) { 748 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 749 if (it->done()) { 750 delete it; 751 it = new ID3::Iterator(id3, kMap[i].tag2); 752 } 753 754 if (it->done()) { 755 delete it; 756 continue; 757 } 758 759 String8 s; 760 it->getString(&s); 761 delete it; 762 763 meta->setCString(kMap[i].key, s); 764 } 765 766 size_t dataSize; 767 String8 mime; 768 const void *data = id3.getAlbumArt(&dataSize, &mime); 769 770 if (data) { 771 meta->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 772 meta->setCString(kKeyAlbumArtMIME, mime.string()); 773 } 774 775 return meta; 776} 777 778bool 
SniffMP3( 779 const sp<DataSource> &source, String8 *mimeType, float *confidence) { 780 off_t pos = 0; 781 uint32_t header; 782 if (!Resync(source, 0, &pos, &header)) { 783 return false; 784 } 785 786 *mimeType = MEDIA_MIMETYPE_AUDIO_MPEG; 787 *confidence = 0.3f; 788 789 return true; 790} 791 792} // namespace android 793