MP3Extractor.cpp revision db74495dbf653a72018396607fae63946bed44ec
1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17//#define LOG_NDEBUG 0 18#define LOG_TAG "MP3Extractor" 19#include <utils/Log.h> 20 21#include "include/MP3Extractor.h" 22 23#include <media/stagefright/DataSource.h> 24#include <media/stagefright/MediaBuffer.h> 25#include <media/stagefright/MediaBufferGroup.h> 26#include <media/stagefright/MediaDebug.h> 27#include <media/stagefright/MediaDefs.h> 28#include <media/stagefright/MediaErrors.h> 29#include <media/stagefright/MediaSource.h> 30#include <media/stagefright/MetaData.h> 31#include <media/stagefright/Utils.h> 32#include <utils/String8.h> 33 34namespace android { 35 36// Everything must match except for 37// protection, bitrate, padding, private bits and mode extension. 38static const uint32_t kMask = 0xfffe0ccf; 39 40static bool get_mp3_frame_size( 41 uint32_t header, size_t *frame_size, 42 int *out_sampling_rate = NULL, int *out_channels = NULL, 43 int *out_bitrate = NULL) { 44 *frame_size = 0; 45 46 if (out_sampling_rate) { 47 *out_sampling_rate = 0; 48 } 49 50 if (out_channels) { 51 *out_channels = 0; 52 } 53 54 if (out_bitrate) { 55 *out_bitrate = 0; 56 } 57 58 if ((header & 0xffe00000) != 0xffe00000) { 59 return false; 60 } 61 62 unsigned version = (header >> 19) & 3; 63 64 if (version == 0x01) { 65 return false; 66 } 67 68 unsigned layer = (header >> 17) & 3; 69 70 if (layer == 0x00) { 71 return false; 72 } 73 74 unsigned protection = (header >> 16) & 1; 75 76 unsigned bitrate_index = (header >> 12) & 0x0f; 77 78 if (bitrate_index == 0 || bitrate_index == 0x0f) { 79 // Disallow "free" bitrate. 80 return false; 81 } 82 83 unsigned sampling_rate_index = (header >> 10) & 3; 84 85 if (sampling_rate_index == 3) { 86 return false; 87 } 88 89 static const int kSamplingRateV1[] = { 44100, 48000, 32000 }; 90 int sampling_rate = kSamplingRateV1[sampling_rate_index]; 91 if (version == 2 /* V2 */) { 92 sampling_rate /= 2; 93 } else if (version == 0 /* V2.5 */) { 94 sampling_rate /= 4; 95 } 96 97 unsigned padding = (header >> 9) & 1; 98 99 if (layer == 3) { 100 // layer I 101 102 static const int kBitrateV1[] = { 103 32, 64, 96, 128, 160, 192, 224, 256, 104 288, 320, 352, 384, 416, 448 105 }; 106 107 static const int kBitrateV2[] = { 108 32, 48, 56, 64, 80, 96, 112, 128, 109 144, 160, 176, 192, 224, 256 110 }; 111 112 int bitrate = 113 (version == 3 /* V1 */) 114 ? kBitrateV1[bitrate_index - 1] 115 : kBitrateV2[bitrate_index - 1]; 116 117 if (out_bitrate) { 118 *out_bitrate = bitrate; 119 } 120 121 *frame_size = (12000 * bitrate / sampling_rate + padding) * 4; 122 } else { 123 // layer II or III 124 125 static const int kBitrateV1L2[] = { 126 32, 48, 56, 64, 80, 96, 112, 128, 127 160, 192, 224, 256, 320, 384 128 }; 129 130 static const int kBitrateV1L3[] = { 131 32, 40, 48, 56, 64, 80, 96, 112, 132 128, 160, 192, 224, 256, 320 133 }; 134 135 static const int kBitrateV2[] = { 136 8, 16, 24, 32, 40, 48, 56, 64, 137 80, 96, 112, 128, 144, 160 138 }; 139 140 int bitrate; 141 if (version == 3 /* V1 */) { 142 bitrate = (layer == 2 /* L2 */) 143 ? kBitrateV1L2[bitrate_index - 1] 144 : kBitrateV1L3[bitrate_index - 1]; 145 } else { 146 // V2 (or 2.5) 147 148 bitrate = kBitrateV2[bitrate_index - 1]; 149 } 150 151 if (out_bitrate) { 152 *out_bitrate = bitrate; 153 } 154 155 if (version == 3 /* V1 */) { 156 *frame_size = 144000 * bitrate / sampling_rate + padding; 157 } else { 158 // V2 or V2.5 159 *frame_size = 72000 * bitrate / sampling_rate + padding; 160 } 161 } 162 163 if (out_sampling_rate) { 164 *out_sampling_rate = sampling_rate; 165 } 166 167 if (out_channels) { 168 int channel_mode = (header >> 6) & 3; 169 170 *out_channels = (channel_mode == 3) ? 1 : 2; 171 } 172 173 return true; 174} 175 176static bool parse_xing_header( 177 const sp<DataSource> &source, off_t first_frame_pos, 178 int32_t *frame_number = NULL, int32_t *byte_number = NULL, 179 char *table_of_contents = NULL, int32_t *quality_indicator = NULL, 180 int64_t *duration = NULL) { 181 182 if (frame_number) { 183 *frame_number = 0; 184 } 185 if (byte_number) { 186 *byte_number = 0; 187 } 188 if (table_of_contents) { 189 table_of_contents[0] = 0; 190 } 191 if (quality_indicator) { 192 *quality_indicator = 0; 193 } 194 if (duration) { 195 *duration = 0; 196 } 197 198 uint8_t buffer[4]; 199 int offset = first_frame_pos; 200 if (source->readAt(offset, &buffer, 4) < 4) { // get header 201 return false; 202 } 203 offset += 4; 204 205 uint8_t id, layer, sr_index, mode; 206 layer = (buffer[1] >> 1) & 3; 207 id = (buffer[1] >> 3) & 3; 208 sr_index = (buffer[2] >> 2) & 3; 209 mode = (buffer[3] >> 6) & 3; 210 if (layer == 0) { 211 return false; 212 } 213 if (id == 1) { 214 return false; 215 } 216 if (sr_index == 3) { 217 return false; 218 } 219 // determine offset of XING header 220 if(id&1) { // mpeg1 221 if (mode != 3) offset += 32; 222 else offset += 17; 223 } else { // mpeg2 224 if (mode != 3) offset += 17; 225 else offset += 9; 226 } 227 228 if (source->readAt(offset, &buffer, 4) < 4) { // XING header ID 229 return false; 230 } 231 offset += 4; 232 // Check XING ID 233 if ((buffer[0] != 'X') || (buffer[1] != 'i') 234 || (buffer[2] != 'n') || (buffer[3] != 'g')) { 235 if ((buffer[0] != 'I') || (buffer[1] != 'n') 236 || (buffer[2] != 'f') || (buffer[3] != 'o')) { 237 return false; 238 } 239 } 240 241 if (source->readAt(offset, &buffer, 4) < 4) { // flags 242 return false; 243 } 244 offset += 4; 245 uint32_t flags = U32_AT(buffer); 246 247 if (flags & 0x0001) { // Frames field is present 248 if (source->readAt(offset, buffer, 4) < 4) { 249 return false; 250 } 251 if (frame_number) { 252 *frame_number = U32_AT(buffer); 253 } 254 int32_t frame = U32_AT(buffer); 255 // Samples per Frame: 1. index = MPEG Version ID, 2. index = Layer 256 const int samplesPerFrames[2][3] = 257 { 258 { 384, 1152, 576 }, // MPEG 2, 2.5: layer1, layer2, layer3 259 { 384, 1152, 1152 }, // MPEG 1: layer1, layer2, layer3 260 }; 261 // sampling rates in hertz: 1. index = MPEG Version ID, 2. index = sampling rate index 262 const int samplingRates[4][3] = 263 { 264 { 11025, 12000, 8000, }, // MPEG 2.5 265 { 0, 0, 0, }, // reserved 266 { 22050, 24000, 16000, }, // MPEG 2 267 { 44100, 48000, 32000, } // MPEG 1 268 }; 269 if (duration) { 270 *duration = (int64_t)frame * samplesPerFrames[id&1][3-layer] * 1000000LL 271 / samplingRates[id][sr_index]; 272 } 273 offset += 4; 274 } 275 if (flags & 0x0002) { // Bytes field is present 276 if (byte_number) { 277 if (source->readAt(offset, buffer, 4) < 4) { 278 return false; 279 } 280 *byte_number = U32_AT(buffer); 281 } 282 offset += 4; 283 } 284 if (flags & 0x0004) { // TOC field is present 285 if (table_of_contents) { 286 if (source->readAt(offset + 1, table_of_contents, 99) < 99) { 287 return false; 288 } 289 } 290 offset += 100; 291 } 292 if (flags & 0x0008) { // Quality indicator field is present 293 if (quality_indicator) { 294 if (source->readAt(offset, buffer, 4) < 4) { 295 return false; 296 } 297 *quality_indicator = U32_AT(buffer); 298 } 299 } 300 return true; 301} 302 303static bool Resync( 304 const sp<DataSource> &source, uint32_t match_header, 305 off_t *inout_pos, uint32_t *out_header) { 306 if (*inout_pos == 0) { 307 // Skip an optional ID3 header if syncing at the very beginning 308 // of the datasource. 309 310 uint8_t id3header[10]; 311 if (source->readAt(0, id3header, sizeof(id3header)) 312 < (ssize_t)sizeof(id3header)) { 313 // If we can't even read these 10 bytes, we might as well bail out, 314 // even if there _were_ 10 bytes of valid mp3 audio data... 315 return false; 316 } 317 318 if (id3header[0] == 'I' && id3header[1] == 'D' && id3header[2] == '3') { 319 // Skip the ID3v2 header. 320 321 size_t len = 322 ((id3header[6] & 0x7f) << 21) 323 | ((id3header[7] & 0x7f) << 14) 324 | ((id3header[8] & 0x7f) << 7) 325 | (id3header[9] & 0x7f); 326 327 len += 10; 328 329 *inout_pos += len; 330 } 331 } 332 333 const size_t kMaxFrameSize = 4096; 334 uint8_t *buffer = new uint8_t[kMaxFrameSize]; 335 336 off_t pos = *inout_pos - kMaxFrameSize; 337 size_t buffer_offset = kMaxFrameSize; 338 size_t buffer_length = kMaxFrameSize; 339 bool valid = false; 340 do { 341 if (buffer_offset + 3 >= buffer_length) { 342 if (buffer_length < kMaxFrameSize) { 343 break; 344 } 345 346 pos += buffer_offset; 347 348 if (pos >= *inout_pos + 128 * 1024) { 349 // Don't scan forever. 350 LOGV("giving up at offset %ld", pos); 351 break; 352 } 353 354 memmove(buffer, &buffer[buffer_offset], buffer_length - buffer_offset); 355 buffer_length = buffer_length - buffer_offset; 356 buffer_offset = 0; 357 358 ssize_t n = source->readAt( 359 pos, &buffer[buffer_length], kMaxFrameSize - buffer_length); 360 361 if (n <= 0) { 362 break; 363 } 364 365 buffer_length += (size_t)n; 366 367 continue; 368 } 369 370 uint32_t header = U32_AT(&buffer[buffer_offset]); 371 372 if (match_header != 0 && (header & kMask) != (match_header & kMask)) { 373 ++buffer_offset; 374 continue; 375 } 376 377 size_t frame_size; 378 int sample_rate, num_channels, bitrate; 379 if (!get_mp3_frame_size(header, &frame_size, 380 &sample_rate, &num_channels, &bitrate)) { 381 ++buffer_offset; 382 continue; 383 } 384 385 LOGV("found possible 1st frame at %ld", pos + buffer_offset); 386 387 // We found what looks like a valid frame, 388 // now find its successors. 389 390 off_t test_pos = pos + buffer_offset + frame_size; 391 392 valid = true; 393 for (int j = 0; j < 3; ++j) { 394 uint8_t tmp[4]; 395 if (source->readAt(test_pos, tmp, 4) < 4) { 396 valid = false; 397 break; 398 } 399 400 uint32_t test_header = U32_AT(tmp); 401 402 LOGV("subsequent header is %08x", test_header); 403 404 if ((test_header & kMask) != (header & kMask)) { 405 valid = false; 406 break; 407 } 408 409 size_t test_frame_size; 410 if (!get_mp3_frame_size(test_header, &test_frame_size)) { 411 valid = false; 412 break; 413 } 414 415 LOGV("found subsequent frame #%d at %ld", j + 2, test_pos); 416 417 test_pos += test_frame_size; 418 } 419 420 if (valid) { 421 *inout_pos = pos + buffer_offset; 422 423 if (out_header != NULL) { 424 *out_header = header; 425 } 426 } else { 427 LOGV("no dice, no valid sequence of frames found."); 428 } 429 430 ++buffer_offset; 431 432 } while (!valid); 433 434 delete[] buffer; 435 buffer = NULL; 436 437 return valid; 438} 439 440class MP3Source : public MediaSource { 441public: 442 MP3Source( 443 const sp<MetaData> &meta, const sp<DataSource> &source, 444 off_t first_frame_pos, uint32_t fixed_header, 445 int32_t byte_number, const char *table_of_contents); 446 447 virtual status_t start(MetaData *params = NULL); 448 virtual status_t stop(); 449 450 virtual sp<MetaData> getFormat(); 451 452 virtual status_t read( 453 MediaBuffer **buffer, const ReadOptions *options = NULL); 454 455protected: 456 virtual ~MP3Source(); 457 458private: 459 sp<MetaData> mMeta; 460 sp<DataSource> mDataSource; 461 off_t mFirstFramePos; 462 uint32_t mFixedHeader; 463 off_t mCurrentPos; 464 int64_t mCurrentTimeUs; 465 bool mStarted; 466 int32_t mByteNumber; // total number of bytes in this MP3 467 // TOC entries in XING header. Skip the first one since it's always 0. 468 char mTableOfContents[99]; 469 MediaBufferGroup *mGroup; 470 471 MP3Source(const MP3Source &); 472 MP3Source &operator=(const MP3Source &); 473}; 474 475MP3Extractor::MP3Extractor(const sp<DataSource> &source) 476 : mDataSource(source), 477 mFirstFramePos(-1), 478 mFixedHeader(0), 479 mByteNumber(0) { 480 off_t pos = 0; 481 uint32_t header; 482 bool success = Resync(mDataSource, 0, &pos, &header); 483 CHECK(success); 484 485 if (success) { 486 mFirstFramePos = pos; 487 mFixedHeader = header; 488 489 size_t frame_size; 490 int sample_rate; 491 int num_channels; 492 int bitrate; 493 get_mp3_frame_size( 494 header, &frame_size, &sample_rate, &num_channels, &bitrate); 495 496 mMeta = new MetaData; 497 498 mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG); 499 mMeta->setInt32(kKeySampleRate, sample_rate); 500 mMeta->setInt32(kKeyBitRate, bitrate * 1000); 501 mMeta->setInt32(kKeyChannelCount, num_channels); 502 503 int64_t duration; 504 parse_xing_header( 505 mDataSource, mFirstFramePos, NULL, &mByteNumber, 506 mTableOfContents, NULL, &duration); 507 if (duration > 0) { 508 mMeta->setInt64(kKeyDuration, duration); 509 } else { 510 off_t fileSize; 511 if (mDataSource->getSize(&fileSize) == OK) { 512 mMeta->setInt64( 513 kKeyDuration, 514 8000LL * (fileSize - mFirstFramePos) / bitrate); 515 } 516 } 517 } 518} 519 520MP3Extractor::~MP3Extractor() { 521} 522 523size_t MP3Extractor::countTracks() { 524 return (mFirstFramePos < 0) ? 0 : 1; 525} 526 527sp<MediaSource> MP3Extractor::getTrack(size_t index) { 528 if (mFirstFramePos < 0 || index != 0) { 529 return NULL; 530 } 531 532 return new MP3Source( 533 mMeta, mDataSource, mFirstFramePos, mFixedHeader, 534 mByteNumber, mTableOfContents); 535} 536 537sp<MetaData> MP3Extractor::getTrackMetaData(size_t index, uint32_t flags) { 538 if (mFirstFramePos < 0 || index != 0) { 539 return NULL; 540 } 541 542 return mMeta; 543} 544 545//////////////////////////////////////////////////////////////////////////////// 546 547MP3Source::MP3Source( 548 const sp<MetaData> &meta, const sp<DataSource> &source, 549 off_t first_frame_pos, uint32_t fixed_header, 550 int32_t byte_number, const char *table_of_contents) 551 : mMeta(meta), 552 mDataSource(source), 553 mFirstFramePos(first_frame_pos), 554 mFixedHeader(fixed_header), 555 mCurrentPos(0), 556 mCurrentTimeUs(0), 557 mStarted(false), 558 mByteNumber(byte_number), 559 mGroup(NULL) { 560 memcpy (mTableOfContents, table_of_contents, sizeof(mTableOfContents)); 561} 562 563MP3Source::~MP3Source() { 564 if (mStarted) { 565 stop(); 566 } 567} 568 569status_t MP3Source::start(MetaData *) { 570 CHECK(!mStarted); 571 572 mGroup = new MediaBufferGroup; 573 574 const size_t kMaxFrameSize = 32768; 575 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 576 577 mCurrentPos = mFirstFramePos; 578 mCurrentTimeUs = 0; 579 580 mStarted = true; 581 582 return OK; 583} 584 585status_t MP3Source::stop() { 586 CHECK(mStarted); 587 588 delete mGroup; 589 mGroup = NULL; 590 591 mStarted = false; 592 593 return OK; 594} 595 596sp<MetaData> MP3Source::getFormat() { 597 return mMeta; 598} 599 600status_t MP3Source::read( 601 MediaBuffer **out, const ReadOptions *options) { 602 *out = NULL; 603 604 int64_t seekTimeUs; 605 if (options != NULL && options->getSeekTo(&seekTimeUs)) { 606 int32_t bitrate; 607 if (!mMeta->findInt32(kKeyBitRate, &bitrate)) { 608 // bitrate is in bits/sec. 609 LOGI("no bitrate"); 610 611 return ERROR_UNSUPPORTED; 612 } 613 614 mCurrentTimeUs = seekTimeUs; 615 // interpolate in TOC to get file seek point in bytes 616 int64_t duration; 617 if ((mByteNumber > 0) && (mTableOfContents[0] > 0) 618 && mMeta->findInt64(kKeyDuration, &duration)) { 619 float percent = (float)seekTimeUs * 100 / duration; 620 float fx; 621 if( percent <= 0.0f ) { 622 fx = 0.0f; 623 } else if( percent >= 100.0f ) { 624 fx = 256.0f; 625 } else { 626 int a = (int)percent; 627 float fa, fb; 628 if ( a == 0 ) { 629 fa = 0.0f; 630 } else { 631 fa = (float)mTableOfContents[a-1]; 632 } 633 if ( a < 99 ) { 634 fb = (float)mTableOfContents[a]; 635 } else { 636 fb = 256.0f; 637 } 638 fx = fa + (fb-fa)*(percent-a); 639 } 640 mCurrentPos = mFirstFramePos + (int)((1.0f/256.0f)*fx*mByteNumber); 641 } else { 642 mCurrentPos = mFirstFramePos + seekTimeUs * bitrate / 8000000; 643 } 644 } 645 646 MediaBuffer *buffer; 647 status_t err = mGroup->acquire_buffer(&buffer); 648 if (err != OK) { 649 return err; 650 } 651 652 size_t frame_size; 653 for (;;) { 654 ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), 4); 655 if (n < 4) { 656 buffer->release(); 657 buffer = NULL; 658 659 return ERROR_END_OF_STREAM; 660 } 661 662 uint32_t header = U32_AT((const uint8_t *)buffer->data()); 663 664 if ((header & kMask) == (mFixedHeader & kMask) 665 && get_mp3_frame_size(header, &frame_size)) { 666 break; 667 } 668 669 // Lost sync. 670 LOGW("lost sync!\n"); 671 672 off_t pos = mCurrentPos; 673 if (!Resync(mDataSource, mFixedHeader, &pos, NULL)) { 674 LOGE("Unable to resync. Signalling end of stream."); 675 676 buffer->release(); 677 buffer = NULL; 678 679 return ERROR_END_OF_STREAM; 680 } 681 682 mCurrentPos = pos; 683 684 // Try again with the new position. 685 } 686 687 CHECK(frame_size <= buffer->size()); 688 689 ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), frame_size); 690 if (n < (ssize_t)frame_size) { 691 buffer->release(); 692 buffer = NULL; 693 694 return ERROR_END_OF_STREAM; 695 } 696 697 buffer->set_range(0, frame_size); 698 699 buffer->meta_data()->setInt64(kKeyTime, mCurrentTimeUs); 700 701 mCurrentPos += frame_size; 702 mCurrentTimeUs += 1152 * 1000000 / 44100; 703 704 *out = buffer; 705 706 return OK; 707} 708 709bool SniffMP3( 710 const sp<DataSource> &source, String8 *mimeType, float *confidence) { 711 off_t pos = 0; 712 uint32_t header; 713 if (!Resync(source, 0, &pos, &header)) { 714 return false; 715 } 716 717 *mimeType = MEDIA_MIMETYPE_AUDIO_MPEG; 718 *confidence = 0.3f; 719 720 return true; 721} 722 723} // namespace android 724