MPEG4Extractor.cpp revision bcc8e5817fa3dc624f214e58f756098053ac5682
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//#define LOG_NDEBUG 0
#define LOG_TAG "MPEG4Extractor"
#include <utils/Log.h>

#include "include/MPEG4Extractor.h"
#include "include/SampleTable.h"
#include "include/ESDS.h"

#include <ctype.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <media/stagefright/foundation/ABitReader.h>
#include <media/stagefright/foundation/ABuffer.h>
#include <media/stagefright/foundation/ADebug.h>
#include <media/stagefright/foundation/AMessage.h>
#include <media/stagefright/MediaBuffer.h>
#include <media/stagefright/MediaBufferGroup.h>
#include <media/stagefright/MediaDefs.h>
#include <media/stagefright/MediaSource.h>
#include <media/stagefright/MetaData.h>
#include <utils/String8.h>

namespace android {

// MediaSource subclass declared for this file's extractor. Only the
// declaration is part of this section; the member function definitions live
// elsewhere in the file.
// NOTE(review): judging by mTrackId / mSampleTable this presumably serves the
// samples of a single track -- confirm against the definitions.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    // NOTE(review): "sidx" is taken by non-const reference and the class keeps
    // a reference member (mSegments); presumably the vector must outlive this
    // object -- confirm against the constructor definition.
    MPEG4Source(const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                off64_t firstMoofOffset);

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    // Both read entry points share the same signature: the result buffer is
    // handed back through "buffer", "options" may be NULL.
    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    // Protected destructor: instances cannot be destroyed directly by
    // outside code.
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    Vector<SidxEntry> &mSegments;   // reference member, see constructor note
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    // Bookkeeping for sample auxiliary information (see the
    // parseSampleAuxiliaryInformation* declarations below). The *AllocSize
    // members presumably track the capacity of the two raw arrays -- confirm
    // against the definitions.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // State kept for the track fragment header being processed.
    struct TrackFragmentHeaderInfo {
        // Bit values tested against mFlags.
        // NOTE(review): the values look like the ISO BMFF 'tfhd' tf_flags --
        // confirm against parseTrackFragmentHeader.
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record collected in mCurrentSamples: location, size and
    // duration of the sample plus its IV and clear/encrypted size lists.
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Copy constructor and assignment are declared private; no definitions
    // are visible in this section (presumably left undefined on purpose to
    // forbid copying).
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};

// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache while forwarding
// all remaining requests to the wrapped datasource.
// This is used to cache the full sampletable metadata for a single track,
// possibly wrapping multiple times to cover all tracks, i.e.
// Each MPEG4DataSource caches the sampletable metadata for a single track.
156 157struct MPEG4DataSource : public DataSource { 158 MPEG4DataSource(const sp<DataSource> &source); 159 160 virtual status_t initCheck() const; 161 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 162 virtual status_t getSize(off64_t *size); 163 virtual uint32_t flags(); 164 165 status_t setCachedRange(off64_t offset, size_t size); 166 167protected: 168 virtual ~MPEG4DataSource(); 169 170private: 171 Mutex mLock; 172 173 sp<DataSource> mSource; 174 off64_t mCachedOffset; 175 size_t mCachedSize; 176 uint8_t *mCache; 177 178 void clearCache(); 179 180 MPEG4DataSource(const MPEG4DataSource &); 181 MPEG4DataSource &operator=(const MPEG4DataSource &); 182}; 183 184MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source) 185 : mSource(source), 186 mCachedOffset(0), 187 mCachedSize(0), 188 mCache(NULL) { 189} 190 191MPEG4DataSource::~MPEG4DataSource() { 192 clearCache(); 193} 194 195void MPEG4DataSource::clearCache() { 196 if (mCache) { 197 free(mCache); 198 mCache = NULL; 199 } 200 201 mCachedOffset = 0; 202 mCachedSize = 0; 203} 204 205status_t MPEG4DataSource::initCheck() const { 206 return mSource->initCheck(); 207} 208 209ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { 210 Mutex::Autolock autoLock(mLock); 211 212 if (offset >= mCachedOffset 213 && offset + size <= mCachedOffset + mCachedSize) { 214 memcpy(data, &mCache[offset - mCachedOffset], size); 215 return size; 216 } 217 218 return mSource->readAt(offset, data, size); 219} 220 221status_t MPEG4DataSource::getSize(off64_t *size) { 222 return mSource->getSize(size); 223} 224 225uint32_t MPEG4DataSource::flags() { 226 return mSource->flags(); 227} 228 229status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { 230 Mutex::Autolock autoLock(mLock); 231 232 clearCache(); 233 234 mCache = (uint8_t *)malloc(size); 235 236 if (mCache == NULL) { 237 return -ENOMEM; 238 } 239 240 mCachedOffset = offset; 241 mCachedSize = size; 242 243 ssize_t err = 
mSource->readAt(mCachedOffset, mCache, mCachedSize); 244 245 if (err < (ssize_t)size) { 246 clearCache(); 247 248 return ERROR_IO; 249 } 250 251 return OK; 252} 253 254//////////////////////////////////////////////////////////////////////////////// 255 256static void hexdump(const void *_data, size_t size) { 257 const uint8_t *data = (const uint8_t *)_data; 258 size_t offset = 0; 259 while (offset < size) { 260 printf("0x%04x ", offset); 261 262 size_t n = size - offset; 263 if (n > 16) { 264 n = 16; 265 } 266 267 for (size_t i = 0; i < 16; ++i) { 268 if (i == 8) { 269 printf(" "); 270 } 271 272 if (offset + i < size) { 273 printf("%02x ", data[offset + i]); 274 } else { 275 printf(" "); 276 } 277 } 278 279 printf(" "); 280 281 for (size_t i = 0; i < n; ++i) { 282 if (isprint(data[offset + i])) { 283 printf("%c", data[offset + i]); 284 } else { 285 printf("."); 286 } 287 } 288 289 printf("\n"); 290 291 offset += 16; 292 } 293} 294 295static const char *FourCC2MIME(uint32_t fourcc) { 296 switch (fourcc) { 297 case FOURCC('m', 'p', '4', 'a'): 298 return MEDIA_MIMETYPE_AUDIO_AAC; 299 300 case FOURCC('s', 'a', 'm', 'r'): 301 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 302 303 case FOURCC('s', 'a', 'w', 'b'): 304 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 305 306 case FOURCC('m', 'p', '4', 'v'): 307 return MEDIA_MIMETYPE_VIDEO_MPEG4; 308 309 case FOURCC('s', '2', '6', '3'): 310 case FOURCC('h', '2', '6', '3'): 311 case FOURCC('H', '2', '6', '3'): 312 return MEDIA_MIMETYPE_VIDEO_H263; 313 314 case FOURCC('a', 'v', 'c', '1'): 315 return MEDIA_MIMETYPE_VIDEO_AVC; 316 317 default: 318 CHECK(!"should not be here."); 319 return NULL; 320 } 321} 322 323static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 324 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 325 // AMR NB audio is always mono, 8kHz 326 *channels = 1; 327 *rate = 8000; 328 return true; 329 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 330 // 
AMR WB audio is always mono, 16kHz 331 *channels = 1; 332 *rate = 16000; 333 return true; 334 } 335 return false; 336} 337 338MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source) 339 : mSidxDuration(0), 340 mMoofOffset(0), 341 mDataSource(source), 342 mInitCheck(NO_INIT), 343 mHasVideo(false), 344 mFirstTrack(NULL), 345 mLastTrack(NULL), 346 mFileMetaData(new MetaData), 347 mFirstSINF(NULL), 348 mIsDrm(false) { 349} 350 351MPEG4Extractor::~MPEG4Extractor() { 352 Track *track = mFirstTrack; 353 while (track) { 354 Track *next = track->next; 355 356 delete track; 357 track = next; 358 } 359 mFirstTrack = mLastTrack = NULL; 360 361 SINF *sinf = mFirstSINF; 362 while (sinf) { 363 SINF *next = sinf->next; 364 delete sinf->IPMPData; 365 delete sinf; 366 sinf = next; 367 } 368 mFirstSINF = NULL; 369 370 for (size_t i = 0; i < mPssh.size(); i++) { 371 delete [] mPssh[i].data; 372 } 373} 374 375uint32_t MPEG4Extractor::flags() const { 376 return CAN_PAUSE | 377 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 
378 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 379} 380 381sp<MetaData> MPEG4Extractor::getMetaData() { 382 status_t err; 383 if ((err = readMetaData()) != OK) { 384 return new MetaData; 385 } 386 387 return mFileMetaData; 388} 389 390size_t MPEG4Extractor::countTracks() { 391 status_t err; 392 if ((err = readMetaData()) != OK) { 393 ALOGV("MPEG4Extractor::countTracks: no tracks"); 394 return 0; 395 } 396 397 size_t n = 0; 398 Track *track = mFirstTrack; 399 while (track) { 400 ++n; 401 track = track->next; 402 } 403 404 ALOGV("MPEG4Extractor::countTracks: %d tracks", n); 405 return n; 406} 407 408sp<MetaData> MPEG4Extractor::getTrackMetaData( 409 size_t index, uint32_t flags) { 410 status_t err; 411 if ((err = readMetaData()) != OK) { 412 return NULL; 413 } 414 415 Track *track = mFirstTrack; 416 while (index > 0) { 417 if (track == NULL) { 418 return NULL; 419 } 420 421 track = track->next; 422 --index; 423 } 424 425 if (track == NULL) { 426 return NULL; 427 } 428 429 if ((flags & kIncludeExtensiveMetaData) 430 && !track->includes_expensive_metadata) { 431 track->includes_expensive_metadata = true; 432 433 const char *mime; 434 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 435 if (!strncasecmp("video/", mime, 6)) { 436 if (mMoofOffset > 0) { 437 int64_t duration; 438 if (track->meta->findInt64(kKeyDuration, &duration)) { 439 // nothing fancy, just pick a frame near 1/4th of the duration 440 track->meta->setInt64( 441 kKeyThumbnailTime, duration / 4); 442 } 443 } else { 444 uint32_t sampleIndex; 445 uint32_t sampleTime; 446 if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK 447 && track->sampleTable->getMetaDataForSample( 448 sampleIndex, NULL /* offset */, NULL /* size */, 449 &sampleTime) == OK) { 450 track->meta->setInt64( 451 kKeyThumbnailTime, 452 ((int64_t)sampleTime * 1000000) / track->timescale); 453 } 454 } 455 } 456 } 457 458 return track->meta; 459} 460 461static void MakeFourCCString(uint32_t x, char *s) { 462 s[0] = x 
>> 24; 463 s[1] = (x >> 16) & 0xff; 464 s[2] = (x >> 8) & 0xff; 465 s[3] = x & 0xff; 466 s[4] = '\0'; 467} 468 469status_t MPEG4Extractor::readMetaData() { 470 if (mInitCheck != NO_INIT) { 471 return mInitCheck; 472 } 473 474 off64_t offset = 0; 475 status_t err; 476 while (true) { 477 err = parseChunk(&offset, 0); 478 if (err == OK) { 479 continue; 480 } 481 482 uint32_t hdr[2]; 483 if (mDataSource->readAt(offset, hdr, 8) < 8) { 484 break; 485 } 486 uint32_t chunk_type = ntohl(hdr[1]); 487 if (chunk_type == FOURCC('s', 'i', 'd', 'x')) { 488 // parse the sidx box too 489 continue; 490 } else if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 491 // store the offset of the first segment 492 mMoofOffset = offset; 493 } 494 break; 495 } 496 497 if (mInitCheck == OK) { 498 if (mHasVideo) { 499 mFileMetaData->setCString( 500 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 501 } else { 502 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); 503 } 504 505 mInitCheck = OK; 506 } else { 507 mInitCheck = err; 508 } 509 510 CHECK_NE(err, (status_t)NO_INIT); 511 512 // copy pssh data into file metadata 513 int psshsize = 0; 514 for (size_t i = 0; i < mPssh.size(); i++) { 515 psshsize += 20 + mPssh[i].datalen; 516 } 517 if (psshsize) { 518 char *buf = (char*)malloc(psshsize); 519 char *ptr = buf; 520 for (size_t i = 0; i < mPssh.size(); i++) { 521 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 522 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 523 ptr += (20 + mPssh[i].datalen); 524 } 525 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize); 526 free(buf); 527 } 528 return mInitCheck; 529} 530 531char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { 532 if (mFirstSINF == NULL) { 533 return NULL; 534 } 535 536 SINF *sinf = mFirstSINF; 537 while (sinf && (trackID != sinf->trackID)) { 538 sinf = sinf->next; 539 } 540 541 if (sinf == NULL) { 542 return NULL; 543 } 544 545 *len = sinf->len; 546 return sinf->IPMPData; 547} 548 549// Reads an encoded integer 7 bits 
at a time until it encounters the high bit clear. 550static int32_t readSize(off64_t offset, 551 const sp<DataSource> DataSource, uint8_t *numOfBytes) { 552 uint32_t size = 0; 553 uint8_t data; 554 bool moreData = true; 555 *numOfBytes = 0; 556 557 while (moreData) { 558 if (DataSource->readAt(offset, &data, 1) < 1) { 559 return -1; 560 } 561 offset ++; 562 moreData = (data >= 128) ? true : false; 563 size = (size << 7) | (data & 0x7f); // Take last 7 bits 564 (*numOfBytes) ++; 565 } 566 567 return size; 568} 569 570status_t MPEG4Extractor::parseDrmSINF(off64_t *offset, off64_t data_offset) { 571 uint8_t updateIdTag; 572 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 573 return ERROR_IO; 574 } 575 data_offset ++; 576 577 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 578 return ERROR_MALFORMED; 579 } 580 581 uint8_t numOfBytes; 582 int32_t size = readSize(data_offset, mDataSource, &numOfBytes); 583 if (size < 0) { 584 return ERROR_IO; 585 } 586 int32_t classSize = size; 587 data_offset += numOfBytes; 588 589 while(size >= 11 ) { 590 uint8_t descriptorTag; 591 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { 592 return ERROR_IO; 593 } 594 data_offset ++; 595 596 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { 597 return ERROR_MALFORMED; 598 } 599 600 uint8_t buffer[8]; 601 //ObjectDescriptorID and ObjectDescriptor url flag 602 if (mDataSource->readAt(data_offset, buffer, 2) < 2) { 603 return ERROR_IO; 604 } 605 data_offset += 2; 606 607 if ((buffer[1] >> 5) & 0x0001) { //url flag is set 608 return ERROR_MALFORMED; 609 } 610 611 if (mDataSource->readAt(data_offset, buffer, 8) < 8) { 612 return ERROR_IO; 613 } 614 data_offset += 8; 615 616 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) 617 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { 618 return ERROR_MALFORMED; 619 } 620 621 SINF *sinf = new SINF; 622 sinf->trackID = U16_AT(&buffer[3]); 623 sinf->IPMPDescriptorID = buffer[7]; 624 sinf->next = mFirstSINF; 
625 mFirstSINF = sinf; 626 627 size -= (8 + 2 + 1); 628 } 629 630 if (size != 0) { 631 return ERROR_MALFORMED; 632 } 633 634 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 635 return ERROR_IO; 636 } 637 data_offset ++; 638 639 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 640 return ERROR_MALFORMED; 641 } 642 643 size = readSize(data_offset, mDataSource, &numOfBytes); 644 if (size < 0) { 645 return ERROR_IO; 646 } 647 classSize = size; 648 data_offset += numOfBytes; 649 650 while (size > 0) { 651 uint8_t tag; 652 int32_t dataLen; 653 if (mDataSource->readAt(data_offset, &tag, 1) < 1) { 654 return ERROR_IO; 655 } 656 data_offset ++; 657 658 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { 659 uint8_t id; 660 dataLen = readSize(data_offset, mDataSource, &numOfBytes); 661 if (dataLen < 0) { 662 return ERROR_IO; 663 } else if (dataLen < 4) { 664 return ERROR_MALFORMED; 665 } 666 data_offset += numOfBytes; 667 668 if (mDataSource->readAt(data_offset, &id, 1) < 1) { 669 return ERROR_IO; 670 } 671 data_offset ++; 672 673 SINF *sinf = mFirstSINF; 674 while (sinf && (sinf->IPMPDescriptorID != id)) { 675 sinf = sinf->next; 676 } 677 if (sinf == NULL) { 678 return ERROR_MALFORMED; 679 } 680 sinf->len = dataLen - 3; 681 sinf->IPMPData = new char[sinf->len]; 682 683 if (mDataSource->readAt(data_offset + 2, sinf->IPMPData, sinf->len) < sinf->len) { 684 return ERROR_IO; 685 } 686 data_offset += sinf->len; 687 688 size -= (dataLen + numOfBytes + 1); 689 } 690 } 691 692 if (size != 0) { 693 return ERROR_MALFORMED; 694 } 695 696 return UNKNOWN_ERROR; // Return a dummy error. 
697} 698 699struct PathAdder { 700 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 701 : mPath(path) { 702 mPath->push(chunkType); 703 } 704 705 ~PathAdder() { 706 mPath->pop(); 707 } 708 709private: 710 Vector<uint32_t> *mPath; 711 712 PathAdder(const PathAdder &); 713 PathAdder &operator=(const PathAdder &); 714}; 715 716static bool underMetaDataPath(const Vector<uint32_t> &path) { 717 return path.size() >= 5 718 && path[0] == FOURCC('m', 'o', 'o', 'v') 719 && path[1] == FOURCC('u', 'd', 't', 'a') 720 && path[2] == FOURCC('m', 'e', 't', 'a') 721 && path[3] == FOURCC('i', 'l', 's', 't'); 722} 723 724// Given a time in seconds since Jan 1 1904, produce a human-readable string. 725static void convertTimeToDate(int64_t time_1904, String8 *s) { 726 time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600); 727 728 char tmp[32]; 729 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970)); 730 731 s->setTo(tmp); 732} 733 734status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 735 ALOGV("entering parseChunk %lld/%d", *offset, depth); 736 uint32_t hdr[2]; 737 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 738 return ERROR_IO; 739 } 740 uint64_t chunk_size = ntohl(hdr[0]); 741 uint32_t chunk_type = ntohl(hdr[1]); 742 off64_t data_offset = *offset + 8; 743 744 if (chunk_size == 1) { 745 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 746 return ERROR_IO; 747 } 748 chunk_size = ntoh64(chunk_size); 749 data_offset += 8; 750 751 if (chunk_size < 16) { 752 // The smallest valid chunk is 16 bytes long in this case. 753 return ERROR_MALFORMED; 754 } 755 } else if (chunk_size < 8) { 756 // The smallest valid chunk is 8 bytes long. 
757 return ERROR_MALFORMED; 758 } 759 760 char chunk[5]; 761 MakeFourCCString(chunk_type, chunk); 762 ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth); 763 764#if 0 765 static const char kWhitespace[] = " "; 766 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 767 printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size); 768 769 char buffer[256]; 770 size_t n = chunk_size; 771 if (n > sizeof(buffer)) { 772 n = sizeof(buffer); 773 } 774 if (mDataSource->readAt(*offset, buffer, n) 775 < (ssize_t)n) { 776 return ERROR_IO; 777 } 778 779 hexdump(buffer, n); 780#endif 781 782 PathAdder autoAdder(&mPath, chunk_type); 783 784 off64_t chunk_data_size = *offset + chunk_size - data_offset; 785 786 if (chunk_type != FOURCC('c', 'p', 'r', 't') 787 && chunk_type != FOURCC('c', 'o', 'v', 'r') 788 && mPath.size() == 5 && underMetaDataPath(mPath)) { 789 off64_t stop_offset = *offset + chunk_size; 790 *offset = data_offset; 791 while (*offset < stop_offset) { 792 status_t err = parseChunk(offset, depth + 1); 793 if (err != OK) { 794 return err; 795 } 796 } 797 798 if (*offset != stop_offset) { 799 return ERROR_MALFORMED; 800 } 801 802 return OK; 803 } 804 805 switch(chunk_type) { 806 case FOURCC('m', 'o', 'o', 'v'): 807 case FOURCC('t', 'r', 'a', 'k'): 808 case FOURCC('m', 'd', 'i', 'a'): 809 case FOURCC('m', 'i', 'n', 'f'): 810 case FOURCC('d', 'i', 'n', 'f'): 811 case FOURCC('s', 't', 'b', 'l'): 812 case FOURCC('m', 'v', 'e', 'x'): 813 case FOURCC('m', 'o', 'o', 'f'): 814 case FOURCC('t', 'r', 'a', 'f'): 815 case FOURCC('m', 'f', 'r', 'a'): 816 case FOURCC('u', 'd', 't', 'a'): 817 case FOURCC('i', 'l', 's', 't'): 818 case FOURCC('s', 'i', 'n', 'f'): 819 case FOURCC('s', 'c', 'h', 'i'): 820 { 821 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 822 ALOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size); 823 824 if (mDataSource->flags() 825 & (DataSource::kWantsPrefetching 826 | DataSource::kIsCachingDataSource)) { 827 
sp<MPEG4DataSource> cachedSource = 828 new MPEG4DataSource(mDataSource); 829 830 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { 831 mDataSource = cachedSource; 832 } 833 } 834 835 mLastTrack->sampleTable = new SampleTable(mDataSource); 836 } 837 838 bool isTrack = false; 839 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 840 isTrack = true; 841 842 Track *track = new Track; 843 track->next = NULL; 844 if (mLastTrack) { 845 mLastTrack->next = track; 846 } else { 847 mFirstTrack = track; 848 } 849 mLastTrack = track; 850 851 track->meta = new MetaData; 852 track->includes_expensive_metadata = false; 853 track->skipTrack = false; 854 track->timescale = 0; 855 track->meta->setCString(kKeyMIMEType, "application/octet-stream"); 856 } 857 858 off64_t stop_offset = *offset + chunk_size; 859 *offset = data_offset; 860 while (*offset < stop_offset) { 861 status_t err = parseChunk(offset, depth + 1); 862 if (err != OK) { 863 return err; 864 } 865 } 866 867 if (*offset != stop_offset) { 868 return ERROR_MALFORMED; 869 } 870 871 if (isTrack) { 872 if (mLastTrack->skipTrack) { 873 Track *cur = mFirstTrack; 874 875 if (cur == mLastTrack) { 876 delete cur; 877 mFirstTrack = mLastTrack = NULL; 878 } else { 879 while (cur && cur->next != mLastTrack) { 880 cur = cur->next; 881 } 882 cur->next = NULL; 883 delete mLastTrack; 884 mLastTrack = cur; 885 } 886 887 return OK; 888 } 889 890 status_t err = verifyTrack(mLastTrack); 891 892 if (err != OK) { 893 return err; 894 } 895 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 896 mInitCheck = OK; 897 898 if (!mIsDrm) { 899 return UNKNOWN_ERROR; // Return a dummy error. 
900 } else { 901 return OK; 902 } 903 } 904 break; 905 } 906 907 case FOURCC('f', 'r', 'm', 'a'): 908 { 909 uint32_t original_fourcc; 910 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 911 return ERROR_IO; 912 } 913 original_fourcc = ntohl(original_fourcc); 914 ALOGV("read original format: %d", original_fourcc); 915 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 916 uint32_t num_channels = 0; 917 uint32_t sample_rate = 0; 918 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 919 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 920 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 921 } 922 *offset += chunk_size; 923 break; 924 } 925 926 case FOURCC('t', 'e', 'n', 'c'): 927 { 928 if (chunk_size < 32) { 929 return ERROR_MALFORMED; 930 } 931 932 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 933 // default IV size, 16 bytes default KeyID 934 // (ISO 23001-7) 935 char buf[4]; 936 memset(buf, 0, 4); 937 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 938 return ERROR_IO; 939 } 940 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 941 if (defaultAlgorithmId > 1) { 942 // only 0 (clear) and 1 (AES-128) are valid 943 return ERROR_MALFORMED; 944 } 945 946 memset(buf, 0, 4); 947 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 948 return ERROR_IO; 949 } 950 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 951 952 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 953 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 954 // only unencrypted data must have 0 IV size 955 return ERROR_MALFORMED; 956 } else if (defaultIVSize != 0 && 957 defaultIVSize != 8 && 958 defaultIVSize != 16) { 959 // only supported sizes are 0, 8 and 16 960 return ERROR_MALFORMED; 961 } 962 963 uint8_t defaultKeyId[16]; 964 965 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 966 return ERROR_IO; 967 } 968 969 
mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId); 970 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 971 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 972 *offset += chunk_size; 973 break; 974 } 975 976 case FOURCC('t', 'k', 'h', 'd'): 977 { 978 status_t err; 979 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 980 return err; 981 } 982 983 *offset += chunk_size; 984 break; 985 } 986 987 case FOURCC('p', 's', 's', 'h'): 988 { 989 PsshInfo pssh; 990 991 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 992 return ERROR_IO; 993 } 994 995 uint32_t psshdatalen = 0; 996 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 997 return ERROR_IO; 998 } 999 pssh.datalen = ntohl(psshdatalen); 1000 ALOGV("pssh data size: %d", pssh.datalen); 1001 if (pssh.datalen + 20 > chunk_size) { 1002 // pssh data length exceeds size of containing box 1003 return ERROR_MALFORMED; 1004 } 1005 1006 pssh.data = new uint8_t[pssh.datalen]; 1007 ALOGV("allocated pssh @ %p", pssh.data); 1008 ssize_t requested = (ssize_t) pssh.datalen; 1009 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1010 return ERROR_IO; 1011 } 1012 mPssh.push_back(pssh); 1013 1014 *offset += chunk_size; 1015 break; 1016 } 1017 1018 case FOURCC('m', 'd', 'h', 'd'): 1019 { 1020 if (chunk_data_size < 4) { 1021 return ERROR_MALFORMED; 1022 } 1023 1024 uint8_t version; 1025 if (mDataSource->readAt( 1026 data_offset, &version, sizeof(version)) 1027 < (ssize_t)sizeof(version)) { 1028 return ERROR_IO; 1029 } 1030 1031 off64_t timescale_offset; 1032 1033 if (version == 1) { 1034 timescale_offset = data_offset + 4 + 16; 1035 } else if (version == 0) { 1036 timescale_offset = data_offset + 4 + 8; 1037 } else { 1038 return ERROR_IO; 1039 } 1040 1041 uint32_t timescale; 1042 if (mDataSource->readAt( 1043 timescale_offset, ×cale, sizeof(timescale)) 1044 < (ssize_t)sizeof(timescale)) { 1045 return ERROR_IO; 
1046 } 1047 1048 mLastTrack->timescale = ntohl(timescale); 1049 1050 int64_t duration = 0; 1051 if (version == 1) { 1052 if (mDataSource->readAt( 1053 timescale_offset + 4, &duration, sizeof(duration)) 1054 < (ssize_t)sizeof(duration)) { 1055 return ERROR_IO; 1056 } 1057 duration = ntoh64(duration); 1058 } else { 1059 uint32_t duration32; 1060 if (mDataSource->readAt( 1061 timescale_offset + 4, &duration32, sizeof(duration32)) 1062 < (ssize_t)sizeof(duration32)) { 1063 return ERROR_IO; 1064 } 1065 // ffmpeg sets duration to -1, which is incorrect. 1066 if (duration32 != 0xffffffff) { 1067 duration = ntohl(duration32); 1068 } 1069 } 1070 mLastTrack->meta->setInt64( 1071 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1072 1073 uint8_t lang[2]; 1074 off64_t lang_offset; 1075 if (version == 1) { 1076 lang_offset = timescale_offset + 4 + 8; 1077 } else if (version == 0) { 1078 lang_offset = timescale_offset + 4 + 4; 1079 } else { 1080 return ERROR_IO; 1081 } 1082 1083 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1084 < (ssize_t)sizeof(lang)) { 1085 return ERROR_IO; 1086 } 1087 1088 // To get the ISO-639-2/T three character language code 1089 // 1 bit pad followed by 3 5-bits characters. Each character 1090 // is packed as the difference between its ASCII value and 0x60. 
1091 char lang_code[4]; 1092 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1093 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1094 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1095 lang_code[3] = '\0'; 1096 1097 mLastTrack->meta->setCString( 1098 kKeyMediaLanguage, lang_code); 1099 1100 *offset += chunk_size; 1101 break; 1102 } 1103 1104 case FOURCC('s', 't', 's', 'd'): 1105 { 1106 if (chunk_data_size < 8) { 1107 return ERROR_MALFORMED; 1108 } 1109 1110 uint8_t buffer[8]; 1111 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1112 return ERROR_MALFORMED; 1113 } 1114 1115 if (mDataSource->readAt( 1116 data_offset, buffer, 8) < 8) { 1117 return ERROR_IO; 1118 } 1119 1120 if (U32_AT(buffer) != 0) { 1121 // Should be version 0, flags 0. 1122 return ERROR_MALFORMED; 1123 } 1124 1125 uint32_t entry_count = U32_AT(&buffer[4]); 1126 1127 if (entry_count > 1) { 1128 // For 3GPP timed text, there could be multiple tx3g boxes contain 1129 // multiple text display formats. These formats will be used to 1130 // display the timed text. 1131 // For encrypted files, there may also be more than one entry. 1132 const char *mime; 1133 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1134 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1135 strcasecmp(mime, "application/octet-stream")) { 1136 // For now we only support a single type of media per track. 
1137 mLastTrack->skipTrack = true; 1138 *offset += chunk_size; 1139 break; 1140 } 1141 } 1142 off64_t stop_offset = *offset + chunk_size; 1143 *offset = data_offset + 8; 1144 for (uint32_t i = 0; i < entry_count; ++i) { 1145 status_t err = parseChunk(offset, depth + 1); 1146 if (err != OK) { 1147 return err; 1148 } 1149 } 1150 1151 if (*offset != stop_offset) { 1152 return ERROR_MALFORMED; 1153 } 1154 break; 1155 } 1156 1157 case FOURCC('m', 'p', '4', 'a'): 1158 case FOURCC('e', 'n', 'c', 'a'): 1159 case FOURCC('s', 'a', 'm', 'r'): 1160 case FOURCC('s', 'a', 'w', 'b'): 1161 { 1162 uint8_t buffer[8 + 20]; 1163 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1164 // Basic AudioSampleEntry size. 1165 return ERROR_MALFORMED; 1166 } 1167 1168 if (mDataSource->readAt( 1169 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1170 return ERROR_IO; 1171 } 1172 1173 uint16_t data_ref_index = U16_AT(&buffer[6]); 1174 uint32_t num_channels = U16_AT(&buffer[16]); 1175 1176 uint16_t sample_size = U16_AT(&buffer[18]); 1177 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1178 1179 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1180 // if the chunk type is enca, we'll get the type from the sinf/frma box later 1181 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1182 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1183 } 1184 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1185 chunk, num_channels, sample_size, sample_rate); 1186 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1187 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1188 1189 off64_t stop_offset = *offset + chunk_size; 1190 *offset = data_offset + sizeof(buffer); 1191 while (*offset < stop_offset) { 1192 status_t err = parseChunk(offset, depth + 1); 1193 if (err != OK) { 1194 return err; 1195 } 1196 } 1197 1198 if (*offset != stop_offset) { 1199 return ERROR_MALFORMED; 1200 } 1201 break; 1202 } 1203 1204 case FOURCC('m', 'p', '4', 'v'): 
1205 case FOURCC('e', 'n', 'c', 'v'): 1206 case FOURCC('s', '2', '6', '3'): 1207 case FOURCC('H', '2', '6', '3'): 1208 case FOURCC('h', '2', '6', '3'): 1209 case FOURCC('a', 'v', 'c', '1'): 1210 { 1211 mHasVideo = true; 1212 1213 uint8_t buffer[78]; 1214 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1215 // Basic VideoSampleEntry size. 1216 return ERROR_MALFORMED; 1217 } 1218 1219 if (mDataSource->readAt( 1220 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1221 return ERROR_IO; 1222 } 1223 1224 uint16_t data_ref_index = U16_AT(&buffer[6]); 1225 uint16_t width = U16_AT(&buffer[6 + 18]); 1226 uint16_t height = U16_AT(&buffer[6 + 20]); 1227 1228 // The video sample is not standard-compliant if it has invalid dimension. 1229 // Use some default width and height value, and 1230 // let the decoder figure out the actual width and height (and thus 1231 // be prepared for INFO_FOMRAT_CHANGED event). 1232 if (width == 0) width = 352; 1233 if (height == 0) height = 288; 1234 1235 // printf("*** coding='%s' width=%d height=%d\n", 1236 // chunk, width, height); 1237 1238 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1239 // if the chunk type is encv, we'll get the type from the sinf/frma box later 1240 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1241 } 1242 mLastTrack->meta->setInt32(kKeyWidth, width); 1243 mLastTrack->meta->setInt32(kKeyHeight, height); 1244 1245 off64_t stop_offset = *offset + chunk_size; 1246 *offset = data_offset + sizeof(buffer); 1247 while (*offset < stop_offset) { 1248 status_t err = parseChunk(offset, depth + 1); 1249 if (err != OK) { 1250 return err; 1251 } 1252 } 1253 1254 if (*offset != stop_offset) { 1255 return ERROR_MALFORMED; 1256 } 1257 break; 1258 } 1259 1260 case FOURCC('s', 't', 'c', 'o'): 1261 case FOURCC('c', 'o', '6', '4'): 1262 { 1263 status_t err = 1264 mLastTrack->sampleTable->setChunkOffsetParams( 1265 chunk_type, data_offset, chunk_data_size); 1266 1267 if (err != OK) { 1268 return err; 
1269 } 1270 1271 *offset += chunk_size; 1272 break; 1273 } 1274 1275 case FOURCC('s', 't', 's', 'c'): 1276 { 1277 status_t err = 1278 mLastTrack->sampleTable->setSampleToChunkParams( 1279 data_offset, chunk_data_size); 1280 1281 if (err != OK) { 1282 return err; 1283 } 1284 1285 *offset += chunk_size; 1286 break; 1287 } 1288 1289 case FOURCC('s', 't', 's', 'z'): 1290 case FOURCC('s', 't', 'z', '2'): 1291 { 1292 status_t err = 1293 mLastTrack->sampleTable->setSampleSizeParams( 1294 chunk_type, data_offset, chunk_data_size); 1295 1296 if (err != OK) { 1297 return err; 1298 } 1299 1300 size_t max_size; 1301 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1302 1303 if (err != OK) { 1304 return err; 1305 } 1306 1307 if (max_size != 0) { 1308 // Assume that a given buffer only contains at most 10 chunks, 1309 // each chunk originally prefixed with a 2 byte length will 1310 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1311 // and thus will grow by 2 bytes per chunk. 1312 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1313 } else { 1314 // No size was specified. Pick a conservatively large size. 1315 int32_t width, height; 1316 if (mLastTrack->meta->findInt32(kKeyWidth, &width) && 1317 mLastTrack->meta->findInt32(kKeyHeight, &height)) { 1318 mLastTrack->meta->setInt32(kKeyMaxInputSize, width * height * 3 / 2); 1319 } else { 1320 ALOGE("No width or height, assuming worst case 1080p"); 1321 mLastTrack->meta->setInt32(kKeyMaxInputSize, 3110400); 1322 } 1323 } 1324 *offset += chunk_size; 1325 1326 // Calculate average frame rate. 
1327 const char *mime; 1328 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1329 if (!strncasecmp("video/", mime, 6)) { 1330 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1331 int64_t durationUs; 1332 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { 1333 if (durationUs > 0) { 1334 int32_t frameRate = (nSamples * 1000000LL + 1335 (durationUs >> 1)) / durationUs; 1336 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1337 } 1338 } 1339 } 1340 1341 break; 1342 } 1343 1344 case FOURCC('s', 't', 't', 's'): 1345 { 1346 status_t err = 1347 mLastTrack->sampleTable->setTimeToSampleParams( 1348 data_offset, chunk_data_size); 1349 1350 if (err != OK) { 1351 return err; 1352 } 1353 1354 *offset += chunk_size; 1355 break; 1356 } 1357 1358 case FOURCC('c', 't', 't', 's'): 1359 { 1360 status_t err = 1361 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1362 data_offset, chunk_data_size); 1363 1364 if (err != OK) { 1365 return err; 1366 } 1367 1368 *offset += chunk_size; 1369 break; 1370 } 1371 1372 case FOURCC('s', 't', 's', 's'): 1373 { 1374 status_t err = 1375 mLastTrack->sampleTable->setSyncSampleParams( 1376 data_offset, chunk_data_size); 1377 1378 if (err != OK) { 1379 return err; 1380 } 1381 1382 *offset += chunk_size; 1383 break; 1384 } 1385 1386 // @xyz 1387 case FOURCC('\xA9', 'x', 'y', 'z'): 1388 { 1389 // Best case the total data length inside "@xyz" box 1390 // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/", 1391 // where "\x00\x04" is the text string length with value = 4, 1392 // "\0x15\xc7" is the language code = en, and "0+0" is a 1393 // location (string) value with longitude = 0 and latitude = 0. 1394 if (chunk_data_size < 8) { 1395 return ERROR_MALFORMED; 1396 } 1397 1398 // Worst case the location string length would be 18, 1399 // for instance +90.0000-180.0000, without the trailing "/" and 1400 // the string length + language code. 
1401 char buffer[18]; 1402 1403 // Substracting 5 from the data size is because the text string length + 1404 // language code takes 4 bytes, and the trailing slash "/" takes 1 byte. 1405 off64_t location_length = chunk_data_size - 5; 1406 if (location_length >= (off64_t) sizeof(buffer)) { 1407 return ERROR_MALFORMED; 1408 } 1409 1410 if (mDataSource->readAt( 1411 data_offset + 4, buffer, location_length) < location_length) { 1412 return ERROR_IO; 1413 } 1414 1415 buffer[location_length] = '\0'; 1416 mFileMetaData->setCString(kKeyLocation, buffer); 1417 *offset += chunk_size; 1418 break; 1419 } 1420 1421 case FOURCC('e', 's', 'd', 's'): 1422 { 1423 if (chunk_data_size < 4) { 1424 return ERROR_MALFORMED; 1425 } 1426 1427 uint8_t buffer[256]; 1428 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1429 return ERROR_BUFFER_TOO_SMALL; 1430 } 1431 1432 if (mDataSource->readAt( 1433 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1434 return ERROR_IO; 1435 } 1436 1437 if (U32_AT(buffer) != 0) { 1438 // Should be version 0, flags 0. 1439 return ERROR_MALFORMED; 1440 } 1441 1442 mLastTrack->meta->setData( 1443 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1444 1445 if (mPath.size() >= 2 1446 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1447 // Information from the ESDS must be relied on for proper 1448 // setup of sample rate and channel count for MPEG4 Audio. 1449 // The generic header appears to only contain generic 1450 // information... 
1451 1452 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1453 &buffer[4], chunk_data_size - 4); 1454 1455 if (err != OK) { 1456 return err; 1457 } 1458 } 1459 1460 *offset += chunk_size; 1461 break; 1462 } 1463 1464 case FOURCC('a', 'v', 'c', 'C'): 1465 { 1466 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1467 1468 if (mDataSource->readAt( 1469 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1470 return ERROR_IO; 1471 } 1472 1473 mLastTrack->meta->setData( 1474 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size); 1475 1476 *offset += chunk_size; 1477 break; 1478 } 1479 1480 case FOURCC('d', '2', '6', '3'): 1481 { 1482 /* 1483 * d263 contains a fixed 7 bytes part: 1484 * vendor - 4 bytes 1485 * version - 1 byte 1486 * level - 1 byte 1487 * profile - 1 byte 1488 * optionally, "d263" box itself may contain a 16-byte 1489 * bit rate box (bitr) 1490 * average bit rate - 4 bytes 1491 * max bit rate - 4 bytes 1492 */ 1493 char buffer[23]; 1494 if (chunk_data_size != 7 && 1495 chunk_data_size != 23) { 1496 ALOGE("Incorrect D263 box size %lld", chunk_data_size); 1497 return ERROR_MALFORMED; 1498 } 1499 1500 if (mDataSource->readAt( 1501 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1502 return ERROR_IO; 1503 } 1504 1505 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 1506 1507 *offset += chunk_size; 1508 break; 1509 } 1510 1511 case FOURCC('m', 'e', 't', 'a'): 1512 { 1513 uint8_t buffer[4]; 1514 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1515 return ERROR_MALFORMED; 1516 } 1517 1518 if (mDataSource->readAt( 1519 data_offset, buffer, 4) < 4) { 1520 return ERROR_IO; 1521 } 1522 1523 if (U32_AT(buffer) != 0) { 1524 // Should be version 0, flags 0. 1525 1526 // If it's not, let's assume this is one of those 1527 // apparently malformed chunks that don't have flags 1528 // and completely different semantics than what's 1529 // in the MPEG4 specs and skip it. 
1530 *offset += chunk_size; 1531 return OK; 1532 } 1533 1534 off64_t stop_offset = *offset + chunk_size; 1535 *offset = data_offset + sizeof(buffer); 1536 while (*offset < stop_offset) { 1537 status_t err = parseChunk(offset, depth + 1); 1538 if (err != OK) { 1539 return err; 1540 } 1541 } 1542 1543 if (*offset != stop_offset) { 1544 return ERROR_MALFORMED; 1545 } 1546 break; 1547 } 1548 1549 case FOURCC('m', 'e', 'a', 'n'): 1550 case FOURCC('n', 'a', 'm', 'e'): 1551 case FOURCC('d', 'a', 't', 'a'): 1552 { 1553 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 1554 status_t err = parseMetaData(data_offset, chunk_data_size); 1555 1556 if (err != OK) { 1557 return err; 1558 } 1559 } 1560 1561 *offset += chunk_size; 1562 break; 1563 } 1564 1565 case FOURCC('m', 'v', 'h', 'd'): 1566 { 1567 if (chunk_data_size < 12) { 1568 return ERROR_MALFORMED; 1569 } 1570 1571 uint8_t header[12]; 1572 if (mDataSource->readAt( 1573 data_offset, header, sizeof(header)) 1574 < (ssize_t)sizeof(header)) { 1575 return ERROR_IO; 1576 } 1577 1578 int64_t creationTime; 1579 if (header[0] == 1) { 1580 creationTime = U64_AT(&header[4]); 1581 } else if (header[0] != 0) { 1582 return ERROR_MALFORMED; 1583 } else { 1584 creationTime = U32_AT(&header[4]); 1585 } 1586 1587 String8 s; 1588 convertTimeToDate(creationTime, &s); 1589 1590 mFileMetaData->setCString(kKeyDate, s.string()); 1591 1592 *offset += chunk_size; 1593 break; 1594 } 1595 1596 case FOURCC('m', 'd', 'a', 't'): 1597 { 1598 ALOGV("mdat chunk, drm: %d", mIsDrm); 1599 if (!mIsDrm) { 1600 *offset += chunk_size; 1601 break; 1602 } 1603 1604 if (chunk_size < 8) { 1605 return ERROR_MALFORMED; 1606 } 1607 1608 return parseDrmSINF(offset, data_offset); 1609 } 1610 1611 case FOURCC('h', 'd', 'l', 'r'): 1612 { 1613 uint32_t buffer; 1614 if (mDataSource->readAt( 1615 data_offset + 8, &buffer, 4) < 4) { 1616 return ERROR_IO; 1617 } 1618 1619 uint32_t type = ntohl(buffer); 1620 // For the 3GPP file format, the handler-type within the 'hdlr' box 
1621 // shall be 'text'. We also want to support 'sbtl' handler type 1622 // for a practical reason as various MPEG4 containers use it. 1623 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 1624 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 1625 } 1626 1627 *offset += chunk_size; 1628 break; 1629 } 1630 1631 case FOURCC('t', 'x', '3', 'g'): 1632 { 1633 uint32_t type; 1634 const void *data; 1635 size_t size = 0; 1636 if (!mLastTrack->meta->findData( 1637 kKeyTextFormatData, &type, &data, &size)) { 1638 size = 0; 1639 } 1640 1641 if (SIZE_MAX - chunk_size <= size) { 1642 return ERROR_MALFORMED; 1643 } 1644 1645 uint8_t *buffer = new uint8_t[size + chunk_size]; 1646 if (buffer == NULL) { 1647 return ERROR_MALFORMED; 1648 } 1649 1650 if (size > 0) { 1651 memcpy(buffer, data, size); 1652 } 1653 1654 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 1655 < chunk_size) { 1656 delete[] buffer; 1657 buffer = NULL; 1658 1659 return ERROR_IO; 1660 } 1661 1662 mLastTrack->meta->setData( 1663 kKeyTextFormatData, 0, buffer, size + chunk_size); 1664 1665 delete[] buffer; 1666 1667 *offset += chunk_size; 1668 break; 1669 } 1670 1671 case FOURCC('c', 'o', 'v', 'r'): 1672 { 1673 if (mFileMetaData != NULL) { 1674 ALOGV("chunk_data_size = %lld and data_offset = %lld", 1675 chunk_data_size, data_offset); 1676 if (chunk_data_size >= SIZE_MAX - 1) { 1677 return ERROR_MALFORMED; 1678 } 1679 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1); 1680 if (mDataSource->readAt( 1681 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) { 1682 return ERROR_IO; 1683 } 1684 const int kSkipBytesOfDataBox = 16; 1685 mFileMetaData->setData( 1686 kKeyAlbumArt, MetaData::TYPE_NONE, 1687 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 1688 } 1689 1690 *offset += chunk_size; 1691 break; 1692 } 1693 1694 case FOURCC('-', '-', '-', '-'): 1695 { 1696 mLastCommentMean.clear(); 1697 
mLastCommentName.clear(); 1698 mLastCommentData.clear(); 1699 *offset += chunk_size; 1700 break; 1701 } 1702 1703 case FOURCC('s', 'i', 'd', 'x'): 1704 { 1705 parseSegmentIndex(data_offset, chunk_data_size); 1706 *offset += chunk_size; 1707 return UNKNOWN_ERROR; // stop parsing after sidx 1708 } 1709 1710 default: 1711 { 1712 *offset += chunk_size; 1713 break; 1714 } 1715 } 1716 1717 return OK; 1718} 1719 1720status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 1721 ALOGV("MPEG4Extractor::parseSegmentIndex"); 1722 1723 if (size < 12) { 1724 return -EINVAL; 1725 } 1726 1727 uint32_t flags; 1728 if (!mDataSource->getUInt32(offset, &flags)) { 1729 return ERROR_MALFORMED; 1730 } 1731 1732 uint32_t version = flags >> 24; 1733 flags &= 0xffffff; 1734 1735 ALOGV("sidx version %d", version); 1736 1737 uint32_t referenceId; 1738 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 1739 return ERROR_MALFORMED; 1740 } 1741 1742 uint32_t timeScale; 1743 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 1744 return ERROR_MALFORMED; 1745 } 1746 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 1747 1748 uint64_t earliestPresentationTime; 1749 uint64_t firstOffset; 1750 1751 offset += 12; 1752 size -= 12; 1753 1754 if (version == 0) { 1755 if (size < 8) { 1756 return -EINVAL; 1757 } 1758 uint32_t tmp; 1759 if (!mDataSource->getUInt32(offset, &tmp)) { 1760 return ERROR_MALFORMED; 1761 } 1762 earliestPresentationTime = tmp; 1763 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 1764 return ERROR_MALFORMED; 1765 } 1766 firstOffset = tmp; 1767 offset += 8; 1768 size -= 8; 1769 } else { 1770 if (size < 16) { 1771 return -EINVAL; 1772 } 1773 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 1774 return ERROR_MALFORMED; 1775 } 1776 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 1777 return ERROR_MALFORMED; 1778 } 1779 offset += 16; 1780 size -= 16; 1781 } 1782 ALOGV("sidx pres/off: %Ld/%Ld", earliestPresentationTime, 
firstOffset); 1783 1784 if (size < 4) { 1785 return -EINVAL; 1786 } 1787 1788 uint16_t referenceCount; 1789 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 1790 return ERROR_MALFORMED; 1791 } 1792 offset += 4; 1793 size -= 4; 1794 ALOGV("refcount: %d", referenceCount); 1795 1796 if (size < referenceCount * 12) { 1797 return -EINVAL; 1798 } 1799 1800 uint64_t total_duration = 0; 1801 for (unsigned int i = 0; i < referenceCount; i++) { 1802 uint32_t d1, d2, d3; 1803 1804 if (!mDataSource->getUInt32(offset, &d1) || // size 1805 !mDataSource->getUInt32(offset + 4, &d2) || // duration 1806 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 1807 return ERROR_MALFORMED; 1808 } 1809 1810 if (d1 & 0x80000000) { 1811 ALOGW("sub-sidx boxes not supported yet"); 1812 } 1813 bool sap = d3 & 0x80000000; 1814 bool saptype = d3 >> 28; 1815 if (!sap || saptype > 2) { 1816 ALOGW("not a stream access point, or unsupported type"); 1817 } 1818 total_duration += d2; 1819 offset += 12; 1820 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 1821 SidxEntry se; 1822 se.mSize = d1 & 0x7fffffff; 1823 se.mDurationUs = 1000000LL * d2 / timeScale; 1824 mSidxEntries.add(se); 1825 } 1826 1827 mSidxDuration = total_duration * 1000000 / timeScale; 1828 ALOGV("duration: %lld", mSidxDuration); 1829 1830 int64_t metaDuration; 1831 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 1832 mLastTrack->meta->setInt64(kKeyDuration, mSidxDuration); 1833 } 1834 return OK; 1835} 1836 1837 1838 1839status_t MPEG4Extractor::parseTrackHeader( 1840 off64_t data_offset, off64_t data_size) { 1841 if (data_size < 4) { 1842 return ERROR_MALFORMED; 1843 } 1844 1845 uint8_t version; 1846 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 1847 return ERROR_IO; 1848 } 1849 1850 size_t dynSize = (version == 1) ? 
36 : 24; 1851 1852 uint8_t buffer[36 + 60]; 1853 1854 if (data_size != (off64_t)dynSize + 60) { 1855 return ERROR_MALFORMED; 1856 } 1857 1858 if (mDataSource->readAt( 1859 data_offset, buffer, data_size) < (ssize_t)data_size) { 1860 return ERROR_IO; 1861 } 1862 1863 uint64_t ctime, mtime, duration; 1864 int32_t id; 1865 1866 if (version == 1) { 1867 ctime = U64_AT(&buffer[4]); 1868 mtime = U64_AT(&buffer[12]); 1869 id = U32_AT(&buffer[20]); 1870 duration = U64_AT(&buffer[28]); 1871 } else { 1872 CHECK_EQ((unsigned)version, 0u); 1873 1874 ctime = U32_AT(&buffer[4]); 1875 mtime = U32_AT(&buffer[8]); 1876 id = U32_AT(&buffer[12]); 1877 duration = U32_AT(&buffer[20]); 1878 } 1879 1880 mLastTrack->meta->setInt32(kKeyTrackID, id); 1881 1882 size_t matrixOffset = dynSize + 16; 1883 int32_t a00 = U32_AT(&buffer[matrixOffset]); 1884 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 1885 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 1886 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 1887 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 1888 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 1889 1890#if 0 1891 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 1892 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 1893 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 1894 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 1895#endif 1896 1897 uint32_t rotationDegrees; 1898 1899 static const int32_t kFixedOne = 0x10000; 1900 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 1901 // Identity, no rotation 1902 rotationDegrees = 0; 1903 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 1904 rotationDegrees = 90; 1905 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 1906 rotationDegrees = 270; 1907 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { 1908 rotationDegrees = 180; 1909 } else { 1910 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 1911 rotationDegrees = 0; 1912 } 1913 1914 if 
(rotationDegrees != 0) { 1915 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); 1916 } 1917 1918 // Handle presentation display size, which could be different 1919 // from the image size indicated by kKeyWidth and kKeyHeight. 1920 uint32_t width = U32_AT(&buffer[dynSize + 52]); 1921 uint32_t height = U32_AT(&buffer[dynSize + 56]); 1922 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); 1923 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); 1924 1925 return OK; 1926} 1927 1928status_t MPEG4Extractor::parseMetaData(off64_t offset, size_t size) { 1929 if (size < 4) { 1930 return ERROR_MALFORMED; 1931 } 1932 1933 uint8_t *buffer = new uint8_t[size + 1]; 1934 if (mDataSource->readAt( 1935 offset, buffer, size) != (ssize_t)size) { 1936 delete[] buffer; 1937 buffer = NULL; 1938 1939 return ERROR_IO; 1940 } 1941 1942 uint32_t flags = U32_AT(buffer); 1943 1944 uint32_t metadataKey = 0; 1945 char chunk[5]; 1946 MakeFourCCString(mPath[4], chunk); 1947 ALOGV("meta: %s @ %lld", chunk, offset); 1948 switch (mPath[4]) { 1949 case FOURCC(0xa9, 'a', 'l', 'b'): 1950 { 1951 metadataKey = kKeyAlbum; 1952 break; 1953 } 1954 case FOURCC(0xa9, 'A', 'R', 'T'): 1955 { 1956 metadataKey = kKeyArtist; 1957 break; 1958 } 1959 case FOURCC('a', 'A', 'R', 'T'): 1960 { 1961 metadataKey = kKeyAlbumArtist; 1962 break; 1963 } 1964 case FOURCC(0xa9, 'd', 'a', 'y'): 1965 { 1966 metadataKey = kKeyYear; 1967 break; 1968 } 1969 case FOURCC(0xa9, 'n', 'a', 'm'): 1970 { 1971 metadataKey = kKeyTitle; 1972 break; 1973 } 1974 case FOURCC(0xa9, 'w', 'r', 't'): 1975 { 1976 metadataKey = kKeyWriter; 1977 break; 1978 } 1979 case FOURCC('c', 'o', 'v', 'r'): 1980 { 1981 metadataKey = kKeyAlbumArt; 1982 break; 1983 } 1984 case FOURCC('g', 'n', 'r', 'e'): 1985 { 1986 metadataKey = kKeyGenre; 1987 break; 1988 } 1989 case FOURCC(0xa9, 'g', 'e', 'n'): 1990 { 1991 metadataKey = kKeyGenre; 1992 break; 1993 } 1994 case FOURCC('c', 'p', 'i', 'l'): 1995 { 1996 if (size == 9 && flags == 21) { 1997 
char tmp[16]; 1998 sprintf(tmp, "%d", 1999 (int)buffer[size - 1]); 2000 2001 mFileMetaData->setCString(kKeyCompilation, tmp); 2002 } 2003 break; 2004 } 2005 case FOURCC('t', 'r', 'k', 'n'): 2006 { 2007 if (size == 16 && flags == 0) { 2008 char tmp[16]; 2009 uint16_t* pTrack = (uint16_t*)&buffer[10]; 2010 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 2011 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 2012 2013 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2014 } 2015 break; 2016 } 2017 case FOURCC('d', 'i', 's', 'k'): 2018 { 2019 if ((size == 14 || size == 16) && flags == 0) { 2020 char tmp[16]; 2021 uint16_t* pDisc = (uint16_t*)&buffer[10]; 2022 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 2023 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 2024 2025 mFileMetaData->setCString(kKeyDiscNumber, tmp); 2026 } 2027 break; 2028 } 2029 case FOURCC('-', '-', '-', '-'): 2030 { 2031 buffer[size] = '\0'; 2032 switch (mPath[5]) { 2033 case FOURCC('m', 'e', 'a', 'n'): 2034 mLastCommentMean.setTo((const char *)buffer + 4); 2035 break; 2036 case FOURCC('n', 'a', 'm', 'e'): 2037 mLastCommentName.setTo((const char *)buffer + 4); 2038 break; 2039 case FOURCC('d', 'a', 't', 'a'): 2040 mLastCommentData.setTo((const char *)buffer + 8); 2041 break; 2042 } 2043 2044 // Once we have a set of mean/name/data info, go ahead and process 2045 // it to see if its something we are interested in. Whether or not 2046 // were are interested in the specific tag, make sure to clear out 2047 // the set so we can be ready to process another tuple should one 2048 // show up later in the file. 
2049 if ((mLastCommentMean.length() != 0) && 2050 (mLastCommentName.length() != 0) && 2051 (mLastCommentData.length() != 0)) { 2052 2053 if (mLastCommentMean == "com.apple.iTunes" 2054 && mLastCommentName == "iTunSMPB") { 2055 int32_t delay, padding; 2056 if (sscanf(mLastCommentData, 2057 " %*x %x %x %*x", &delay, &padding) == 2) { 2058 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 2059 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); 2060 } 2061 } 2062 2063 mLastCommentMean.clear(); 2064 mLastCommentName.clear(); 2065 mLastCommentData.clear(); 2066 } 2067 break; 2068 } 2069 2070 default: 2071 break; 2072 } 2073 2074 if (size >= 8 && metadataKey) { 2075 if (metadataKey == kKeyAlbumArt) { 2076 mFileMetaData->setData( 2077 kKeyAlbumArt, MetaData::TYPE_NONE, 2078 buffer + 8, size - 8); 2079 } else if (metadataKey == kKeyGenre) { 2080 if (flags == 0) { 2081 // uint8_t genre code, iTunes genre codes are 2082 // the standard id3 codes, except they start 2083 // at 1 instead of 0 (e.g. Pop is 14, not 13) 2084 // We use standard id3 numbering, so subtract 1. 
2085 int genrecode = (int)buffer[size - 1]; 2086 genrecode--; 2087 if (genrecode < 0) { 2088 genrecode = 255; // reserved for 'unknown genre' 2089 } 2090 char genre[10]; 2091 sprintf(genre, "%d", genrecode); 2092 2093 mFileMetaData->setCString(metadataKey, genre); 2094 } else if (flags == 1) { 2095 // custom genre string 2096 buffer[size] = '\0'; 2097 2098 mFileMetaData->setCString( 2099 metadataKey, (const char *)buffer + 8); 2100 } 2101 } else { 2102 buffer[size] = '\0'; 2103 2104 mFileMetaData->setCString( 2105 metadataKey, (const char *)buffer + 8); 2106 } 2107 } 2108 2109 delete[] buffer; 2110 buffer = NULL; 2111 2112 return OK; 2113} 2114 2115sp<MediaSource> MPEG4Extractor::getTrack(size_t index) { 2116 status_t err; 2117 if ((err = readMetaData()) != OK) { 2118 return NULL; 2119 } 2120 2121 Track *track = mFirstTrack; 2122 while (index > 0) { 2123 if (track == NULL) { 2124 return NULL; 2125 } 2126 2127 track = track->next; 2128 --index; 2129 } 2130 2131 if (track == NULL) { 2132 return NULL; 2133 } 2134 2135 ALOGV("getTrack called, pssh: %d", mPssh.size()); 2136 2137 return new MPEG4Source( 2138 track->meta, mDataSource, track->timescale, track->sampleTable, 2139 mSidxEntries, mMoofOffset); 2140} 2141 2142// static 2143status_t MPEG4Extractor::verifyTrack(Track *track) { 2144 const char *mime; 2145 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 2146 2147 uint32_t type; 2148 const void *data; 2149 size_t size; 2150 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 2151 if (!track->meta->findData(kKeyAVCC, &type, &data, &size) 2152 || type != kTypeAVCC) { 2153 return ERROR_MALFORMED; 2154 } 2155 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 2156 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 2157 if (!track->meta->findData(kKeyESDS, &type, &data, &size) 2158 || type != kTypeESDS) { 2159 return ERROR_MALFORMED; 2160 } 2161 } 2162 2163 if (!track->sampleTable->isValid()) { 2164 // Make sure we have all the metadata we need. 
2165 return ERROR_MALFORMED; 2166 } 2167 2168 return OK; 2169} 2170 2171status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 2172 const void *esds_data, size_t esds_size) { 2173 ESDS esds(esds_data, esds_size); 2174 2175 uint8_t objectTypeIndication; 2176 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 2177 return ERROR_MALFORMED; 2178 } 2179 2180 if (objectTypeIndication == 0xe1) { 2181 // This isn't MPEG4 audio at all, it's QCELP 14k... 2182 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 2183 return OK; 2184 } 2185 2186 if (objectTypeIndication == 0x6b) { 2187 // The media subtype is MP3 audio 2188 // Our software MP3 audio decoder may not be able to handle 2189 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 2190 ALOGE("MP3 track in MP4/3GPP file is not supported"); 2191 return ERROR_UNSUPPORTED; 2192 } 2193 2194 const uint8_t *csd; 2195 size_t csd_size; 2196 if (esds.getCodecSpecificInfo( 2197 (const void **)&csd, &csd_size) != OK) { 2198 return ERROR_MALFORMED; 2199 } 2200 2201#if 0 2202 printf("ESD of size %d\n", csd_size); 2203 hexdump(csd, csd_size); 2204#endif 2205 2206 if (csd_size == 0) { 2207 // There's no further information, i.e. no codec specific data 2208 // Let's assume that the information provided in the mpeg4 headers 2209 // is accurate and hope for the best. 
2210 2211 return OK; 2212 } 2213 2214 if (csd_size < 2) { 2215 return ERROR_MALFORMED; 2216 } 2217 2218 ABitReader br(csd, csd_size); 2219 uint32_t objectType = br.getBits(5); 2220 2221 if (objectType == 31) { // AAC-ELD => additional 6 bits 2222 objectType = 32 + br.getBits(6); 2223 } 2224 2225 uint32_t freqIndex = br.getBits(4); 2226 2227 int32_t sampleRate = 0; 2228 int32_t numChannels = 0; 2229 if (freqIndex == 15) { 2230 if (csd_size < 5) { 2231 return ERROR_MALFORMED; 2232 } 2233 sampleRate = br.getBits(24); 2234 numChannels = br.getBits(4); 2235 } else { 2236 numChannels = br.getBits(4); 2237 if (objectType == 5) { 2238 // SBR specific config per 14496-3 table 1.13 2239 freqIndex = br.getBits(4); 2240 if (freqIndex == 15) { 2241 if (csd_size < 8) { 2242 return ERROR_MALFORMED; 2243 } 2244 sampleRate = br.getBits(24); 2245 } 2246 } 2247 2248 if (sampleRate == 0) { 2249 static uint32_t kSamplingRate[] = { 2250 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 2251 16000, 12000, 11025, 8000, 7350 2252 }; 2253 2254 if (freqIndex == 13 || freqIndex == 14) { 2255 return ERROR_MALFORMED; 2256 } 2257 2258 sampleRate = kSamplingRate[freqIndex]; 2259 } 2260 } 2261 2262 if (numChannels == 0) { 2263 return ERROR_UNSUPPORTED; 2264 } 2265 2266 int32_t prevSampleRate; 2267 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); 2268 2269 if (prevSampleRate != sampleRate) { 2270 ALOGV("mpeg4 audio sample rate different from previous setting. " 2271 "was: %d, now: %d", prevSampleRate, sampleRate); 2272 } 2273 2274 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 2275 2276 int32_t prevChannelCount; 2277 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); 2278 2279 if (prevChannelCount != numChannels) { 2280 ALOGV("mpeg4 audio channel count different from previous setting. 
" 2281 "was: %d, now: %d", prevChannelCount, numChannels); 2282 } 2283 2284 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); 2285 2286 return OK; 2287} 2288 2289//////////////////////////////////////////////////////////////////////////////// 2290 2291MPEG4Source::MPEG4Source( 2292 const sp<MetaData> &format, 2293 const sp<DataSource> &dataSource, 2294 int32_t timeScale, 2295 const sp<SampleTable> &sampleTable, 2296 Vector<SidxEntry> &sidx, 2297 off64_t firstMoofOffset) 2298 : mFormat(format), 2299 mDataSource(dataSource), 2300 mTimescale(timeScale), 2301 mSampleTable(sampleTable), 2302 mCurrentSampleIndex(0), 2303 mCurrentFragmentIndex(0), 2304 mSegments(sidx), 2305 mFirstMoofOffset(firstMoofOffset), 2306 mCurrentMoofOffset(firstMoofOffset), 2307 mCurrentTime(0), 2308 mCurrentSampleInfoAllocSize(0), 2309 mCurrentSampleInfoSizes(NULL), 2310 mCurrentSampleInfoOffsetsAllocSize(0), 2311 mCurrentSampleInfoOffsets(NULL), 2312 mIsAVC(false), 2313 mNALLengthSize(0), 2314 mStarted(false), 2315 mGroup(NULL), 2316 mBuffer(NULL), 2317 mWantsNALFragments(false), 2318 mSrcBuffer(NULL) { 2319 2320 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode); 2321 mDefaultIVSize = 0; 2322 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 2323 uint32_t keytype; 2324 const void *key; 2325 size_t keysize; 2326 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 2327 CHECK(keysize <= 16); 2328 memset(mCryptoKey, 0, 16); 2329 memcpy(mCryptoKey, key, keysize); 2330 } 2331 2332 const char *mime; 2333 bool success = mFormat->findCString(kKeyMIMEType, &mime); 2334 CHECK(success); 2335 2336 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 2337 2338 if (mIsAVC) { 2339 uint32_t type; 2340 const void *data; 2341 size_t size; 2342 CHECK(format->findData(kKeyAVCC, &type, &data, &size)); 2343 2344 const uint8_t *ptr = (const uint8_t *)data; 2345 2346 CHECK(size >= 7); 2347 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 2348 2349 // The number of bytes 
used to encode the length of a NAL unit. 2350 mNALLengthSize = 1 + (ptr[4] & 3); 2351 } 2352 2353 CHECK(format->findInt32(kKeyTrackID, &mTrackId)); 2354 2355 if (mFirstMoofOffset != 0) { 2356 off64_t offset = mFirstMoofOffset; 2357 parseChunk(&offset); 2358 } 2359} 2360 2361MPEG4Source::~MPEG4Source() { 2362 if (mStarted) { 2363 stop(); 2364 } 2365 free(mCurrentSampleInfoSizes); 2366 free(mCurrentSampleInfoOffsets); 2367} 2368 2369status_t MPEG4Source::start(MetaData *params) { 2370 Mutex::Autolock autoLock(mLock); 2371 2372 CHECK(!mStarted); 2373 2374 int32_t val; 2375 if (params && params->findInt32(kKeyWantsNALFragments, &val) 2376 && val != 0) { 2377 mWantsNALFragments = true; 2378 } else { 2379 mWantsNALFragments = false; 2380 } 2381 2382 mGroup = new MediaBufferGroup; 2383 2384 int32_t max_size; 2385 CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size)); 2386 2387 mGroup->add_buffer(new MediaBuffer(max_size)); 2388 2389 mSrcBuffer = new uint8_t[max_size]; 2390 2391 mStarted = true; 2392 2393 return OK; 2394} 2395 2396status_t MPEG4Source::stop() { 2397 Mutex::Autolock autoLock(mLock); 2398 2399 CHECK(mStarted); 2400 2401 if (mBuffer != NULL) { 2402 mBuffer->release(); 2403 mBuffer = NULL; 2404 } 2405 2406 delete[] mSrcBuffer; 2407 mSrcBuffer = NULL; 2408 2409 delete mGroup; 2410 mGroup = NULL; 2411 2412 mStarted = false; 2413 mCurrentSampleIndex = 0; 2414 2415 return OK; 2416} 2417 2418status_t MPEG4Source::parseChunk(off64_t *offset) { 2419 uint32_t hdr[2]; 2420 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 2421 return ERROR_IO; 2422 } 2423 uint64_t chunk_size = ntohl(hdr[0]); 2424 uint32_t chunk_type = ntohl(hdr[1]); 2425 off64_t data_offset = *offset + 8; 2426 2427 if (chunk_size == 1) { 2428 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 2429 return ERROR_IO; 2430 } 2431 chunk_size = ntoh64(chunk_size); 2432 data_offset += 8; 2433 2434 if (chunk_size < 16) { 2435 // The smallest valid chunk is 16 bytes long in this case. 
2436 return ERROR_MALFORMED; 2437 } 2438 } else if (chunk_size < 8) { 2439 // The smallest valid chunk is 8 bytes long. 2440 return ERROR_MALFORMED; 2441 } 2442 2443 char chunk[5]; 2444 MakeFourCCString(chunk_type, chunk); 2445 ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset); 2446 2447 off64_t chunk_data_size = *offset + chunk_size - data_offset; 2448 2449 switch(chunk_type) { 2450 2451 case FOURCC('t', 'r', 'a', 'f'): 2452 case FOURCC('m', 'o', 'o', 'f'): { 2453 off64_t stop_offset = *offset + chunk_size; 2454 *offset = data_offset; 2455 while (*offset < stop_offset) { 2456 status_t err = parseChunk(offset); 2457 if (err != OK) { 2458 return err; 2459 } 2460 } 2461 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 2462 // *offset points to the mdat box following this moof 2463 parseChunk(offset); // doesn't actually parse it, just updates offset 2464 mNextMoofOffset = *offset; 2465 } 2466 break; 2467 } 2468 2469 case FOURCC('t', 'f', 'h', 'd'): { 2470 status_t err; 2471 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 2472 return err; 2473 } 2474 *offset += chunk_size; 2475 break; 2476 } 2477 2478 case FOURCC('t', 'r', 'u', 'n'): { 2479 status_t err; 2480 if (mLastParsedTrackId == mTrackId) { 2481 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 2482 return err; 2483 } 2484 } 2485 2486 *offset += chunk_size; 2487 break; 2488 } 2489 2490 case FOURCC('s', 'a', 'i', 'z'): { 2491 status_t err; 2492 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 2493 return err; 2494 } 2495 *offset += chunk_size; 2496 break; 2497 } 2498 case FOURCC('s', 'a', 'i', 'o'): { 2499 status_t err; 2500 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 2501 return err; 2502 } 2503 *offset += chunk_size; 2504 break; 2505 } 2506 2507 case FOURCC('m', 'd', 'a', 't'): { 2508 // parse DRM info if present 2509 ALOGV("MPEG4Source::parseChunk mdat"); 2510 // if 
saiz/saoi was previously observed, do something with the sampleinfos 2511 *offset += chunk_size; 2512 break; 2513 } 2514 2515 default: { 2516 *offset += chunk_size; 2517 break; 2518 } 2519 } 2520 return OK; 2521} 2522 2523status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size) { 2524 ALOGV("parseSampleAuxiliaryInformationSizes"); 2525 // 14496-12 8.7.12 2526 uint8_t version; 2527 if (mDataSource->readAt( 2528 offset, &version, sizeof(version)) 2529 < (ssize_t)sizeof(version)) { 2530 return ERROR_IO; 2531 } 2532 2533 if (version != 0) { 2534 return ERROR_UNSUPPORTED; 2535 } 2536 offset++; 2537 2538 uint32_t flags; 2539 if (!mDataSource->getUInt24(offset, &flags)) { 2540 return ERROR_IO; 2541 } 2542 offset += 3; 2543 2544 if (flags & 1) { 2545 uint32_t tmp; 2546 if (!mDataSource->getUInt32(offset, &tmp)) { 2547 return ERROR_MALFORMED; 2548 } 2549 mCurrentAuxInfoType = tmp; 2550 offset += 4; 2551 if (!mDataSource->getUInt32(offset, &tmp)) { 2552 return ERROR_MALFORMED; 2553 } 2554 mCurrentAuxInfoTypeParameter = tmp; 2555 offset += 4; 2556 } 2557 2558 uint8_t defsize; 2559 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 2560 return ERROR_MALFORMED; 2561 } 2562 mCurrentDefaultSampleInfoSize = defsize; 2563 offset++; 2564 2565 uint32_t smplcnt; 2566 if (!mDataSource->getUInt32(offset, &smplcnt)) { 2567 return ERROR_MALFORMED; 2568 } 2569 mCurrentSampleInfoCount = smplcnt; 2570 offset += 4; 2571 2572 if (mCurrentDefaultSampleInfoSize != 0) { 2573 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 2574 return OK; 2575 } 2576 if (smplcnt > mCurrentSampleInfoAllocSize) { 2577 mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 2578 mCurrentSampleInfoAllocSize = smplcnt; 2579 } 2580 2581 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 2582 return OK; 2583} 2584 2585status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size) { 2586 
ALOGV("parseSampleAuxiliaryInformationOffsets"); 2587 // 14496-12 8.7.13 2588 uint8_t version; 2589 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 2590 return ERROR_IO; 2591 } 2592 offset++; 2593 2594 uint32_t flags; 2595 if (!mDataSource->getUInt24(offset, &flags)) { 2596 return ERROR_IO; 2597 } 2598 offset += 3; 2599 2600 uint32_t entrycount; 2601 if (!mDataSource->getUInt32(offset, &entrycount)) { 2602 return ERROR_IO; 2603 } 2604 offset += 4; 2605 2606 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 2607 mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8); 2608 mCurrentSampleInfoOffsetsAllocSize = entrycount; 2609 } 2610 mCurrentSampleInfoOffsetCount = entrycount; 2611 2612 for (size_t i = 0; i < entrycount; i++) { 2613 if (version == 0) { 2614 uint32_t tmp; 2615 if (!mDataSource->getUInt32(offset, &tmp)) { 2616 return ERROR_IO; 2617 } 2618 mCurrentSampleInfoOffsets[i] = tmp; 2619 offset += 4; 2620 } else { 2621 uint64_t tmp; 2622 if (!mDataSource->getUInt64(offset, &tmp)) { 2623 return ERROR_IO; 2624 } 2625 mCurrentSampleInfoOffsets[i] = tmp; 2626 offset += 8; 2627 } 2628 } 2629 2630 // parse clear/encrypted data 2631 2632 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 2633 2634 drmoffset += mCurrentMoofOffset; 2635 int ivlength; 2636 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 2637 2638 // read CencSampleAuxiliaryDataFormats 2639 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) { 2640 Sample *smpl = &mCurrentSamples.editItemAt(i); 2641 2642 memset(smpl->iv, 0, 16); 2643 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) { 2644 return ERROR_IO; 2645 } 2646 2647 drmoffset += ivlength; 2648 2649 int32_t smplinfosize = mCurrentDefaultSampleInfoSize; 2650 if (smplinfosize == 0) { 2651 smplinfosize = mCurrentSampleInfoSizes[i]; 2652 } 2653 if (smplinfosize > ivlength) { 2654 uint16_t numsubsamples; 2655 if (!mDataSource->getUInt16(drmoffset, 
&numsubsamples)) { 2656 return ERROR_IO; 2657 } 2658 drmoffset += 2; 2659 for (size_t j = 0; j < numsubsamples; j++) { 2660 uint16_t numclear; 2661 uint32_t numencrypted; 2662 if (!mDataSource->getUInt16(drmoffset, &numclear)) { 2663 return ERROR_IO; 2664 } 2665 drmoffset += 2; 2666 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) { 2667 return ERROR_IO; 2668 } 2669 drmoffset += 4; 2670 smpl->clearsizes.add(numclear); 2671 smpl->encryptedsizes.add(numencrypted); 2672 } 2673 } else { 2674 smpl->clearsizes.add(0); 2675 smpl->encryptedsizes.add(smpl->size); 2676 } 2677 } 2678 2679 2680 return OK; 2681} 2682 2683status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 2684 2685 if (size < 8) { 2686 return -EINVAL; 2687 } 2688 2689 uint32_t flags; 2690 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 2691 return ERROR_MALFORMED; 2692 } 2693 2694 if (flags & 0xff000000) { 2695 return -EINVAL; 2696 } 2697 2698 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 2699 return ERROR_MALFORMED; 2700 } 2701 2702 if (mLastParsedTrackId != mTrackId) { 2703 // this is not the right track, skip it 2704 return OK; 2705 } 2706 2707 mTrackFragmentHeaderInfo.mFlags = flags; 2708 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 2709 offset += 8; 2710 size -= 8; 2711 2712 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 2713 2714 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 2715 if (size < 8) { 2716 return -EINVAL; 2717 } 2718 2719 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) { 2720 return ERROR_MALFORMED; 2721 } 2722 offset += 8; 2723 size -= 8; 2724 } 2725 2726 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 2727 if (size < 4) { 2728 return -EINVAL; 2729 } 2730 2731 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 2732 return ERROR_MALFORMED; 2733 } 2734 
offset += 4; 2735 size -= 4; 2736 } 2737 2738 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 2739 if (size < 4) { 2740 return -EINVAL; 2741 } 2742 2743 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 2744 return ERROR_MALFORMED; 2745 } 2746 offset += 4; 2747 size -= 4; 2748 } 2749 2750 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 2751 if (size < 4) { 2752 return -EINVAL; 2753 } 2754 2755 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 2756 return ERROR_MALFORMED; 2757 } 2758 offset += 4; 2759 size -= 4; 2760 } 2761 2762 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 2763 if (size < 4) { 2764 return -EINVAL; 2765 } 2766 2767 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 2768 return ERROR_MALFORMED; 2769 } 2770 offset += 4; 2771 size -= 4; 2772 } 2773 2774 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 2775 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 2776 } 2777 2778 mTrackFragmentHeaderInfo.mDataOffset = 0; 2779 return OK; 2780} 2781 2782status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 2783 2784 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 2785 if (size < 8) { 2786 return -EINVAL; 2787 } 2788 2789 enum { 2790 kDataOffsetPresent = 0x01, 2791 kFirstSampleFlagsPresent = 0x04, 2792 kSampleDurationPresent = 0x100, 2793 kSampleSizePresent = 0x200, 2794 kSampleFlagsPresent = 0x400, 2795 kSampleCompositionTimeOffsetPresent = 0x800, 2796 }; 2797 2798 uint32_t flags; 2799 if (!mDataSource->getUInt32(offset, &flags)) { 2800 return ERROR_MALFORMED; 2801 } 2802 ALOGV("fragment run flags: %08x", flags); 2803 2804 if (flags & 0xff000000) { 2805 return -EINVAL; 2806 } 2807 2808 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 2809 // These two shall not be used together. 
2810 return -EINVAL; 2811 } 2812 2813 uint32_t sampleCount; 2814 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 2815 return ERROR_MALFORMED; 2816 } 2817 offset += 8; 2818 size -= 8; 2819 2820 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 2821 2822 uint32_t firstSampleFlags = 0; 2823 2824 if (flags & kDataOffsetPresent) { 2825 if (size < 4) { 2826 return -EINVAL; 2827 } 2828 2829 int32_t dataOffsetDelta; 2830 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 2831 return ERROR_MALFORMED; 2832 } 2833 2834 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 2835 2836 offset += 4; 2837 size -= 4; 2838 } 2839 2840 if (flags & kFirstSampleFlagsPresent) { 2841 if (size < 4) { 2842 return -EINVAL; 2843 } 2844 2845 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 2846 return ERROR_MALFORMED; 2847 } 2848 offset += 4; 2849 size -= 4; 2850 } 2851 2852 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 2853 sampleCtsOffset = 0; 2854 2855 size_t bytesPerSample = 0; 2856 if (flags & kSampleDurationPresent) { 2857 bytesPerSample += 4; 2858 } else if (mTrackFragmentHeaderInfo.mFlags 2859 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 2860 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 2861 } else { 2862 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 2863 } 2864 2865 if (flags & kSampleSizePresent) { 2866 bytesPerSample += 4; 2867 } else if (mTrackFragmentHeaderInfo.mFlags 2868 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 2869 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 2870 } else { 2871 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 2872 } 2873 2874 if (flags & kSampleFlagsPresent) { 2875 bytesPerSample += 4; 2876 } else if (mTrackFragmentHeaderInfo.mFlags 2877 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 2878 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 2879 } else { 2880 sampleFlags = 
mTrackFragmentHeaderInfo.mDefaultSampleFlags; 2881 } 2882 2883 if (flags & kSampleCompositionTimeOffsetPresent) { 2884 bytesPerSample += 4; 2885 } else { 2886 sampleCtsOffset = 0; 2887 } 2888 2889 if (size < sampleCount * bytesPerSample) { 2890 return -EINVAL; 2891 } 2892 2893 Sample tmp; 2894 for (uint32_t i = 0; i < sampleCount; ++i) { 2895 if (flags & kSampleDurationPresent) { 2896 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 2897 return ERROR_MALFORMED; 2898 } 2899 offset += 4; 2900 } 2901 2902 if (flags & kSampleSizePresent) { 2903 if (!mDataSource->getUInt32(offset, &sampleSize)) { 2904 return ERROR_MALFORMED; 2905 } 2906 offset += 4; 2907 } 2908 2909 if (flags & kSampleFlagsPresent) { 2910 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 2911 return ERROR_MALFORMED; 2912 } 2913 offset += 4; 2914 } 2915 2916 if (flags & kSampleCompositionTimeOffsetPresent) { 2917 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 2918 return ERROR_MALFORMED; 2919 } 2920 offset += 4; 2921 } 2922 2923 ALOGV("adding sample %d at offset 0x%08llx, size %u, duration %u, " 2924 " flags 0x%08x", i + 1, 2925 dataOffset, sampleSize, sampleDuration, 2926 (flags & kFirstSampleFlagsPresent) && i == 0 2927 ? 
firstSampleFlags : sampleFlags); 2928 tmp.offset = dataOffset; 2929 tmp.size = sampleSize; 2930 tmp.duration = sampleDuration; 2931 mCurrentSamples.add(tmp); 2932 2933 dataOffset += sampleSize; 2934 } 2935 2936 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 2937 2938 return OK; 2939} 2940 2941sp<MetaData> MPEG4Source::getFormat() { 2942 Mutex::Autolock autoLock(mLock); 2943 2944 return mFormat; 2945} 2946 2947size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 2948 switch (mNALLengthSize) { 2949 case 1: 2950 return *data; 2951 case 2: 2952 return U16_AT(data); 2953 case 3: 2954 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 2955 case 4: 2956 return U32_AT(data); 2957 } 2958 2959 // This cannot happen, mNALLengthSize springs to life by adding 1 to 2960 // a 2-bit integer. 2961 CHECK(!"Should not be here."); 2962 2963 return 0; 2964} 2965 2966status_t MPEG4Source::read( 2967 MediaBuffer **out, const ReadOptions *options) { 2968 Mutex::Autolock autoLock(mLock); 2969 2970 CHECK(mStarted); 2971 2972 if (mFirstMoofOffset > 0) { 2973 return fragmentedRead(out, options); 2974 } 2975 2976 *out = NULL; 2977 2978 int64_t targetSampleTimeUs = -1; 2979 2980 int64_t seekTimeUs; 2981 ReadOptions::SeekMode mode; 2982 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 2983 uint32_t findFlags = 0; 2984 switch (mode) { 2985 case ReadOptions::SEEK_PREVIOUS_SYNC: 2986 findFlags = SampleTable::kFlagBefore; 2987 break; 2988 case ReadOptions::SEEK_NEXT_SYNC: 2989 findFlags = SampleTable::kFlagAfter; 2990 break; 2991 case ReadOptions::SEEK_CLOSEST_SYNC: 2992 case ReadOptions::SEEK_CLOSEST: 2993 findFlags = SampleTable::kFlagClosest; 2994 break; 2995 default: 2996 CHECK(!"Should not be here."); 2997 break; 2998 } 2999 3000 uint32_t sampleIndex; 3001 status_t err = mSampleTable->findSampleAtTime( 3002 seekTimeUs * mTimescale / 1000000, 3003 &sampleIndex, findFlags); 3004 3005 if (mode == ReadOptions::SEEK_CLOSEST) { 3006 // We found the closest sample already, now we 
want the sync 3007 // sample preceding it (or the sample itself of course), even 3008 // if the subsequent sync sample is closer. 3009 findFlags = SampleTable::kFlagBefore; 3010 } 3011 3012 uint32_t syncSampleIndex; 3013 if (err == OK) { 3014 err = mSampleTable->findSyncSampleNear( 3015 sampleIndex, &syncSampleIndex, findFlags); 3016 } 3017 3018 uint32_t sampleTime; 3019 if (err == OK) { 3020 err = mSampleTable->getMetaDataForSample( 3021 sampleIndex, NULL, NULL, &sampleTime); 3022 } 3023 3024 if (err != OK) { 3025 if (err == ERROR_OUT_OF_RANGE) { 3026 // An attempt to seek past the end of the stream would 3027 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 3028 // this all the way to the MediaPlayer would cause abnormal 3029 // termination. Legacy behaviour appears to be to behave as if 3030 // we had seeked to the end of stream, ending normally. 3031 err = ERROR_END_OF_STREAM; 3032 } 3033 ALOGV("end of stream"); 3034 return err; 3035 } 3036 3037 if (mode == ReadOptions::SEEK_CLOSEST) { 3038 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 3039 } 3040 3041#if 0 3042 uint32_t syncSampleTime; 3043 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 3044 syncSampleIndex, NULL, NULL, &syncSampleTime)); 3045 3046 ALOGI("seek to time %lld us => sample at time %lld us, " 3047 "sync sample at time %lld us", 3048 seekTimeUs, 3049 sampleTime * 1000000ll / mTimescale, 3050 syncSampleTime * 1000000ll / mTimescale); 3051#endif 3052 3053 mCurrentSampleIndex = syncSampleIndex; 3054 if (mBuffer != NULL) { 3055 mBuffer->release(); 3056 mBuffer = NULL; 3057 } 3058 3059 // fall through 3060 } 3061 3062 off64_t offset; 3063 size_t size; 3064 uint32_t cts; 3065 bool isSyncSample; 3066 bool newBuffer = false; 3067 if (mBuffer == NULL) { 3068 newBuffer = true; 3069 3070 status_t err = 3071 mSampleTable->getMetaDataForSample( 3072 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample); 3073 3074 if (err != OK) { 3075 return err; 3076 } 3077 3078 err = 
mGroup->acquire_buffer(&mBuffer); 3079 3080 if (err != OK) { 3081 CHECK(mBuffer == NULL); 3082 return err; 3083 } 3084 } 3085 3086 if (!mIsAVC || mWantsNALFragments) { 3087 if (newBuffer) { 3088 ssize_t num_bytes_read = 3089 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 3090 3091 if (num_bytes_read < (ssize_t)size) { 3092 mBuffer->release(); 3093 mBuffer = NULL; 3094 3095 return ERROR_IO; 3096 } 3097 3098 CHECK(mBuffer != NULL); 3099 mBuffer->set_range(0, size); 3100 mBuffer->meta_data()->clear(); 3101 mBuffer->meta_data()->setInt64( 3102 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3103 3104 if (targetSampleTimeUs >= 0) { 3105 mBuffer->meta_data()->setInt64( 3106 kKeyTargetTime, targetSampleTimeUs); 3107 } 3108 3109 if (isSyncSample) { 3110 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3111 } 3112 3113 ++mCurrentSampleIndex; 3114 } 3115 3116 if (!mIsAVC) { 3117 *out = mBuffer; 3118 mBuffer = NULL; 3119 3120 return OK; 3121 } 3122 3123 // Each NAL unit is split up into its constituent fragments and 3124 // each one of them returned in its own buffer. 
3125 3126 CHECK(mBuffer->range_length() >= mNALLengthSize); 3127 3128 const uint8_t *src = 3129 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 3130 3131 size_t nal_size = parseNALSize(src); 3132 if (mBuffer->range_length() < mNALLengthSize + nal_size) { 3133 ALOGE("incomplete NAL unit."); 3134 3135 mBuffer->release(); 3136 mBuffer = NULL; 3137 3138 return ERROR_MALFORMED; 3139 } 3140 3141 MediaBuffer *clone = mBuffer->clone(); 3142 CHECK(clone != NULL); 3143 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 3144 3145 CHECK(mBuffer != NULL); 3146 mBuffer->set_range( 3147 mBuffer->range_offset() + mNALLengthSize + nal_size, 3148 mBuffer->range_length() - mNALLengthSize - nal_size); 3149 3150 if (mBuffer->range_length() == 0) { 3151 mBuffer->release(); 3152 mBuffer = NULL; 3153 } 3154 3155 *out = clone; 3156 3157 return OK; 3158 } else { 3159 // Whole NAL units are returned but each fragment is prefixed by 3160 // the start code (0x00 00 00 01). 3161 ssize_t num_bytes_read = 0; 3162 int32_t drm = 0; 3163 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 3164 if (usesDRM) { 3165 num_bytes_read = 3166 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 3167 } else { 3168 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 3169 } 3170 3171 if (num_bytes_read < (ssize_t)size) { 3172 mBuffer->release(); 3173 mBuffer = NULL; 3174 3175 return ERROR_IO; 3176 } 3177 3178 if (usesDRM) { 3179 CHECK(mBuffer != NULL); 3180 mBuffer->set_range(0, size); 3181 3182 } else { 3183 uint8_t *dstData = (uint8_t *)mBuffer->data(); 3184 size_t srcOffset = 0; 3185 size_t dstOffset = 0; 3186 3187 while (srcOffset < size) { 3188 bool isMalFormed = (srcOffset + mNALLengthSize > size); 3189 size_t nalLength = 0; 3190 if (!isMalFormed) { 3191 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 3192 srcOffset += mNALLengthSize; 3193 isMalFormed = srcOffset + nalLength > size; 3194 } 3195 3196 if (isMalFormed) { 3197 ALOGE("Video 
is malformed"); 3198 mBuffer->release(); 3199 mBuffer = NULL; 3200 return ERROR_MALFORMED; 3201 } 3202 3203 if (nalLength == 0) { 3204 continue; 3205 } 3206 3207 CHECK(dstOffset + 4 <= mBuffer->size()); 3208 3209 dstData[dstOffset++] = 0; 3210 dstData[dstOffset++] = 0; 3211 dstData[dstOffset++] = 0; 3212 dstData[dstOffset++] = 1; 3213 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 3214 srcOffset += nalLength; 3215 dstOffset += nalLength; 3216 } 3217 CHECK_EQ(srcOffset, size); 3218 CHECK(mBuffer != NULL); 3219 mBuffer->set_range(0, dstOffset); 3220 } 3221 3222 mBuffer->meta_data()->clear(); 3223 mBuffer->meta_data()->setInt64( 3224 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3225 3226 if (targetSampleTimeUs >= 0) { 3227 mBuffer->meta_data()->setInt64( 3228 kKeyTargetTime, targetSampleTimeUs); 3229 } 3230 3231 if (isSyncSample) { 3232 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3233 } 3234 3235 ++mCurrentSampleIndex; 3236 3237 *out = mBuffer; 3238 mBuffer = NULL; 3239 3240 return OK; 3241 } 3242} 3243 3244status_t MPEG4Source::fragmentedRead( 3245 MediaBuffer **out, const ReadOptions *options) { 3246 3247 ALOGV("MPEG4Source::fragmentedRead"); 3248 3249 CHECK(mStarted); 3250 3251 *out = NULL; 3252 3253 int64_t targetSampleTimeUs = -1; 3254 3255 int64_t seekTimeUs; 3256 ReadOptions::SeekMode mode; 3257 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3258 3259 int numSidxEntries = mSegments.size(); 3260 if (numSidxEntries != 0) { 3261 int64_t totalTime = 0; 3262 off64_t totalOffset = mFirstMoofOffset; 3263 for (int i = 0; i < numSidxEntries; i++) { 3264 const SidxEntry *se = &mSegments[i]; 3265 if (totalTime + se->mDurationUs > seekTimeUs) { 3266 // The requested time is somewhere in this segment 3267 if ((mode == ReadOptions::SEEK_NEXT_SYNC) || 3268 (mode == ReadOptions::SEEK_CLOSEST_SYNC && 3269 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 3270 // requested next sync, or closest sync and it was 
closer to the end of 3271 // this segment 3272 totalTime += se->mDurationUs; 3273 totalOffset += se->mSize; 3274 } 3275 break; 3276 } 3277 totalTime += se->mDurationUs; 3278 totalOffset += se->mSize; 3279 } 3280 mCurrentMoofOffset = totalOffset; 3281 mCurrentSamples.clear(); 3282 mCurrentSampleIndex = 0; 3283 parseChunk(&totalOffset); 3284 mCurrentTime = totalTime * mTimescale / 1000000ll; 3285 } 3286 3287 if (mBuffer != NULL) { 3288 mBuffer->release(); 3289 mBuffer = NULL; 3290 } 3291 3292 // fall through 3293 } 3294 3295 off64_t offset = 0; 3296 size_t size; 3297 uint32_t cts = 0; 3298 bool isSyncSample = false; 3299 bool newBuffer = false; 3300 if (mBuffer == NULL) { 3301 newBuffer = true; 3302 3303 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 3304 // move to next fragment 3305 Sample lastSample = mCurrentSamples[mCurrentSamples.size() - 1]; 3306 off64_t nextMoof = mNextMoofOffset; // lastSample.offset + lastSample.size; 3307 mCurrentMoofOffset = nextMoof; 3308 mCurrentSamples.clear(); 3309 mCurrentSampleIndex = 0; 3310 parseChunk(&nextMoof); 3311 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 3312 return ERROR_END_OF_STREAM; 3313 } 3314 } 3315 3316 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 3317 offset = smpl->offset; 3318 size = smpl->size; 3319 cts = mCurrentTime; 3320 mCurrentTime += smpl->duration; 3321 isSyncSample = (mCurrentSampleIndex == 0); // XXX 3322 3323 status_t err = mGroup->acquire_buffer(&mBuffer); 3324 3325 if (err != OK) { 3326 CHECK(mBuffer == NULL); 3327 ALOGV("acquire_buffer returned %d", err); 3328 return err; 3329 } 3330 } 3331 3332 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 3333 const sp<MetaData> bufmeta = mBuffer->meta_data(); 3334 bufmeta->clear(); 3335 if (smpl->encryptedsizes.size()) { 3336 // store clear/encrypted lengths in metadata 3337 bufmeta->setData(kKeyPlainSizes, 0, 3338 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 3339 bufmeta->setData(kKeyEncryptedSizes, 0, 3340 
smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 3341 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size? 3342 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 3343 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode); 3344 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16); 3345 } 3346 3347 if (!mIsAVC || mWantsNALFragments) { 3348 if (newBuffer) { 3349 ssize_t num_bytes_read = 3350 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 3351 3352 if (num_bytes_read < (ssize_t)size) { 3353 mBuffer->release(); 3354 mBuffer = NULL; 3355 3356 ALOGV("i/o error"); 3357 return ERROR_IO; 3358 } 3359 3360 CHECK(mBuffer != NULL); 3361 mBuffer->set_range(0, size); 3362 mBuffer->meta_data()->setInt64( 3363 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3364 3365 if (targetSampleTimeUs >= 0) { 3366 mBuffer->meta_data()->setInt64( 3367 kKeyTargetTime, targetSampleTimeUs); 3368 } 3369 3370 if (isSyncSample) { 3371 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3372 } 3373 3374 ++mCurrentSampleIndex; 3375 } 3376 3377 if (!mIsAVC) { 3378 *out = mBuffer; 3379 mBuffer = NULL; 3380 3381 return OK; 3382 } 3383 3384 // Each NAL unit is split up into its constituent fragments and 3385 // each one of them returned in its own buffer. 
3386 3387 CHECK(mBuffer->range_length() >= mNALLengthSize); 3388 3389 const uint8_t *src = 3390 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 3391 3392 size_t nal_size = parseNALSize(src); 3393 if (mBuffer->range_length() < mNALLengthSize + nal_size) { 3394 ALOGE("incomplete NAL unit."); 3395 3396 mBuffer->release(); 3397 mBuffer = NULL; 3398 3399 return ERROR_MALFORMED; 3400 } 3401 3402 MediaBuffer *clone = mBuffer->clone(); 3403 CHECK(clone != NULL); 3404 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 3405 3406 CHECK(mBuffer != NULL); 3407 mBuffer->set_range( 3408 mBuffer->range_offset() + mNALLengthSize + nal_size, 3409 mBuffer->range_length() - mNALLengthSize - nal_size); 3410 3411 if (mBuffer->range_length() == 0) { 3412 mBuffer->release(); 3413 mBuffer = NULL; 3414 } 3415 3416 *out = clone; 3417 3418 return OK; 3419 } else { 3420 ALOGV("whole NAL"); 3421 // Whole NAL units are returned but each fragment is prefixed by 3422 // the start code (0x00 00 00 01). 
3423 ssize_t num_bytes_read = 0; 3424 int32_t drm = 0; 3425 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 3426 if (usesDRM) { 3427 num_bytes_read = 3428 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 3429 } else { 3430 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 3431 } 3432 3433 if (num_bytes_read < (ssize_t)size) { 3434 mBuffer->release(); 3435 mBuffer = NULL; 3436 3437 ALOGV("i/o error"); 3438 return ERROR_IO; 3439 } 3440 3441 if (usesDRM) { 3442 CHECK(mBuffer != NULL); 3443 mBuffer->set_range(0, size); 3444 3445 } else { 3446 uint8_t *dstData = (uint8_t *)mBuffer->data(); 3447 size_t srcOffset = 0; 3448 size_t dstOffset = 0; 3449 3450 while (srcOffset < size) { 3451 bool isMalFormed = (srcOffset + mNALLengthSize > size); 3452 size_t nalLength = 0; 3453 if (!isMalFormed) { 3454 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 3455 srcOffset += mNALLengthSize; 3456 isMalFormed = srcOffset + nalLength > size; 3457 } 3458 3459 if (isMalFormed) { 3460 ALOGE("Video is malformed"); 3461 mBuffer->release(); 3462 mBuffer = NULL; 3463 return ERROR_MALFORMED; 3464 } 3465 3466 if (nalLength == 0) { 3467 continue; 3468 } 3469 3470 CHECK(dstOffset + 4 <= mBuffer->size()); 3471 3472 dstData[dstOffset++] = 0; 3473 dstData[dstOffset++] = 0; 3474 dstData[dstOffset++] = 0; 3475 dstData[dstOffset++] = 1; 3476 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 3477 srcOffset += nalLength; 3478 dstOffset += nalLength; 3479 } 3480 CHECK_EQ(srcOffset, size); 3481 CHECK(mBuffer != NULL); 3482 mBuffer->set_range(0, dstOffset); 3483 } 3484 3485 mBuffer->meta_data()->setInt64( 3486 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3487 3488 if (targetSampleTimeUs >= 0) { 3489 mBuffer->meta_data()->setInt64( 3490 kKeyTargetTime, targetSampleTimeUs); 3491 } 3492 3493 if (isSyncSample) { 3494 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3495 } 3496 3497 ++mCurrentSampleIndex; 3498 3499 *out = mBuffer; 3500 mBuffer = 
NULL; 3501 3502 return OK; 3503 } 3504} 3505 3506MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 3507 const char *mimePrefix) { 3508 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 3509 const char *mime; 3510 if (track->meta != NULL 3511 && track->meta->findCString(kKeyMIMEType, &mime) 3512 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 3513 return track; 3514 } 3515 } 3516 3517 return NULL; 3518} 3519 3520static bool LegacySniffMPEG4( 3521 const sp<DataSource> &source, String8 *mimeType, float *confidence) { 3522 uint8_t header[8]; 3523 3524 ssize_t n = source->readAt(4, header, sizeof(header)); 3525 if (n < (ssize_t)sizeof(header)) { 3526 return false; 3527 } 3528 3529 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 3530 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) 3531 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 3532 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 3533 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 3534 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) { 3535 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 3536 *confidence = 0.4; 3537 3538 return true; 3539 } 3540 3541 return false; 3542} 3543 3544static bool isCompatibleBrand(uint32_t fourcc) { 3545 static const uint32_t kCompatibleBrands[] = { 3546 FOURCC('i', 's', 'o', 'm'), 3547 FOURCC('i', 's', 'o', '2'), 3548 FOURCC('a', 'v', 'c', '1'), 3549 FOURCC('3', 'g', 'p', '4'), 3550 FOURCC('m', 'p', '4', '1'), 3551 FOURCC('m', 'p', '4', '2'), 3552 3553 // Won't promise that the following file types can be played. 3554 // Just give these file types a chance. 
3555 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 3556 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 3557 3558 FOURCC('3', 'g', '2', 'a'), // 3GPP2 3559 FOURCC('3', 'g', '2', 'b'), 3560 }; 3561 3562 for (size_t i = 0; 3563 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 3564 ++i) { 3565 if (kCompatibleBrands[i] == fourcc) { 3566 return true; 3567 } 3568 } 3569 3570 return false; 3571} 3572 3573// Attempt to actually parse the 'ftyp' atom and determine if a suitable 3574// compatible brand is present. 3575// Also try to identify where this file's metadata ends 3576// (end of the 'moov' atom) and report it to the caller as part of 3577// the metadata. 3578static bool BetterSniffMPEG4( 3579 const sp<DataSource> &source, String8 *mimeType, float *confidence, 3580 sp<AMessage> *meta) { 3581 // We scan up to 128 bytes to identify this file as an MP4. 3582 static const off64_t kMaxScanOffset = 128ll; 3583 3584 off64_t offset = 0ll; 3585 bool foundGoodFileType = false; 3586 off64_t moovAtomEndOffset = -1ll; 3587 bool done = false; 3588 3589 while (!done && offset < kMaxScanOffset) { 3590 uint32_t hdr[2]; 3591 if (source->readAt(offset, hdr, 8) < 8) { 3592 return false; 3593 } 3594 3595 uint64_t chunkSize = ntohl(hdr[0]); 3596 uint32_t chunkType = ntohl(hdr[1]); 3597 off64_t chunkDataOffset = offset + 8; 3598 3599 if (chunkSize == 1) { 3600 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 3601 return false; 3602 } 3603 3604 chunkSize = ntoh64(chunkSize); 3605 chunkDataOffset += 8; 3606 3607 if (chunkSize < 16) { 3608 // The smallest valid chunk is 16 bytes long in this case. 3609 return false; 3610 } 3611 } else if (chunkSize < 8) { 3612 // The smallest valid chunk is 8 bytes long. 
3613 return false; 3614 } 3615 3616 off64_t chunkDataSize = offset + chunkSize - chunkDataOffset; 3617 3618 char chunkstring[5]; 3619 MakeFourCCString(chunkType, chunkstring); 3620 ALOGV("saw chunk type %s, size %lld @ %lld", chunkstring, chunkSize, offset); 3621 switch (chunkType) { 3622 case FOURCC('f', 't', 'y', 'p'): 3623 { 3624 if (chunkDataSize < 8) { 3625 return false; 3626 } 3627 3628 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 3629 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 3630 if (i == 1) { 3631 // Skip this index, it refers to the minorVersion, 3632 // not a brand. 3633 continue; 3634 } 3635 3636 uint32_t brand; 3637 if (source->readAt( 3638 chunkDataOffset + 4 * i, &brand, 4) < 4) { 3639 return false; 3640 } 3641 3642 brand = ntohl(brand); 3643 3644 if (isCompatibleBrand(brand)) { 3645 foundGoodFileType = true; 3646 break; 3647 } 3648 } 3649 3650 if (!foundGoodFileType) { 3651 return false; 3652 } 3653 3654 break; 3655 } 3656 3657 case FOURCC('m', 'o', 'o', 'v'): 3658 { 3659 moovAtomEndOffset = offset + chunkSize; 3660 3661 done = true; 3662 break; 3663 } 3664 3665 default: 3666 break; 3667 } 3668 3669 offset += chunkSize; 3670 } 3671 3672 if (!foundGoodFileType) { 3673 return false; 3674 } 3675 3676 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 3677 *confidence = 0.4f; 3678 3679 if (moovAtomEndOffset >= 0) { 3680 *meta = new AMessage; 3681 (*meta)->setInt64("meta-data-size", moovAtomEndOffset); 3682 3683 ALOGV("found metadata size: %lld", moovAtomEndOffset); 3684 } 3685 3686 return true; 3687} 3688 3689bool SniffMPEG4( 3690 const sp<DataSource> &source, String8 *mimeType, float *confidence, 3691 sp<AMessage> *meta) { 3692 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) { 3693 return true; 3694 } 3695 3696 if (LegacySniffMPEG4(source, mimeType, confidence)) { 3697 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 3698 return true; 3699 } 3700 3701 return false; 3702} 3703 3704} // namespace android 3705