/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//#define LOG_NDEBUG 0
#define LOG_TAG "MPEG4Extractor"

#include <ctype.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <utils/Log.h>

#include "include/MPEG4Extractor.h"
#include "include/SampleTable.h"
#include "include/ESDS.h"

#include <media/stagefright/foundation/ABitReader.h>
#include <media/stagefright/foundation/ABuffer.h>
#include <media/stagefright/foundation/ADebug.h>
#include <media/stagefright/foundation/AMessage.h>
#include <media/stagefright/foundation/AUtils.h>
#include <media/stagefright/foundation/ColorUtils.h>
#include <media/stagefright/MediaBuffer.h>
#include <media/stagefright/MediaBufferGroup.h>
#include <media/stagefright/MediaDefs.h>
#include <media/stagefright/MediaSource.h>
#include <media/stagefright/MetaData.h>
#include <utils/String8.h>

#include <byteswap.h>
#include "include/ID3.h"
#include "include/avc_utils.h"

// Fallback for toolchains whose <stdint.h> does not expose UINT32_MAX.
#ifndef UINT32_MAX
#define UINT32_MAX       (4294967295U)
#endif

namespace android {

// Size limits applied while parsing.
enum {
    // max track header chunk to return
    kMaxTrackHeaderSize = 32,

    // maximum size of an atom. Some atoms can be bigger according to the spec,
    // but we only allow up to this size.
    kMaxAtomSize = 64 * 1024 * 1024,
};

// MediaSource declared for use with MPEG4Extractor. It keeps a strong
// reference to the extractor that created it (mOwner) because it refers to
// data held by that extractor. Only the declaration is visible here; the
// member function definitions live further down in the file.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    // NOTE(review): "sidx" is taken by non-const reference and stored as the
    // reference member mSegments, so the referenced vector presumably has to
    // outlive this object -- confirm against the constructor definition.
    MPEG4Source(const sp<MPEG4Extractor> &owner,
                const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset);
    virtual status_t init();

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    // This source always reports support for non-blocking reads.
    virtual bool supportNonblockingRead() { return true; }
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    // keep the MPEG4Extractor around, since we're referencing its data
    sp<MPEG4Extractor> mOwner;
    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    // State for the current sample auxiliary information.
    // NOTE(review): presumably maintained by
    // parseSampleAuxiliaryInformationSizes()/...Offsets(), whose definitions
    // are not visible here -- confirm.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // Values gathered from a track fragment header.
    struct TrackFragmentHeaderInfo {
        // Bit values tested against mFlags.
        // NOTE(review): these look like the tf_flags of the 'tfhd' box in
        // ISO/IEC 14496-12 -- confirm against parseTrackFragmentHeader().
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record kept in mCurrentSamples.
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Copy construction and assignment are declared private to disallow
    // copying (no definition is visible in this part of the file).
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};

// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache while forwarding
// all remaining requests to the wrapped datasource.
// This is used to cache the full sampletable metadata for a single track,
// possibly wrapping multiple times to cover all tracks, i.e.
// Each MPEG4DataSource caches the sampletable metadata for a single track.
186 187struct MPEG4DataSource : public DataSource { 188 explicit MPEG4DataSource(const sp<DataSource> &source); 189 190 virtual status_t initCheck() const; 191 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 192 virtual status_t getSize(off64_t *size); 193 virtual uint32_t flags(); 194 195 status_t setCachedRange(off64_t offset, size_t size); 196 197protected: 198 virtual ~MPEG4DataSource(); 199 200private: 201 Mutex mLock; 202 203 sp<DataSource> mSource; 204 off64_t mCachedOffset; 205 size_t mCachedSize; 206 uint8_t *mCache; 207 208 void clearCache(); 209 210 MPEG4DataSource(const MPEG4DataSource &); 211 MPEG4DataSource &operator=(const MPEG4DataSource &); 212}; 213 214MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source) 215 : mSource(source), 216 mCachedOffset(0), 217 mCachedSize(0), 218 mCache(NULL) { 219} 220 221MPEG4DataSource::~MPEG4DataSource() { 222 clearCache(); 223} 224 225void MPEG4DataSource::clearCache() { 226 if (mCache) { 227 free(mCache); 228 mCache = NULL; 229 } 230 231 mCachedOffset = 0; 232 mCachedSize = 0; 233} 234 235status_t MPEG4DataSource::initCheck() const { 236 return mSource->initCheck(); 237} 238 239ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { 240 Mutex::Autolock autoLock(mLock); 241 242 if (isInRange(mCachedOffset, mCachedSize, offset, size)) { 243 memcpy(data, &mCache[offset - mCachedOffset], size); 244 return size; 245 } 246 247 return mSource->readAt(offset, data, size); 248} 249 250status_t MPEG4DataSource::getSize(off64_t *size) { 251 return mSource->getSize(size); 252} 253 254uint32_t MPEG4DataSource::flags() { 255 return mSource->flags(); 256} 257 258status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { 259 Mutex::Autolock autoLock(mLock); 260 261 clearCache(); 262 263 mCache = (uint8_t *)malloc(size); 264 265 if (mCache == NULL) { 266 return -ENOMEM; 267 } 268 269 mCachedOffset = offset; 270 mCachedSize = size; 271 272 ssize_t err = 
mSource->readAt(mCachedOffset, mCache, mCachedSize); 273 274 if (err < (ssize_t)size) { 275 clearCache(); 276 277 return ERROR_IO; 278 } 279 280 return OK; 281} 282 283//////////////////////////////////////////////////////////////////////////////// 284 285static const bool kUseHexDump = false; 286 287static void hexdump(const void *_data, size_t size) { 288 const uint8_t *data = (const uint8_t *)_data; 289 size_t offset = 0; 290 while (offset < size) { 291 printf("0x%04zx ", offset); 292 293 size_t n = size - offset; 294 if (n > 16) { 295 n = 16; 296 } 297 298 for (size_t i = 0; i < 16; ++i) { 299 if (i == 8) { 300 printf(" "); 301 } 302 303 if (offset + i < size) { 304 printf("%02x ", data[offset + i]); 305 } else { 306 printf(" "); 307 } 308 } 309 310 printf(" "); 311 312 for (size_t i = 0; i < n; ++i) { 313 if (isprint(data[offset + i])) { 314 printf("%c", data[offset + i]); 315 } else { 316 printf("."); 317 } 318 } 319 320 printf("\n"); 321 322 offset += 16; 323 } 324} 325 326static const char *FourCC2MIME(uint32_t fourcc) { 327 switch (fourcc) { 328 case FOURCC('m', 'p', '4', 'a'): 329 return MEDIA_MIMETYPE_AUDIO_AAC; 330 331 case FOURCC('s', 'a', 'm', 'r'): 332 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 333 334 case FOURCC('s', 'a', 'w', 'b'): 335 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 336 337 case FOURCC('m', 'p', '4', 'v'): 338 return MEDIA_MIMETYPE_VIDEO_MPEG4; 339 340 case FOURCC('s', '2', '6', '3'): 341 case FOURCC('h', '2', '6', '3'): 342 case FOURCC('H', '2', '6', '3'): 343 return MEDIA_MIMETYPE_VIDEO_H263; 344 345 case FOURCC('a', 'v', 'c', '1'): 346 return MEDIA_MIMETYPE_VIDEO_AVC; 347 348 case FOURCC('h', 'v', 'c', '1'): 349 case FOURCC('h', 'e', 'v', '1'): 350 return MEDIA_MIMETYPE_VIDEO_HEVC; 351 default: 352 CHECK(!"should not be here."); 353 return NULL; 354 } 355} 356 357static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 358 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 359 // AMR NB audio is 
always mono, 8kHz 360 *channels = 1; 361 *rate = 8000; 362 return true; 363 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 364 // AMR WB audio is always mono, 16kHz 365 *channels = 1; 366 *rate = 16000; 367 return true; 368 } 369 return false; 370} 371 372MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source) 373 : mMoofOffset(0), 374 mMoofFound(false), 375 mMdatFound(false), 376 mDataSource(source), 377 mInitCheck(NO_INIT), 378 mHeaderTimescale(0), 379 mIsQT(false), 380 mFirstTrack(NULL), 381 mLastTrack(NULL), 382 mFileMetaData(new MetaData), 383 mFirstSINF(NULL), 384 mIsDrm(false) { 385} 386 387MPEG4Extractor::~MPEG4Extractor() { 388 Track *track = mFirstTrack; 389 while (track) { 390 Track *next = track->next; 391 392 delete track; 393 track = next; 394 } 395 mFirstTrack = mLastTrack = NULL; 396 397 SINF *sinf = mFirstSINF; 398 while (sinf) { 399 SINF *next = sinf->next; 400 delete[] sinf->IPMPData; 401 delete sinf; 402 sinf = next; 403 } 404 mFirstSINF = NULL; 405 406 for (size_t i = 0; i < mPssh.size(); i++) { 407 delete [] mPssh[i].data; 408 } 409} 410 411uint32_t MPEG4Extractor::flags() const { 412 return CAN_PAUSE | 413 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 
414 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 415} 416 417sp<MetaData> MPEG4Extractor::getMetaData() { 418 status_t err; 419 if ((err = readMetaData()) != OK) { 420 return new MetaData; 421 } 422 423 return mFileMetaData; 424} 425 426size_t MPEG4Extractor::countTracks() { 427 status_t err; 428 if ((err = readMetaData()) != OK) { 429 ALOGV("MPEG4Extractor::countTracks: no tracks"); 430 return 0; 431 } 432 433 size_t n = 0; 434 Track *track = mFirstTrack; 435 while (track) { 436 ++n; 437 track = track->next; 438 } 439 440 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n); 441 return n; 442} 443 444sp<MetaData> MPEG4Extractor::getTrackMetaData( 445 size_t index, uint32_t flags) { 446 status_t err; 447 if ((err = readMetaData()) != OK) { 448 return NULL; 449 } 450 451 Track *track = mFirstTrack; 452 while (index > 0) { 453 if (track == NULL) { 454 return NULL; 455 } 456 457 track = track->next; 458 --index; 459 } 460 461 if (track == NULL) { 462 return NULL; 463 } 464 465 if ((flags & kIncludeExtensiveMetaData) 466 && !track->includes_expensive_metadata) { 467 track->includes_expensive_metadata = true; 468 469 const char *mime; 470 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 471 if (!strncasecmp("video/", mime, 6)) { 472 // MPEG2 tracks do not provide CSD, so read the stream header 473 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) { 474 off64_t offset; 475 size_t size; 476 if (track->sampleTable->getMetaDataForSample( 477 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) { 478 if (size > kMaxTrackHeaderSize) { 479 size = kMaxTrackHeaderSize; 480 } 481 uint8_t header[kMaxTrackHeaderSize]; 482 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) { 483 track->meta->setData(kKeyStreamHeader, 'mdat', header, size); 484 } 485 } 486 } 487 488 if (mMoofOffset > 0) { 489 int64_t duration; 490 if (track->meta->findInt64(kKeyDuration, &duration)) { 491 // nothing fancy, just pick a frame near 1/4th of the duration 492 
track->meta->setInt64( 493 kKeyThumbnailTime, duration / 4); 494 } 495 } else { 496 uint32_t sampleIndex; 497 uint32_t sampleTime; 498 if (track->timescale != 0 && 499 track->sampleTable->findThumbnailSample(&sampleIndex) == OK 500 && track->sampleTable->getMetaDataForSample( 501 sampleIndex, NULL /* offset */, NULL /* size */, 502 &sampleTime) == OK) { 503 track->meta->setInt64( 504 kKeyThumbnailTime, 505 ((int64_t)sampleTime * 1000000) / track->timescale); 506 } 507 } 508 } 509 } 510 511 return track->meta; 512} 513 514static void MakeFourCCString(uint32_t x, char *s) { 515 s[0] = x >> 24; 516 s[1] = (x >> 16) & 0xff; 517 s[2] = (x >> 8) & 0xff; 518 s[3] = x & 0xff; 519 s[4] = '\0'; 520} 521 522status_t MPEG4Extractor::readMetaData() { 523 if (mInitCheck != NO_INIT) { 524 return mInitCheck; 525 } 526 527 off64_t offset = 0; 528 status_t err; 529 bool sawMoovOrSidx = false; 530 531 while (!(sawMoovOrSidx && (mMdatFound || mMoofFound))) { 532 off64_t orig_offset = offset; 533 err = parseChunk(&offset, 0); 534 535 if (err != OK && err != UNKNOWN_ERROR) { 536 break; 537 } else if (offset <= orig_offset) { 538 // only continue parsing if the offset was advanced, 539 // otherwise we might end up in an infinite loop 540 ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset); 541 err = ERROR_MALFORMED; 542 break; 543 } else if (err == UNKNOWN_ERROR) { 544 sawMoovOrSidx = true; 545 } 546 } 547 548 if (mInitCheck == OK) { 549 if (findTrackByMimePrefix("video/") != NULL) { 550 mFileMetaData->setCString( 551 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 552 } else if (findTrackByMimePrefix("audio/") != NULL) { 553 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); 554 } else { 555 mFileMetaData->setCString(kKeyMIMEType, "application/octet-stream"); 556 } 557 } else { 558 mInitCheck = err; 559 } 560 561 CHECK_NE(err, (status_t)NO_INIT); 562 563 // copy pssh data into file metadata 564 uint64_t psshsize = 0; 565 for (size_t i = 0; i < mPssh.size(); 
i++) { 566 psshsize += 20 + mPssh[i].datalen; 567 } 568 if (psshsize > 0 && psshsize <= UINT32_MAX) { 569 char *buf = (char*)malloc(psshsize); 570 if (!buf) { 571 ALOGE("b/28471206"); 572 return NO_MEMORY; 573 } 574 char *ptr = buf; 575 for (size_t i = 0; i < mPssh.size(); i++) { 576 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 577 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 578 ptr += (20 + mPssh[i].datalen); 579 } 580 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize); 581 free(buf); 582 } 583 return mInitCheck; 584} 585 586char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { 587 if (mFirstSINF == NULL) { 588 return NULL; 589 } 590 591 SINF *sinf = mFirstSINF; 592 while (sinf && (trackID != sinf->trackID)) { 593 sinf = sinf->next; 594 } 595 596 if (sinf == NULL) { 597 return NULL; 598 } 599 600 *len = sinf->len; 601 return sinf->IPMPData; 602} 603 604// Reads an encoded integer 7 bits at a time until it encounters the high bit clear. 605static int32_t readSize(off64_t offset, 606 const sp<DataSource> &DataSource, uint8_t *numOfBytes) { 607 uint32_t size = 0; 608 uint8_t data; 609 bool moreData = true; 610 *numOfBytes = 0; 611 612 while (moreData) { 613 if (DataSource->readAt(offset, &data, 1) < 1) { 614 return -1; 615 } 616 offset ++; 617 moreData = (data >= 128) ? 
true : false; 618 size = (size << 7) | (data & 0x7f); // Take last 7 bits 619 (*numOfBytes) ++; 620 } 621 622 return size; 623} 624 625status_t MPEG4Extractor::parseDrmSINF( 626 off64_t * /* offset */, off64_t data_offset) { 627 uint8_t updateIdTag; 628 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 629 return ERROR_IO; 630 } 631 data_offset ++; 632 633 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 634 return ERROR_MALFORMED; 635 } 636 637 uint8_t numOfBytes; 638 int32_t size = readSize(data_offset, mDataSource, &numOfBytes); 639 if (size < 0) { 640 return ERROR_IO; 641 } 642 data_offset += numOfBytes; 643 644 while(size >= 11 ) { 645 uint8_t descriptorTag; 646 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { 647 return ERROR_IO; 648 } 649 data_offset ++; 650 651 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { 652 return ERROR_MALFORMED; 653 } 654 655 uint8_t buffer[8]; 656 //ObjectDescriptorID and ObjectDescriptor url flag 657 if (mDataSource->readAt(data_offset, buffer, 2) < 2) { 658 return ERROR_IO; 659 } 660 data_offset += 2; 661 662 if ((buffer[1] >> 5) & 0x0001) { //url flag is set 663 return ERROR_MALFORMED; 664 } 665 666 if (mDataSource->readAt(data_offset, buffer, 8) < 8) { 667 return ERROR_IO; 668 } 669 data_offset += 8; 670 671 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) 672 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { 673 return ERROR_MALFORMED; 674 } 675 676 SINF *sinf = new SINF; 677 sinf->trackID = U16_AT(&buffer[3]); 678 sinf->IPMPDescriptorID = buffer[7]; 679 sinf->next = mFirstSINF; 680 mFirstSINF = sinf; 681 682 size -= (8 + 2 + 1); 683 } 684 685 if (size != 0) { 686 return ERROR_MALFORMED; 687 } 688 689 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 690 return ERROR_IO; 691 } 692 data_offset ++; 693 694 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 695 return ERROR_MALFORMED; 696 } 697 698 size = readSize(data_offset, mDataSource, &numOfBytes); 699 if 
(size < 0) { 700 return ERROR_IO; 701 } 702 data_offset += numOfBytes; 703 704 while (size > 0) { 705 uint8_t tag; 706 int32_t dataLen; 707 if (mDataSource->readAt(data_offset, &tag, 1) < 1) { 708 return ERROR_IO; 709 } 710 data_offset ++; 711 712 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { 713 uint8_t id; 714 dataLen = readSize(data_offset, mDataSource, &numOfBytes); 715 if (dataLen < 0) { 716 return ERROR_IO; 717 } else if (dataLen < 4) { 718 return ERROR_MALFORMED; 719 } 720 data_offset += numOfBytes; 721 722 if (mDataSource->readAt(data_offset, &id, 1) < 1) { 723 return ERROR_IO; 724 } 725 data_offset ++; 726 727 SINF *sinf = mFirstSINF; 728 while (sinf && (sinf->IPMPDescriptorID != id)) { 729 sinf = sinf->next; 730 } 731 if (sinf == NULL) { 732 return ERROR_MALFORMED; 733 } 734 sinf->len = dataLen - 3; 735 sinf->IPMPData = new (std::nothrow) char[sinf->len]; 736 if (sinf->IPMPData == NULL) { 737 return ERROR_MALFORMED; 738 } 739 data_offset += 2; 740 741 if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) { 742 return ERROR_IO; 743 } 744 data_offset += sinf->len; 745 746 size -= (dataLen + numOfBytes + 1); 747 } 748 } 749 750 if (size != 0) { 751 return ERROR_MALFORMED; 752 } 753 754 return UNKNOWN_ERROR; // Return a dummy error. 
755} 756 757struct PathAdder { 758 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 759 : mPath(path) { 760 mPath->push(chunkType); 761 } 762 763 ~PathAdder() { 764 mPath->pop(); 765 } 766 767private: 768 Vector<uint32_t> *mPath; 769 770 PathAdder(const PathAdder &); 771 PathAdder &operator=(const PathAdder &); 772}; 773 774static bool underMetaDataPath(const Vector<uint32_t> &path) { 775 return path.size() >= 5 776 && path[0] == FOURCC('m', 'o', 'o', 'v') 777 && path[1] == FOURCC('u', 'd', 't', 'a') 778 && path[2] == FOURCC('m', 'e', 't', 'a') 779 && path[3] == FOURCC('i', 'l', 's', 't'); 780} 781 782static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) { 783 return path.size() >= 2 784 && path[0] == FOURCC('m', 'o', 'o', 'v') 785 && path[1] == FOURCC('m', 'e', 't', 'a') 786 && (depth == 2 787 || (depth == 3 788 && (path[2] == FOURCC('h', 'd', 'l', 'r') 789 || path[2] == FOURCC('i', 'l', 's', 't') 790 || path[2] == FOURCC('k', 'e', 'y', 's')))); 791} 792 793// Given a time in seconds since Jan 1 1904, produce a human-readable string. 
794static bool convertTimeToDate(int64_t time_1904, String8 *s) { 795 // delta between mpeg4 time and unix epoch time 796 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600); 797 if (time_1904 < INT64_MIN + delta) { 798 return false; 799 } 800 time_t time_1970 = time_1904 - delta; 801 802 char tmp[32]; 803 struct tm* tm = gmtime(&time_1970); 804 if (tm != NULL && 805 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) { 806 s->setTo(tmp); 807 return true; 808 } 809 return false; 810} 811 812status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 813 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth); 814 815 if (*offset < 0) { 816 ALOGE("b/23540914"); 817 return ERROR_MALFORMED; 818 } 819 if (depth > 100) { 820 ALOGE("b/27456299"); 821 return ERROR_MALFORMED; 822 } 823 uint32_t hdr[2]; 824 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 825 return ERROR_IO; 826 } 827 uint64_t chunk_size = ntohl(hdr[0]); 828 int32_t chunk_type = ntohl(hdr[1]); 829 off64_t data_offset = *offset + 8; 830 831 if (chunk_size == 1) { 832 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 833 return ERROR_IO; 834 } 835 chunk_size = ntoh64(chunk_size); 836 data_offset += 8; 837 838 if (chunk_size < 16) { 839 // The smallest valid chunk is 16 bytes long in this case. 840 return ERROR_MALFORMED; 841 } 842 } else if (chunk_size == 0) { 843 if (depth == 0) { 844 // atom extends to end of file 845 off64_t sourceSize; 846 if (mDataSource->getSize(&sourceSize) == OK) { 847 chunk_size = (sourceSize - *offset); 848 } else { 849 // XXX could we just pick a "sufficiently large" value here? 850 ALOGE("atom size is 0, and data source has no size"); 851 return ERROR_MALFORMED; 852 } 853 } else { 854 // not allowed for non-toplevel atoms, skip it 855 *offset += 4; 856 return OK; 857 } 858 } else if (chunk_size < 8) { 859 // The smallest valid chunk is 8 bytes long. 
860 ALOGE("invalid chunk size: %" PRIu64, chunk_size); 861 return ERROR_MALFORMED; 862 } 863 864 char chunk[5]; 865 MakeFourCCString(chunk_type, chunk); 866 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth); 867 868 if (kUseHexDump) { 869 static const char kWhitespace[] = " "; 870 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 871 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size); 872 873 char buffer[256]; 874 size_t n = chunk_size; 875 if (n > sizeof(buffer)) { 876 n = sizeof(buffer); 877 } 878 if (mDataSource->readAt(*offset, buffer, n) 879 < (ssize_t)n) { 880 return ERROR_IO; 881 } 882 883 hexdump(buffer, n); 884 } 885 886 PathAdder autoAdder(&mPath, chunk_type); 887 888 // (data_offset - *offset) is either 8 or 16 889 off64_t chunk_data_size = chunk_size - (data_offset - *offset); 890 if (chunk_data_size < 0) { 891 ALOGE("b/23540914"); 892 return ERROR_MALFORMED; 893 } 894 if (chunk_type != FOURCC('m', 'd', 'a', 't') && chunk_data_size > kMaxAtomSize) { 895 char errMsg[100]; 896 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size); 897 ALOGE("%s (b/28615448)", errMsg); 898 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg)); 899 return ERROR_MALFORMED; 900 } 901 902 if (chunk_type != FOURCC('c', 'p', 'r', 't') 903 && chunk_type != FOURCC('c', 'o', 'v', 'r') 904 && mPath.size() == 5 && underMetaDataPath(mPath)) { 905 off64_t stop_offset = *offset + chunk_size; 906 *offset = data_offset; 907 while (*offset < stop_offset) { 908 status_t err = parseChunk(offset, depth + 1); 909 if (err != OK) { 910 return err; 911 } 912 } 913 914 if (*offset != stop_offset) { 915 return ERROR_MALFORMED; 916 } 917 918 return OK; 919 } 920 921 switch(chunk_type) { 922 case FOURCC('m', 'o', 'o', 'v'): 923 case FOURCC('t', 'r', 'a', 'k'): 924 case FOURCC('m', 'd', 'i', 'a'): 925 case FOURCC('m', 'i', 'n', 'f'): 926 case FOURCC('d', 'i', 'n', 'f'): 927 case FOURCC('s', 't', 
'b', 'l'): 928 case FOURCC('m', 'v', 'e', 'x'): 929 case FOURCC('m', 'o', 'o', 'f'): 930 case FOURCC('t', 'r', 'a', 'f'): 931 case FOURCC('m', 'f', 'r', 'a'): 932 case FOURCC('u', 'd', 't', 'a'): 933 case FOURCC('i', 'l', 's', 't'): 934 case FOURCC('s', 'i', 'n', 'f'): 935 case FOURCC('s', 'c', 'h', 'i'): 936 case FOURCC('e', 'd', 't', 's'): 937 case FOURCC('w', 'a', 'v', 'e'): 938 { 939 if (chunk_type == FOURCC('m', 'o', 'o', 'v') && depth != 0) { 940 ALOGE("moov: depth %d", depth); 941 return ERROR_MALFORMED; 942 } 943 if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) { 944 // store the offset of the first segment 945 mMoofFound = true; 946 mMoofOffset = *offset; 947 } 948 949 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 950 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size); 951 952 if (mDataSource->flags() 953 & (DataSource::kWantsPrefetching 954 | DataSource::kIsCachingDataSource)) { 955 sp<MPEG4DataSource> cachedSource = 956 new MPEG4DataSource(mDataSource); 957 958 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { 959 mDataSource = cachedSource; 960 } 961 } 962 963 if (mLastTrack == NULL) 964 return ERROR_MALFORMED; 965 966 mLastTrack->sampleTable = new SampleTable(mDataSource); 967 } 968 969 bool isTrack = false; 970 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 971 if (depth != 1) { 972 ALOGE("trak: depth %d", depth); 973 return ERROR_MALFORMED; 974 } 975 isTrack = true; 976 977 Track *track = new Track; 978 track->next = NULL; 979 if (mLastTrack) { 980 mLastTrack->next = track; 981 } else { 982 mFirstTrack = track; 983 } 984 mLastTrack = track; 985 986 track->meta = new MetaData; 987 track->includes_expensive_metadata = false; 988 track->skipTrack = false; 989 track->timescale = 0; 990 track->meta->setCString(kKeyMIMEType, "application/octet-stream"); 991 } 992 993 off64_t stop_offset = *offset + chunk_size; 994 *offset = data_offset; 995 while (*offset < stop_offset) { 996 status_t err = parseChunk(offset, 
depth + 1); 997 if (err != OK) { 998 if (isTrack) { 999 mLastTrack->skipTrack = true; 1000 break; 1001 } 1002 return err; 1003 } 1004 } 1005 1006 if (*offset != stop_offset) { 1007 return ERROR_MALFORMED; 1008 } 1009 1010 if (isTrack) { 1011 int32_t trackId; 1012 // There must be exact one track header per track. 1013 if (!mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) { 1014 mLastTrack->skipTrack = true; 1015 } 1016 if (mLastTrack->skipTrack) { 1017 Track *cur = mFirstTrack; 1018 1019 if (cur == mLastTrack) { 1020 delete cur; 1021 mFirstTrack = mLastTrack = NULL; 1022 } else { 1023 while (cur && cur->next != mLastTrack) { 1024 cur = cur->next; 1025 } 1026 if (cur) { 1027 cur->next = NULL; 1028 } 1029 delete mLastTrack; 1030 mLastTrack = cur; 1031 } 1032 1033 return OK; 1034 } 1035 1036 status_t err = verifyTrack(mLastTrack); 1037 1038 if (err != OK) { 1039 return err; 1040 } 1041 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 1042 mInitCheck = OK; 1043 1044 if (!mIsDrm) { 1045 return UNKNOWN_ERROR; // Return a dummy error. 
1046 } else { 1047 return OK; 1048 } 1049 } 1050 break; 1051 } 1052 1053 case FOURCC('e', 'l', 's', 't'): 1054 { 1055 *offset += chunk_size; 1056 1057 // See 14496-12 8.6.6 1058 uint8_t version; 1059 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 1060 return ERROR_IO; 1061 } 1062 1063 uint32_t entry_count; 1064 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 1065 return ERROR_IO; 1066 } 1067 1068 if (entry_count != 1) { 1069 // we only support a single entry at the moment, for gapless playback 1070 ALOGW("ignoring edit list with %d entries", entry_count); 1071 } else if (mHeaderTimescale == 0) { 1072 ALOGW("ignoring edit list because timescale is 0"); 1073 } else { 1074 off64_t entriesoffset = data_offset + 8; 1075 uint64_t segment_duration; 1076 int64_t media_time; 1077 1078 if (version == 1) { 1079 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 1080 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 1081 return ERROR_IO; 1082 } 1083 } else if (version == 0) { 1084 uint32_t sd; 1085 int32_t mt; 1086 if (!mDataSource->getUInt32(entriesoffset, &sd) || 1087 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 1088 return ERROR_IO; 1089 } 1090 segment_duration = sd; 1091 media_time = mt; 1092 } else { 1093 return ERROR_IO; 1094 } 1095 1096 uint64_t halfscale = mHeaderTimescale / 2; 1097 segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale; 1098 media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale; 1099 1100 int64_t duration; 1101 int32_t samplerate; 1102 if (!mLastTrack) { 1103 return ERROR_MALFORMED; 1104 } 1105 if (mLastTrack->meta->findInt64(kKeyDuration, &duration) && 1106 mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) { 1107 1108 int64_t delay = (media_time * samplerate + 500000) / 1000000; 1109 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 1110 1111 int64_t paddingus = duration - (int64_t)(segment_duration + media_time); 1112 if 
(paddingus < 0) { 1113 // track duration from media header (which is what kKeyDuration is) might 1114 // be slightly shorter than the segment duration, which would make the 1115 // padding negative. Clamp to zero. 1116 paddingus = 0; 1117 } 1118 int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000; 1119 mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples); 1120 } 1121 } 1122 break; 1123 } 1124 1125 case FOURCC('f', 'r', 'm', 'a'): 1126 { 1127 *offset += chunk_size; 1128 1129 uint32_t original_fourcc; 1130 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 1131 return ERROR_IO; 1132 } 1133 original_fourcc = ntohl(original_fourcc); 1134 ALOGV("read original format: %d", original_fourcc); 1135 1136 if (mLastTrack == NULL) 1137 return ERROR_MALFORMED; 1138 1139 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 1140 uint32_t num_channels = 0; 1141 uint32_t sample_rate = 0; 1142 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 1143 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1144 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1145 } 1146 break; 1147 } 1148 1149 case FOURCC('t', 'e', 'n', 'c'): 1150 { 1151 *offset += chunk_size; 1152 1153 if (chunk_size < 32) { 1154 return ERROR_MALFORMED; 1155 } 1156 1157 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1158 // default IV size, 16 bytes default KeyID 1159 // (ISO 23001-7) 1160 char buf[4]; 1161 memset(buf, 0, 4); 1162 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1163 return ERROR_IO; 1164 } 1165 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1166 if (defaultAlgorithmId > 1) { 1167 // only 0 (clear) and 1 (AES-128) are valid 1168 return ERROR_MALFORMED; 1169 } 1170 1171 memset(buf, 0, 4); 1172 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1173 return ERROR_IO; 1174 } 1175 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 1176 1177 
if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 1178 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 1179 // only unencrypted data must have 0 IV size 1180 return ERROR_MALFORMED; 1181 } else if (defaultIVSize != 0 && 1182 defaultIVSize != 8 && 1183 defaultIVSize != 16) { 1184 // only supported sizes are 0, 8 and 16 1185 return ERROR_MALFORMED; 1186 } 1187 1188 uint8_t defaultKeyId[16]; 1189 1190 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1191 return ERROR_IO; 1192 } 1193 1194 if (mLastTrack == NULL) 1195 return ERROR_MALFORMED; 1196 1197 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId); 1198 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1199 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1200 break; 1201 } 1202 1203 case FOURCC('t', 'k', 'h', 'd'): 1204 { 1205 *offset += chunk_size; 1206 1207 status_t err; 1208 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1209 return err; 1210 } 1211 1212 break; 1213 } 1214 1215 case FOURCC('p', 's', 's', 'h'): 1216 { 1217 *offset += chunk_size; 1218 1219 PsshInfo pssh; 1220 1221 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1222 return ERROR_IO; 1223 } 1224 1225 uint32_t psshdatalen = 0; 1226 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1227 return ERROR_IO; 1228 } 1229 pssh.datalen = ntohl(psshdatalen); 1230 ALOGV("pssh data size: %d", pssh.datalen); 1231 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) { 1232 // pssh data length exceeds size of containing box 1233 return ERROR_MALFORMED; 1234 } 1235 1236 pssh.data = new (std::nothrow) uint8_t[pssh.datalen]; 1237 if (pssh.data == NULL) { 1238 return ERROR_MALFORMED; 1239 } 1240 ALOGV("allocated pssh @ %p", pssh.data); 1241 ssize_t requested = (ssize_t) pssh.datalen; 1242 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1243 delete[] pssh.data; 1244 return ERROR_IO; 1245 } 1246 
mPssh.push_back(pssh); 1247 1248 break; 1249 } 1250 1251 case FOURCC('m', 'd', 'h', 'd'): 1252 { 1253 *offset += chunk_size; 1254 1255 if (chunk_data_size < 4 || mLastTrack == NULL) { 1256 return ERROR_MALFORMED; 1257 } 1258 1259 uint8_t version; 1260 if (mDataSource->readAt( 1261 data_offset, &version, sizeof(version)) 1262 < (ssize_t)sizeof(version)) { 1263 return ERROR_IO; 1264 } 1265 1266 off64_t timescale_offset; 1267 1268 if (version == 1) { 1269 timescale_offset = data_offset + 4 + 16; 1270 } else if (version == 0) { 1271 timescale_offset = data_offset + 4 + 8; 1272 } else { 1273 return ERROR_IO; 1274 } 1275 1276 uint32_t timescale; 1277 if (mDataSource->readAt( 1278 timescale_offset, ×cale, sizeof(timescale)) 1279 < (ssize_t)sizeof(timescale)) { 1280 return ERROR_IO; 1281 } 1282 1283 if (!timescale) { 1284 ALOGE("timescale should not be ZERO."); 1285 return ERROR_MALFORMED; 1286 } 1287 1288 mLastTrack->timescale = ntohl(timescale); 1289 1290 // 14496-12 says all ones means indeterminate, but some files seem to use 1291 // 0 instead. We treat both the same. 
1292 int64_t duration = 0; 1293 if (version == 1) { 1294 if (mDataSource->readAt( 1295 timescale_offset + 4, &duration, sizeof(duration)) 1296 < (ssize_t)sizeof(duration)) { 1297 return ERROR_IO; 1298 } 1299 if (duration != -1) { 1300 duration = ntoh64(duration); 1301 } 1302 } else { 1303 uint32_t duration32; 1304 if (mDataSource->readAt( 1305 timescale_offset + 4, &duration32, sizeof(duration32)) 1306 < (ssize_t)sizeof(duration32)) { 1307 return ERROR_IO; 1308 } 1309 if (duration32 != 0xffffffff) { 1310 duration = ntohl(duration32); 1311 } 1312 } 1313 if (duration != 0 && mLastTrack->timescale != 0) { 1314 mLastTrack->meta->setInt64( 1315 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1316 } 1317 1318 uint8_t lang[2]; 1319 off64_t lang_offset; 1320 if (version == 1) { 1321 lang_offset = timescale_offset + 4 + 8; 1322 } else if (version == 0) { 1323 lang_offset = timescale_offset + 4 + 4; 1324 } else { 1325 return ERROR_IO; 1326 } 1327 1328 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1329 < (ssize_t)sizeof(lang)) { 1330 return ERROR_IO; 1331 } 1332 1333 // To get the ISO-639-2/T three character language code 1334 // 1 bit pad followed by 3 5-bits characters. Each character 1335 // is packed as the difference between its ASCII value and 0x60. 1336 char lang_code[4]; 1337 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1338 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1339 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1340 lang_code[3] = '\0'; 1341 1342 mLastTrack->meta->setCString( 1343 kKeyMediaLanguage, lang_code); 1344 1345 break; 1346 } 1347 1348 case FOURCC('s', 't', 's', 'd'): 1349 { 1350 uint8_t buffer[8]; 1351 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1352 return ERROR_MALFORMED; 1353 } 1354 1355 if (mDataSource->readAt( 1356 data_offset, buffer, 8) < 8) { 1357 return ERROR_IO; 1358 } 1359 1360 if (U32_AT(buffer) != 0) { 1361 // Should be version 0, flags 0. 
1362 return ERROR_MALFORMED; 1363 } 1364 1365 uint32_t entry_count = U32_AT(&buffer[4]); 1366 1367 if (entry_count > 1) { 1368 // For 3GPP timed text, there could be multiple tx3g boxes contain 1369 // multiple text display formats. These formats will be used to 1370 // display the timed text. 1371 // For encrypted files, there may also be more than one entry. 1372 const char *mime; 1373 1374 if (mLastTrack == NULL) 1375 return ERROR_MALFORMED; 1376 1377 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1378 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1379 strcasecmp(mime, "application/octet-stream")) { 1380 // For now we only support a single type of media per track. 1381 mLastTrack->skipTrack = true; 1382 *offset += chunk_size; 1383 break; 1384 } 1385 } 1386 off64_t stop_offset = *offset + chunk_size; 1387 *offset = data_offset + 8; 1388 for (uint32_t i = 0; i < entry_count; ++i) { 1389 status_t err = parseChunk(offset, depth + 1); 1390 if (err != OK) { 1391 return err; 1392 } 1393 } 1394 1395 if (*offset != stop_offset) { 1396 return ERROR_MALFORMED; 1397 } 1398 break; 1399 } 1400 case FOURCC('m', 'e', 't', 't'): 1401 { 1402 *offset += chunk_size; 1403 1404 if (mLastTrack == NULL) 1405 return ERROR_MALFORMED; 1406 1407 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1408 if (buffer->data() == NULL) { 1409 return NO_MEMORY; 1410 } 1411 1412 if (mDataSource->readAt( 1413 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1414 return ERROR_IO; 1415 } 1416 1417 String8 mimeFormat((const char *)(buffer->data()), chunk_data_size); 1418 mLastTrack->meta->setCString(kKeyMIMEType, mimeFormat.string()); 1419 1420 break; 1421 } 1422 1423 case FOURCC('m', 'p', '4', 'a'): 1424 case FOURCC('e', 'n', 'c', 'a'): 1425 case FOURCC('s', 'a', 'm', 'r'): 1426 case FOURCC('s', 'a', 'w', 'b'): 1427 { 1428 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a') 1429 && depth >= 1 && mPath[depth - 1] == FOURCC('w', 'a', 'v', 'e')) { 1430 // Ignore mp4a 
embedded in QT wave atom 1431 *offset += chunk_size; 1432 break; 1433 } 1434 1435 uint8_t buffer[8 + 20]; 1436 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1437 // Basic AudioSampleEntry size. 1438 return ERROR_MALFORMED; 1439 } 1440 1441 if (mDataSource->readAt( 1442 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1443 return ERROR_IO; 1444 } 1445 1446 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1447 uint16_t version = U16_AT(&buffer[8]); 1448 uint32_t num_channels = U16_AT(&buffer[16]); 1449 1450 uint16_t sample_size = U16_AT(&buffer[18]); 1451 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1452 1453 if (mLastTrack == NULL) 1454 return ERROR_MALFORMED; 1455 1456 off64_t stop_offset = *offset + chunk_size; 1457 *offset = data_offset + sizeof(buffer); 1458 1459 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')) { 1460 if (version == 1) { 1461 if (mDataSource->readAt(*offset, buffer, 16) < 16) { 1462 return ERROR_IO; 1463 } 1464 1465#if 0 1466 U32_AT(buffer); // samples per packet 1467 U32_AT(&buffer[4]); // bytes per packet 1468 U32_AT(&buffer[8]); // bytes per frame 1469 U32_AT(&buffer[12]); // bytes per sample 1470#endif 1471 *offset += 16; 1472 } else if (version == 2) { 1473 uint8_t v2buffer[36]; 1474 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) { 1475 return ERROR_IO; 1476 } 1477 1478#if 0 1479 U32_AT(v2buffer); // size of struct only 1480 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate 1481 num_channels = U32_AT(&v2buffer[12]); // num audio channels 1482 U32_AT(&v2buffer[16]); // always 0x7f000000 1483 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel 1484 U32_AT(&v2buffer[24]); // format specifc flags 1485 U32_AT(&v2buffer[28]); // const bytes per audio packet 1486 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet 1487#endif 1488 *offset += 36; 1489 } 1490 } 1491 1492 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1493 // if the chunk type is enca, we'll get 
the type from the sinf/frma box later 1494 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1495 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1496 } 1497 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1498 chunk, num_channels, sample_size, sample_rate); 1499 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1500 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1501 1502 while (*offset < stop_offset) { 1503 status_t err = parseChunk(offset, depth + 1); 1504 if (err != OK) { 1505 return err; 1506 } 1507 } 1508 1509 if (*offset != stop_offset) { 1510 return ERROR_MALFORMED; 1511 } 1512 break; 1513 } 1514 1515 case FOURCC('m', 'p', '4', 'v'): 1516 case FOURCC('e', 'n', 'c', 'v'): 1517 case FOURCC('s', '2', '6', '3'): 1518 case FOURCC('H', '2', '6', '3'): 1519 case FOURCC('h', '2', '6', '3'): 1520 case FOURCC('a', 'v', 'c', '1'): 1521 case FOURCC('h', 'v', 'c', '1'): 1522 case FOURCC('h', 'e', 'v', '1'): 1523 { 1524 uint8_t buffer[78]; 1525 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1526 // Basic VideoSampleEntry size. 1527 return ERROR_MALFORMED; 1528 } 1529 1530 if (mDataSource->readAt( 1531 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1532 return ERROR_IO; 1533 } 1534 1535 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1536 uint16_t width = U16_AT(&buffer[6 + 18]); 1537 uint16_t height = U16_AT(&buffer[6 + 20]); 1538 1539 // The video sample is not standard-compliant if it has invalid dimension. 1540 // Use some default width and height value, and 1541 // let the decoder figure out the actual width and height (and thus 1542 // be prepared for INFO_FOMRAT_CHANGED event). 
1543 if (width == 0) width = 352; 1544 if (height == 0) height = 288; 1545 1546 // printf("*** coding='%s' width=%d height=%d\n", 1547 // chunk, width, height); 1548 1549 if (mLastTrack == NULL) 1550 return ERROR_MALFORMED; 1551 1552 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1553 // if the chunk type is encv, we'll get the type from the sinf/frma box later 1554 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1555 } 1556 mLastTrack->meta->setInt32(kKeyWidth, width); 1557 mLastTrack->meta->setInt32(kKeyHeight, height); 1558 1559 off64_t stop_offset = *offset + chunk_size; 1560 *offset = data_offset + sizeof(buffer); 1561 while (*offset < stop_offset) { 1562 status_t err = parseChunk(offset, depth + 1); 1563 if (err != OK) { 1564 return err; 1565 } 1566 } 1567 1568 if (*offset != stop_offset) { 1569 return ERROR_MALFORMED; 1570 } 1571 break; 1572 } 1573 1574 case FOURCC('s', 't', 'c', 'o'): 1575 case FOURCC('c', 'o', '6', '4'): 1576 { 1577 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1578 return ERROR_MALFORMED; 1579 1580 status_t err = 1581 mLastTrack->sampleTable->setChunkOffsetParams( 1582 chunk_type, data_offset, chunk_data_size); 1583 1584 *offset += chunk_size; 1585 1586 if (err != OK) { 1587 return err; 1588 } 1589 1590 break; 1591 } 1592 1593 case FOURCC('s', 't', 's', 'c'): 1594 { 1595 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1596 return ERROR_MALFORMED; 1597 1598 status_t err = 1599 mLastTrack->sampleTable->setSampleToChunkParams( 1600 data_offset, chunk_data_size); 1601 1602 *offset += chunk_size; 1603 1604 if (err != OK) { 1605 return err; 1606 } 1607 1608 break; 1609 } 1610 1611 case FOURCC('s', 't', 's', 'z'): 1612 case FOURCC('s', 't', 'z', '2'): 1613 { 1614 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1615 return ERROR_MALFORMED; 1616 1617 status_t err = 1618 mLastTrack->sampleTable->setSampleSizeParams( 1619 chunk_type, data_offset, chunk_data_size); 1620 1621 *offset 
+= chunk_size; 1622 1623 if (err != OK) { 1624 return err; 1625 } 1626 1627 size_t max_size; 1628 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1629 1630 if (err != OK) { 1631 return err; 1632 } 1633 1634 if (max_size != 0) { 1635 // Assume that a given buffer only contains at most 10 chunks, 1636 // each chunk originally prefixed with a 2 byte length will 1637 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1638 // and thus will grow by 2 bytes per chunk. 1639 if (max_size > SIZE_MAX - 10 * 2) { 1640 ALOGE("max sample size too big: %zu", max_size); 1641 return ERROR_MALFORMED; 1642 } 1643 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1644 } else { 1645 // No size was specified. Pick a conservatively large size. 1646 uint32_t width, height; 1647 if (!mLastTrack->meta->findInt32(kKeyWidth, (int32_t*)&width) || 1648 !mLastTrack->meta->findInt32(kKeyHeight,(int32_t*) &height)) { 1649 ALOGE("No width or height, assuming worst case 1080p"); 1650 width = 1920; 1651 height = 1080; 1652 } else { 1653 // A resolution was specified, check that it's not too big. The values below 1654 // were chosen so that the calculations below don't cause overflows, they're 1655 // not indicating that resolutions up to 32kx32k are actually supported. 1656 if (width > 32768 || height > 32768) { 1657 ALOGE("can't support %u x %u video", width, height); 1658 return ERROR_MALFORMED; 1659 } 1660 } 1661 1662 const char *mime; 1663 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1664 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC) 1665 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 1666 // AVC & HEVC requires compression ratio of at least 2, and uses 1667 // macroblocks 1668 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1669 } else { 1670 // For all other formats there is no minimum compression 1671 // ratio. Use compression ratio of 1. 
1672 max_size = width * height * 3 / 2; 1673 } 1674 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size); 1675 } 1676 1677 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1678 // mimetype) previously obtained, so don't cache them. 1679 const char *mime; 1680 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1681 // Calculate average frame rate. 1682 if (!strncasecmp("video/", mime, 6)) { 1683 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1684 if (nSamples == 0) { 1685 int32_t trackId; 1686 if (mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) { 1687 for (size_t i = 0; i < mTrex.size(); i++) { 1688 Trex *t = &mTrex.editItemAt(i); 1689 if (t->track_ID == (uint32_t) trackId) { 1690 if (t->default_sample_duration > 0) { 1691 int32_t frameRate = 1692 mLastTrack->timescale / t->default_sample_duration; 1693 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1694 } 1695 break; 1696 } 1697 } 1698 } 1699 } else { 1700 int64_t durationUs; 1701 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { 1702 if (durationUs > 0) { 1703 int32_t frameRate = (nSamples * 1000000LL + 1704 (durationUs >> 1)) / durationUs; 1705 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1706 } 1707 } 1708 } 1709 } 1710 1711 break; 1712 } 1713 1714 case FOURCC('s', 't', 't', 's'): 1715 { 1716 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1717 return ERROR_MALFORMED; 1718 1719 *offset += chunk_size; 1720 1721 status_t err = 1722 mLastTrack->sampleTable->setTimeToSampleParams( 1723 data_offset, chunk_data_size); 1724 1725 if (err != OK) { 1726 return err; 1727 } 1728 1729 break; 1730 } 1731 1732 case FOURCC('c', 't', 't', 's'): 1733 { 1734 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1735 return ERROR_MALFORMED; 1736 1737 *offset += chunk_size; 1738 1739 status_t err = 1740 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1741 data_offset, chunk_data_size); 1742 1743 if (err != OK) { 
1744 return err; 1745 } 1746 1747 break; 1748 } 1749 1750 case FOURCC('s', 't', 's', 's'): 1751 { 1752 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1753 return ERROR_MALFORMED; 1754 1755 *offset += chunk_size; 1756 1757 status_t err = 1758 mLastTrack->sampleTable->setSyncSampleParams( 1759 data_offset, chunk_data_size); 1760 1761 if (err != OK) { 1762 return err; 1763 } 1764 1765 break; 1766 } 1767 1768 // \xA9xyz 1769 case FOURCC(0xA9, 'x', 'y', 'z'): 1770 { 1771 *offset += chunk_size; 1772 1773 // Best case the total data length inside "\xA9xyz" box 1774 // would be 8, for instance "\xA9xyz" + "\x00\x04\x15\xc7" + "0+0/", 1775 // where "\x00\x04" is the text string length with value = 4, 1776 // "\0x15\xc7" is the language code = en, and "0+0" is a 1777 // location (string) value with longitude = 0 and latitude = 0. 1778 if (chunk_data_size < 8) { 1779 return ERROR_MALFORMED; 1780 } 1781 1782 // Worst case the location string length would be 18, 1783 // for instance +90.0000-180.0000, without the trailing "/" and 1784 // the string length + language code, and some devices include 1785 // an additional 8 bytes of altitude, e.g. +007.186 1786 char buffer[18 + 8]; 1787 1788 // Substracting 5 from the data size is because the text string length + 1789 // language code takes 4 bytes, and the trailing slash "/" takes 1 byte. 
1790 off64_t location_length = chunk_data_size - 5; 1791 if (location_length >= (off64_t) sizeof(buffer)) { 1792 return ERROR_MALFORMED; 1793 } 1794 1795 if (mDataSource->readAt( 1796 data_offset + 4, buffer, location_length) < location_length) { 1797 return ERROR_IO; 1798 } 1799 1800 buffer[location_length] = '\0'; 1801 mFileMetaData->setCString(kKeyLocation, buffer); 1802 break; 1803 } 1804 1805 case FOURCC('e', 's', 'd', 's'): 1806 { 1807 *offset += chunk_size; 1808 1809 if (chunk_data_size < 4) { 1810 return ERROR_MALFORMED; 1811 } 1812 1813 uint8_t buffer[256]; 1814 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1815 return ERROR_BUFFER_TOO_SMALL; 1816 } 1817 1818 if (mDataSource->readAt( 1819 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1820 return ERROR_IO; 1821 } 1822 1823 if (U32_AT(buffer) != 0) { 1824 // Should be version 0, flags 0. 1825 return ERROR_MALFORMED; 1826 } 1827 1828 if (mLastTrack == NULL) 1829 return ERROR_MALFORMED; 1830 1831 mLastTrack->meta->setData( 1832 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1833 1834 if (mPath.size() >= 2 1835 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1836 // Information from the ESDS must be relied on for proper 1837 // setup of sample rate and channel count for MPEG4 Audio. 1838 // The generic header appears to only contain generic 1839 // information... 
1840 1841 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1842 &buffer[4], chunk_data_size - 4); 1843 1844 if (err != OK) { 1845 return err; 1846 } 1847 } 1848 if (mPath.size() >= 2 1849 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) { 1850 // Check if the video is MPEG2 1851 ESDS esds(&buffer[4], chunk_data_size - 4); 1852 1853 uint8_t objectTypeIndication; 1854 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) { 1855 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) { 1856 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2); 1857 } 1858 } 1859 } 1860 break; 1861 } 1862 1863 case FOURCC('b', 't', 'r', 't'): 1864 { 1865 *offset += chunk_size; 1866 if (mLastTrack == NULL) { 1867 return ERROR_MALFORMED; 1868 } 1869 1870 uint8_t buffer[12]; 1871 if (chunk_data_size != sizeof(buffer)) { 1872 return ERROR_MALFORMED; 1873 } 1874 1875 if (mDataSource->readAt( 1876 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1877 return ERROR_IO; 1878 } 1879 1880 uint32_t maxBitrate = U32_AT(&buffer[4]); 1881 uint32_t avgBitrate = U32_AT(&buffer[8]); 1882 if (maxBitrate > 0 && maxBitrate < INT32_MAX) { 1883 mLastTrack->meta->setInt32(kKeyMaxBitRate, (int32_t)maxBitrate); 1884 } 1885 if (avgBitrate > 0 && avgBitrate < INT32_MAX) { 1886 mLastTrack->meta->setInt32(kKeyBitRate, (int32_t)avgBitrate); 1887 } 1888 break; 1889 } 1890 1891 case FOURCC('a', 'v', 'c', 'C'): 1892 { 1893 *offset += chunk_size; 1894 1895 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1896 1897 if (buffer->data() == NULL) { 1898 ALOGE("b/28471206"); 1899 return NO_MEMORY; 1900 } 1901 1902 if (mDataSource->readAt( 1903 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1904 return ERROR_IO; 1905 } 1906 1907 if (mLastTrack == NULL) 1908 return ERROR_MALFORMED; 1909 1910 mLastTrack->meta->setData( 1911 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size); 1912 1913 break; 1914 } 1915 case FOURCC('h', 'v', 'c', 'C'): 1916 { 
1917 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1918 1919 if (buffer->data() == NULL) { 1920 ALOGE("b/28471206"); 1921 return NO_MEMORY; 1922 } 1923 1924 if (mDataSource->readAt( 1925 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1926 return ERROR_IO; 1927 } 1928 1929 if (mLastTrack == NULL) 1930 return ERROR_MALFORMED; 1931 1932 mLastTrack->meta->setData( 1933 kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size); 1934 1935 *offset += chunk_size; 1936 break; 1937 } 1938 1939 case FOURCC('d', '2', '6', '3'): 1940 { 1941 *offset += chunk_size; 1942 /* 1943 * d263 contains a fixed 7 bytes part: 1944 * vendor - 4 bytes 1945 * version - 1 byte 1946 * level - 1 byte 1947 * profile - 1 byte 1948 * optionally, "d263" box itself may contain a 16-byte 1949 * bit rate box (bitr) 1950 * average bit rate - 4 bytes 1951 * max bit rate - 4 bytes 1952 */ 1953 char buffer[23]; 1954 if (chunk_data_size != 7 && 1955 chunk_data_size != 23) { 1956 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size); 1957 return ERROR_MALFORMED; 1958 } 1959 1960 if (mDataSource->readAt( 1961 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1962 return ERROR_IO; 1963 } 1964 1965 if (mLastTrack == NULL) 1966 return ERROR_MALFORMED; 1967 1968 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 1969 1970 break; 1971 } 1972 1973 case FOURCC('m', 'e', 't', 'a'): 1974 { 1975 off64_t stop_offset = *offset + chunk_size; 1976 *offset = data_offset; 1977 bool isParsingMetaKeys = underQTMetaPath(mPath, 2); 1978 if (!isParsingMetaKeys) { 1979 uint8_t buffer[4]; 1980 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1981 *offset = stop_offset; 1982 return ERROR_MALFORMED; 1983 } 1984 1985 if (mDataSource->readAt( 1986 data_offset, buffer, 4) < 4) { 1987 *offset = stop_offset; 1988 return ERROR_IO; 1989 } 1990 1991 if (U32_AT(buffer) != 0) { 1992 // Should be version 0, flags 0. 
1993 1994 // If it's not, let's assume this is one of those 1995 // apparently malformed chunks that don't have flags 1996 // and completely different semantics than what's 1997 // in the MPEG4 specs and skip it. 1998 *offset = stop_offset; 1999 return OK; 2000 } 2001 *offset += sizeof(buffer); 2002 } 2003 2004 while (*offset < stop_offset) { 2005 status_t err = parseChunk(offset, depth + 1); 2006 if (err != OK) { 2007 return err; 2008 } 2009 } 2010 2011 if (*offset != stop_offset) { 2012 return ERROR_MALFORMED; 2013 } 2014 break; 2015 } 2016 2017 case FOURCC('m', 'e', 'a', 'n'): 2018 case FOURCC('n', 'a', 'm', 'e'): 2019 case FOURCC('d', 'a', 't', 'a'): 2020 { 2021 *offset += chunk_size; 2022 2023 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 2024 status_t err = parseITunesMetaData(data_offset, chunk_data_size); 2025 2026 if (err != OK) { 2027 return err; 2028 } 2029 } 2030 2031 break; 2032 } 2033 2034 case FOURCC('m', 'v', 'h', 'd'): 2035 { 2036 *offset += chunk_size; 2037 2038 if (depth != 1) { 2039 ALOGE("mvhd: depth %d", depth); 2040 return ERROR_MALFORMED; 2041 } 2042 if (chunk_data_size < 32) { 2043 return ERROR_MALFORMED; 2044 } 2045 2046 uint8_t header[32]; 2047 if (mDataSource->readAt( 2048 data_offset, header, sizeof(header)) 2049 < (ssize_t)sizeof(header)) { 2050 return ERROR_IO; 2051 } 2052 2053 uint64_t creationTime; 2054 uint64_t duration = 0; 2055 if (header[0] == 1) { 2056 creationTime = U64_AT(&header[4]); 2057 mHeaderTimescale = U32_AT(&header[20]); 2058 duration = U64_AT(&header[24]); 2059 if (duration == 0xffffffffffffffff) { 2060 duration = 0; 2061 } 2062 } else if (header[0] != 0) { 2063 return ERROR_MALFORMED; 2064 } else { 2065 creationTime = U32_AT(&header[4]); 2066 mHeaderTimescale = U32_AT(&header[12]); 2067 uint32_t d32 = U32_AT(&header[16]); 2068 if (d32 == 0xffffffff) { 2069 d32 = 0; 2070 } 2071 duration = d32; 2072 } 2073 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) { 2074 
mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 2075 } 2076 2077 String8 s; 2078 if (convertTimeToDate(creationTime, &s)) { 2079 mFileMetaData->setCString(kKeyDate, s.string()); 2080 } 2081 2082 2083 break; 2084 } 2085 2086 case FOURCC('m', 'e', 'h', 'd'): 2087 { 2088 *offset += chunk_size; 2089 2090 if (chunk_data_size < 8) { 2091 return ERROR_MALFORMED; 2092 } 2093 2094 uint8_t flags[4]; 2095 if (mDataSource->readAt( 2096 data_offset, flags, sizeof(flags)) 2097 < (ssize_t)sizeof(flags)) { 2098 return ERROR_IO; 2099 } 2100 2101 uint64_t duration = 0; 2102 if (flags[0] == 1) { 2103 // 64 bit 2104 if (chunk_data_size < 12) { 2105 return ERROR_MALFORMED; 2106 } 2107 mDataSource->getUInt64(data_offset + 4, &duration); 2108 if (duration == 0xffffffffffffffff) { 2109 duration = 0; 2110 } 2111 } else if (flags[0] == 0) { 2112 // 32 bit 2113 uint32_t d32; 2114 mDataSource->getUInt32(data_offset + 4, &d32); 2115 if (d32 == 0xffffffff) { 2116 d32 = 0; 2117 } 2118 duration = d32; 2119 } else { 2120 return ERROR_MALFORMED; 2121 } 2122 2123 if (duration != 0 && mHeaderTimescale != 0) { 2124 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 2125 } 2126 2127 break; 2128 } 2129 2130 case FOURCC('m', 'd', 'a', 't'): 2131 { 2132 ALOGV("mdat chunk, drm: %d", mIsDrm); 2133 2134 mMdatFound = true; 2135 2136 if (!mIsDrm) { 2137 *offset += chunk_size; 2138 break; 2139 } 2140 2141 if (chunk_size < 8) { 2142 return ERROR_MALFORMED; 2143 } 2144 2145 return parseDrmSINF(offset, data_offset); 2146 } 2147 2148 case FOURCC('h', 'd', 'l', 'r'): 2149 { 2150 *offset += chunk_size; 2151 2152 if (underQTMetaPath(mPath, 3)) { 2153 break; 2154 } 2155 2156 uint32_t buffer; 2157 if (mDataSource->readAt( 2158 data_offset + 8, &buffer, 4) < 4) { 2159 return ERROR_IO; 2160 } 2161 2162 uint32_t type = ntohl(buffer); 2163 // For the 3GPP file format, the handler-type within the 'hdlr' box 2164 // shall be 'text'. 
We also want to support 'sbtl' handler type 2165 // for a practical reason as various MPEG4 containers use it. 2166 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 2167 if (mLastTrack != NULL) { 2168 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 2169 } 2170 } 2171 2172 break; 2173 } 2174 2175 case FOURCC('k', 'e', 'y', 's'): 2176 { 2177 *offset += chunk_size; 2178 2179 if (underQTMetaPath(mPath, 3)) { 2180 status_t err = parseQTMetaKey(data_offset, chunk_data_size); 2181 if (err != OK) { 2182 return err; 2183 } 2184 } 2185 break; 2186 } 2187 2188 case FOURCC('t', 'r', 'e', 'x'): 2189 { 2190 *offset += chunk_size; 2191 2192 if (chunk_data_size < 24) { 2193 return ERROR_IO; 2194 } 2195 Trex trex; 2196 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) || 2197 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) || 2198 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) || 2199 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) || 2200 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) { 2201 return ERROR_IO; 2202 } 2203 mTrex.add(trex); 2204 break; 2205 } 2206 2207 case FOURCC('t', 'x', '3', 'g'): 2208 { 2209 if (mLastTrack == NULL) 2210 return ERROR_MALFORMED; 2211 2212 uint32_t type; 2213 const void *data; 2214 size_t size = 0; 2215 if (!mLastTrack->meta->findData( 2216 kKeyTextFormatData, &type, &data, &size)) { 2217 size = 0; 2218 } 2219 2220 if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) { 2221 return ERROR_MALFORMED; 2222 } 2223 2224 uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size]; 2225 if (buffer == NULL) { 2226 return ERROR_MALFORMED; 2227 } 2228 2229 if (size > 0) { 2230 memcpy(buffer, data, size); 2231 } 2232 2233 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 2234 < chunk_size) { 2235 delete[] buffer; 2236 buffer = NULL; 2237 2238 // advance read 
pointer so we don't end up reading this again 2239 *offset += chunk_size; 2240 return ERROR_IO; 2241 } 2242 2243 mLastTrack->meta->setData( 2244 kKeyTextFormatData, 0, buffer, size + chunk_size); 2245 2246 delete[] buffer; 2247 2248 *offset += chunk_size; 2249 break; 2250 } 2251 2252 case FOURCC('c', 'o', 'v', 'r'): 2253 { 2254 *offset += chunk_size; 2255 2256 if (mFileMetaData != NULL) { 2257 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64, 2258 chunk_data_size, data_offset); 2259 2260 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) { 2261 return ERROR_MALFORMED; 2262 } 2263 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1); 2264 if (buffer->data() == NULL) { 2265 ALOGE("b/28471206"); 2266 return NO_MEMORY; 2267 } 2268 if (mDataSource->readAt( 2269 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) { 2270 return ERROR_IO; 2271 } 2272 const int kSkipBytesOfDataBox = 16; 2273 if (chunk_data_size <= kSkipBytesOfDataBox) { 2274 return ERROR_MALFORMED; 2275 } 2276 2277 mFileMetaData->setData( 2278 kKeyAlbumArt, MetaData::TYPE_NONE, 2279 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 2280 } 2281 2282 break; 2283 } 2284 2285 case FOURCC('c', 'o', 'l', 'r'): 2286 { 2287 *offset += chunk_size; 2288 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd') 2289 // ignore otherwise 2290 if (depth >= 2 && mPath[depth - 2] == FOURCC('s', 't', 's', 'd')) { 2291 status_t err = parseColorInfo(data_offset, chunk_data_size); 2292 if (err != OK) { 2293 return err; 2294 } 2295 } 2296 2297 break; 2298 } 2299 2300 case FOURCC('t', 'i', 't', 'l'): 2301 case FOURCC('p', 'e', 'r', 'f'): 2302 case FOURCC('a', 'u', 't', 'h'): 2303 case FOURCC('g', 'n', 'r', 'e'): 2304 case FOURCC('a', 'l', 'b', 'm'): 2305 case FOURCC('y', 'r', 'r', 'c'): 2306 { 2307 *offset += chunk_size; 2308 2309 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth); 
2310 2311 if (err != OK) { 2312 return err; 2313 } 2314 2315 break; 2316 } 2317 2318 case FOURCC('I', 'D', '3', '2'): 2319 { 2320 *offset += chunk_size; 2321 2322 if (chunk_data_size < 6) { 2323 return ERROR_MALFORMED; 2324 } 2325 2326 parseID3v2MetaData(data_offset + 6); 2327 2328 break; 2329 } 2330 2331 case FOURCC('-', '-', '-', '-'): 2332 { 2333 mLastCommentMean.clear(); 2334 mLastCommentName.clear(); 2335 mLastCommentData.clear(); 2336 *offset += chunk_size; 2337 break; 2338 } 2339 2340 case FOURCC('s', 'i', 'd', 'x'): 2341 { 2342 status_t err = parseSegmentIndex(data_offset, chunk_data_size); 2343 if (err != OK) { 2344 return err; 2345 } 2346 *offset += chunk_size; 2347 return UNKNOWN_ERROR; // stop parsing after sidx 2348 } 2349 2350 case FOURCC('a', 'c', '-', '3'): 2351 { 2352 *offset += chunk_size; 2353 return parseAC3SampleEntry(data_offset); 2354 } 2355 2356 case FOURCC('f', 't', 'y', 'p'): 2357 { 2358 if (chunk_data_size < 8 || depth != 0) { 2359 return ERROR_MALFORMED; 2360 } 2361 2362 off64_t stop_offset = *offset + chunk_size; 2363 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4; 2364 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 2365 if (i == 1) { 2366 // Skip this index, it refers to the minorVersion, 2367 // not a brand. 2368 continue; 2369 } 2370 2371 uint32_t brand; 2372 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) { 2373 return ERROR_MALFORMED; 2374 } 2375 2376 brand = ntohl(brand); 2377 if (brand == FOURCC('q', 't', ' ', ' ')) { 2378 mIsQT = true; 2379 break; 2380 } 2381 } 2382 2383 *offset = stop_offset; 2384 2385 break; 2386 } 2387 2388 default: 2389 { 2390 // check if we're parsing 'ilst' for meta keys 2391 // if so, treat type as a number (key-id). 
2392 if (underQTMetaPath(mPath, 3)) { 2393 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size); 2394 if (err != OK) { 2395 return err; 2396 } 2397 } 2398 2399 *offset += chunk_size; 2400 break; 2401 } 2402 } 2403 2404 return OK; 2405} 2406 2407status_t MPEG4Extractor::parseAC3SampleEntry(off64_t offset) { 2408 // skip 16 bytes: 2409 // + 6-byte reserved, 2410 // + 2-byte data reference index, 2411 // + 8-byte reserved 2412 offset += 16; 2413 uint16_t channelCount; 2414 if (!mDataSource->getUInt16(offset, &channelCount)) { 2415 return ERROR_MALFORMED; 2416 } 2417 // skip 8 bytes: 2418 // + 2-byte channelCount, 2419 // + 2-byte sample size, 2420 // + 4-byte reserved 2421 offset += 8; 2422 uint16_t sampleRate; 2423 if (!mDataSource->getUInt16(offset, &sampleRate)) { 2424 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read sample rate"); 2425 return ERROR_MALFORMED; 2426 } 2427 2428 // skip 4 bytes: 2429 // + 2-byte sampleRate, 2430 // + 2-byte reserved 2431 offset += 4; 2432 return parseAC3SpecificBox(offset, sampleRate); 2433} 2434 2435status_t MPEG4Extractor::parseAC3SpecificBox( 2436 off64_t offset, uint16_t sampleRate) { 2437 uint32_t size; 2438 // + 4-byte size 2439 // + 4-byte type 2440 // + 3-byte payload 2441 const uint32_t kAC3SpecificBoxSize = 11; 2442 if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) { 2443 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size"); 2444 return ERROR_MALFORMED; 2445 } 2446 2447 offset += 4; 2448 uint32_t type; 2449 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC('d', 'a', 'c', '3')) { 2450 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3"); 2451 return ERROR_MALFORMED; 2452 } 2453 2454 offset += 4; 2455 const uint32_t kAC3SpecificBoxPayloadSize = 3; 2456 uint8_t chunk[kAC3SpecificBoxPayloadSize]; 2457 if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) { 2458 
ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields"); 2459 return ERROR_MALFORMED; 2460 } 2461 2462 ABitReader br(chunk, sizeof(chunk)); 2463 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5}; 2464 static const unsigned sampleRateTable[] = {48000, 44100, 32000}; 2465 2466 unsigned fscod = br.getBits(2); 2467 if (fscod == 3) { 2468 ALOGE("Incorrect fscod (3) in AC3 header"); 2469 return ERROR_MALFORMED; 2470 } 2471 unsigned boxSampleRate = sampleRateTable[fscod]; 2472 if (boxSampleRate != sampleRate) { 2473 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d", 2474 boxSampleRate, sampleRate); 2475 return ERROR_MALFORMED; 2476 } 2477 2478 unsigned bsid = br.getBits(5); 2479 if (bsid > 8) { 2480 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?"); 2481 return ERROR_MALFORMED; 2482 } 2483 2484 // skip 2485 unsigned bsmod __unused = br.getBits(3); 2486 2487 unsigned acmod = br.getBits(3); 2488 unsigned lfeon = br.getBits(1); 2489 unsigned channelCount = channelCountTable[acmod] + lfeon; 2490 2491 if (mLastTrack == NULL) { 2492 return ERROR_MALFORMED; 2493 } 2494 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3); 2495 mLastTrack->meta->setInt32(kKeyChannelCount, channelCount); 2496 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 2497 return OK; 2498} 2499 2500status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 2501 ALOGV("MPEG4Extractor::parseSegmentIndex"); 2502 2503 if (size < 12) { 2504 return -EINVAL; 2505 } 2506 2507 uint32_t flags; 2508 if (!mDataSource->getUInt32(offset, &flags)) { 2509 return ERROR_MALFORMED; 2510 } 2511 2512 uint32_t version = flags >> 24; 2513 flags &= 0xffffff; 2514 2515 ALOGV("sidx version %d", version); 2516 2517 uint32_t referenceId; 2518 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 2519 return ERROR_MALFORMED; 2520 } 2521 2522 uint32_t timeScale; 2523 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 2524 
return ERROR_MALFORMED; 2525 } 2526 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 2527 if (timeScale == 0) 2528 return ERROR_MALFORMED; 2529 2530 uint64_t earliestPresentationTime; 2531 uint64_t firstOffset; 2532 2533 offset += 12; 2534 size -= 12; 2535 2536 if (version == 0) { 2537 if (size < 8) { 2538 return -EINVAL; 2539 } 2540 uint32_t tmp; 2541 if (!mDataSource->getUInt32(offset, &tmp)) { 2542 return ERROR_MALFORMED; 2543 } 2544 earliestPresentationTime = tmp; 2545 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 2546 return ERROR_MALFORMED; 2547 } 2548 firstOffset = tmp; 2549 offset += 8; 2550 size -= 8; 2551 } else { 2552 if (size < 16) { 2553 return -EINVAL; 2554 } 2555 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 2556 return ERROR_MALFORMED; 2557 } 2558 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 2559 return ERROR_MALFORMED; 2560 } 2561 offset += 16; 2562 size -= 16; 2563 } 2564 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset); 2565 2566 if (size < 4) { 2567 return -EINVAL; 2568 } 2569 2570 uint16_t referenceCount; 2571 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 2572 return ERROR_MALFORMED; 2573 } 2574 offset += 4; 2575 size -= 4; 2576 ALOGV("refcount: %d", referenceCount); 2577 2578 if (size < referenceCount * 12) { 2579 return -EINVAL; 2580 } 2581 2582 uint64_t total_duration = 0; 2583 for (unsigned int i = 0; i < referenceCount; i++) { 2584 uint32_t d1, d2, d3; 2585 2586 if (!mDataSource->getUInt32(offset, &d1) || // size 2587 !mDataSource->getUInt32(offset + 4, &d2) || // duration 2588 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 2589 return ERROR_MALFORMED; 2590 } 2591 2592 if (d1 & 0x80000000) { 2593 ALOGW("sub-sidx boxes not supported yet"); 2594 } 2595 bool sap = d3 & 0x80000000; 2596 uint32_t saptype = (d3 >> 28) & 7; 2597 if (!sap || (saptype != 1 && saptype != 2)) { 2598 // type 1 and 2 are sync samples 2599 ALOGW("not a stream access 
point, or unsupported type: %08x", d3); 2600 } 2601 total_duration += d2; 2602 offset += 12; 2603 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 2604 SidxEntry se; 2605 se.mSize = d1 & 0x7fffffff; 2606 se.mDurationUs = 1000000LL * d2 / timeScale; 2607 mSidxEntries.add(se); 2608 } 2609 2610 uint64_t sidxDuration = total_duration * 1000000 / timeScale; 2611 2612 if (mLastTrack == NULL) 2613 return ERROR_MALFORMED; 2614 2615 int64_t metaDuration; 2616 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 2617 mLastTrack->meta->setInt64(kKeyDuration, sidxDuration); 2618 } 2619 return OK; 2620} 2621 2622status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) { 2623 if (size < 8) { 2624 return ERROR_MALFORMED; 2625 } 2626 2627 uint32_t count; 2628 if (!mDataSource->getUInt32(offset + 4, &count)) { 2629 return ERROR_MALFORMED; 2630 } 2631 2632 if (mMetaKeyMap.size() > 0) { 2633 ALOGW("'keys' atom seen again, discarding existing entries"); 2634 mMetaKeyMap.clear(); 2635 } 2636 2637 off64_t keyOffset = offset + 8; 2638 off64_t stopOffset = offset + size; 2639 for (size_t i = 1; i <= count; i++) { 2640 if (keyOffset + 8 > stopOffset) { 2641 return ERROR_MALFORMED; 2642 } 2643 2644 uint32_t keySize; 2645 if (!mDataSource->getUInt32(keyOffset, &keySize) 2646 || keySize < 8 2647 || keyOffset + keySize > stopOffset) { 2648 return ERROR_MALFORMED; 2649 } 2650 2651 uint32_t type; 2652 if (!mDataSource->getUInt32(keyOffset + 4, &type) 2653 || type != FOURCC('m', 'd', 't', 'a')) { 2654 return ERROR_MALFORMED; 2655 } 2656 2657 keySize -= 8; 2658 keyOffset += 8; 2659 2660 sp<ABuffer> keyData = new ABuffer(keySize); 2661 if (keyData->data() == NULL) { 2662 return ERROR_MALFORMED; 2663 } 2664 if (mDataSource->readAt( 2665 keyOffset, keyData->data(), keySize) < (ssize_t) keySize) { 2666 return ERROR_MALFORMED; 2667 } 2668 2669 AString key((const char *)keyData->data(), keySize); 2670 mMetaKeyMap.add(i, key); 2671 2672 keyOffset += keySize; 
2673 } 2674 return OK; 2675} 2676 2677status_t MPEG4Extractor::parseQTMetaVal( 2678 int32_t keyId, off64_t offset, size_t size) { 2679 ssize_t index = mMetaKeyMap.indexOfKey(keyId); 2680 if (index < 0) { 2681 // corresponding key is not present, ignore 2682 return ERROR_MALFORMED; 2683 } 2684 2685 if (size <= 16) { 2686 return ERROR_MALFORMED; 2687 } 2688 uint32_t dataSize; 2689 if (!mDataSource->getUInt32(offset, &dataSize) 2690 || dataSize > size || dataSize <= 16) { 2691 return ERROR_MALFORMED; 2692 } 2693 uint32_t atomFourCC; 2694 if (!mDataSource->getUInt32(offset + 4, &atomFourCC) 2695 || atomFourCC != FOURCC('d', 'a', 't', 'a')) { 2696 return ERROR_MALFORMED; 2697 } 2698 uint32_t dataType; 2699 if (!mDataSource->getUInt32(offset + 8, &dataType) 2700 || ((dataType & 0xff000000) != 0)) { 2701 // not well-known type 2702 return ERROR_MALFORMED; 2703 } 2704 2705 dataSize -= 16; 2706 offset += 16; 2707 2708 if (dataType == 23 && dataSize >= 4) { 2709 // BE Float32 2710 uint32_t val; 2711 if (!mDataSource->getUInt32(offset, &val)) { 2712 return ERROR_MALFORMED; 2713 } 2714 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) { 2715 mFileMetaData->setFloat(kKeyCaptureFramerate, *(float *)&val); 2716 } 2717 } else if (dataType == 67 && dataSize >= 4) { 2718 // BE signed int32 2719 uint32_t val; 2720 if (!mDataSource->getUInt32(offset, &val)) { 2721 return ERROR_MALFORMED; 2722 } 2723 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) { 2724 mFileMetaData->setInt32(kKeyTemporalLayerCount, val); 2725 } 2726 } else { 2727 // add more keys if needed 2728 ALOGV("ignoring key: type %d, size %d", dataType, dataSize); 2729 } 2730 2731 return OK; 2732} 2733 2734status_t MPEG4Extractor::parseTrackHeader( 2735 off64_t data_offset, off64_t data_size) { 2736 if (data_size < 4) { 2737 return ERROR_MALFORMED; 2738 } 2739 2740 uint8_t version; 2741 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 2742 return ERROR_IO; 
2743 } 2744 2745 size_t dynSize = (version == 1) ? 36 : 24; 2746 2747 uint8_t buffer[36 + 60]; 2748 2749 if (data_size != (off64_t)dynSize + 60) { 2750 return ERROR_MALFORMED; 2751 } 2752 2753 if (mDataSource->readAt( 2754 data_offset, buffer, data_size) < (ssize_t)data_size) { 2755 return ERROR_IO; 2756 } 2757 2758 uint64_t ctime __unused, mtime __unused, duration __unused; 2759 int32_t id; 2760 2761 if (version == 1) { 2762 ctime = U64_AT(&buffer[4]); 2763 mtime = U64_AT(&buffer[12]); 2764 id = U32_AT(&buffer[20]); 2765 duration = U64_AT(&buffer[28]); 2766 } else if (version == 0) { 2767 ctime = U32_AT(&buffer[4]); 2768 mtime = U32_AT(&buffer[8]); 2769 id = U32_AT(&buffer[12]); 2770 duration = U32_AT(&buffer[20]); 2771 } else { 2772 return ERROR_UNSUPPORTED; 2773 } 2774 2775 if (mLastTrack == NULL) 2776 return ERROR_MALFORMED; 2777 2778 mLastTrack->meta->setInt32(kKeyTrackID, id); 2779 2780 size_t matrixOffset = dynSize + 16; 2781 int32_t a00 = U32_AT(&buffer[matrixOffset]); 2782 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 2783 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 2784 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 2785 2786#if 0 2787 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 2788 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 2789 2790 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 2791 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 2792 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 2793 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 2794#endif 2795 2796 uint32_t rotationDegrees; 2797 2798 static const int32_t kFixedOne = 0x10000; 2799 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 2800 // Identity, no rotation 2801 rotationDegrees = 0; 2802 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 2803 rotationDegrees = 90; 2804 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 2805 rotationDegrees = 270; 2806 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == 
-kFixedOne) { 2807 rotationDegrees = 180; 2808 } else { 2809 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 2810 rotationDegrees = 0; 2811 } 2812 2813 if (rotationDegrees != 0) { 2814 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); 2815 } 2816 2817 // Handle presentation display size, which could be different 2818 // from the image size indicated by kKeyWidth and kKeyHeight. 2819 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2820 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2821 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); 2822 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); 2823 2824 return OK; 2825} 2826 2827status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) { 2828 if (size == 0) { 2829 return OK; 2830 } 2831 2832 if (size < 4 || size == SIZE_MAX) { 2833 return ERROR_MALFORMED; 2834 } 2835 2836 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2837 if (buffer == NULL) { 2838 return ERROR_MALFORMED; 2839 } 2840 if (mDataSource->readAt( 2841 offset, buffer, size) != (ssize_t)size) { 2842 delete[] buffer; 2843 buffer = NULL; 2844 2845 return ERROR_IO; 2846 } 2847 2848 uint32_t flags = U32_AT(buffer); 2849 2850 uint32_t metadataKey = 0; 2851 char chunk[5]; 2852 MakeFourCCString(mPath[4], chunk); 2853 ALOGV("meta: %s @ %lld", chunk, (long long)offset); 2854 switch ((int32_t)mPath[4]) { 2855 case FOURCC(0xa9, 'a', 'l', 'b'): 2856 { 2857 metadataKey = kKeyAlbum; 2858 break; 2859 } 2860 case FOURCC(0xa9, 'A', 'R', 'T'): 2861 { 2862 metadataKey = kKeyArtist; 2863 break; 2864 } 2865 case FOURCC('a', 'A', 'R', 'T'): 2866 { 2867 metadataKey = kKeyAlbumArtist; 2868 break; 2869 } 2870 case FOURCC(0xa9, 'd', 'a', 'y'): 2871 { 2872 metadataKey = kKeyYear; 2873 break; 2874 } 2875 case FOURCC(0xa9, 'n', 'a', 'm'): 2876 { 2877 metadataKey = kKeyTitle; 2878 break; 2879 } 2880 case FOURCC(0xa9, 'w', 'r', 't'): 2881 { 2882 metadataKey = kKeyWriter; 2883 break; 2884 } 2885 case FOURCC('c', 'o', 
'v', 'r'): 2886 { 2887 metadataKey = kKeyAlbumArt; 2888 break; 2889 } 2890 case FOURCC('g', 'n', 'r', 'e'): 2891 { 2892 metadataKey = kKeyGenre; 2893 break; 2894 } 2895 case FOURCC(0xa9, 'g', 'e', 'n'): 2896 { 2897 metadataKey = kKeyGenre; 2898 break; 2899 } 2900 case FOURCC('c', 'p', 'i', 'l'): 2901 { 2902 if (size == 9 && flags == 21) { 2903 char tmp[16]; 2904 sprintf(tmp, "%d", 2905 (int)buffer[size - 1]); 2906 2907 mFileMetaData->setCString(kKeyCompilation, tmp); 2908 } 2909 break; 2910 } 2911 case FOURCC('t', 'r', 'k', 'n'): 2912 { 2913 if (size == 16 && flags == 0) { 2914 char tmp[16]; 2915 uint16_t* pTrack = (uint16_t*)&buffer[10]; 2916 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 2917 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 2918 2919 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2920 } 2921 break; 2922 } 2923 case FOURCC('d', 'i', 's', 'k'): 2924 { 2925 if ((size == 14 || size == 16) && flags == 0) { 2926 char tmp[16]; 2927 uint16_t* pDisc = (uint16_t*)&buffer[10]; 2928 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 2929 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 2930 2931 mFileMetaData->setCString(kKeyDiscNumber, tmp); 2932 } 2933 break; 2934 } 2935 case FOURCC('-', '-', '-', '-'): 2936 { 2937 buffer[size] = '\0'; 2938 switch (mPath[5]) { 2939 case FOURCC('m', 'e', 'a', 'n'): 2940 mLastCommentMean.setTo((const char *)buffer + 4); 2941 break; 2942 case FOURCC('n', 'a', 'm', 'e'): 2943 mLastCommentName.setTo((const char *)buffer + 4); 2944 break; 2945 case FOURCC('d', 'a', 't', 'a'): 2946 if (size < 8) { 2947 delete[] buffer; 2948 buffer = NULL; 2949 ALOGE("b/24346430"); 2950 return ERROR_MALFORMED; 2951 } 2952 mLastCommentData.setTo((const char *)buffer + 8); 2953 break; 2954 } 2955 2956 // Once we have a set of mean/name/data info, go ahead and process 2957 // it to see if its something we are interested in. 
Whether or not 2958 // were are interested in the specific tag, make sure to clear out 2959 // the set so we can be ready to process another tuple should one 2960 // show up later in the file. 2961 if ((mLastCommentMean.length() != 0) && 2962 (mLastCommentName.length() != 0) && 2963 (mLastCommentData.length() != 0)) { 2964 2965 if (mLastCommentMean == "com.apple.iTunes" 2966 && mLastCommentName == "iTunSMPB") { 2967 int32_t delay, padding; 2968 if (sscanf(mLastCommentData, 2969 " %*x %x %x %*x", &delay, &padding) == 2) { 2970 if (mLastTrack == NULL) { 2971 delete[] buffer; 2972 return ERROR_MALFORMED; 2973 } 2974 2975 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 2976 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); 2977 } 2978 } 2979 2980 mLastCommentMean.clear(); 2981 mLastCommentName.clear(); 2982 mLastCommentData.clear(); 2983 } 2984 break; 2985 } 2986 2987 default: 2988 break; 2989 } 2990 2991 if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) { 2992 if (metadataKey == kKeyAlbumArt) { 2993 mFileMetaData->setData( 2994 kKeyAlbumArt, MetaData::TYPE_NONE, 2995 buffer + 8, size - 8); 2996 } else if (metadataKey == kKeyGenre) { 2997 if (flags == 0) { 2998 // uint8_t genre code, iTunes genre codes are 2999 // the standard id3 codes, except they start 3000 // at 1 instead of 0 (e.g. Pop is 14, not 13) 3001 // We use standard id3 numbering, so subtract 1. 
3002 int genrecode = (int)buffer[size - 1]; 3003 genrecode--; 3004 if (genrecode < 0) { 3005 genrecode = 255; // reserved for 'unknown genre' 3006 } 3007 char genre[10]; 3008 sprintf(genre, "%d", genrecode); 3009 3010 mFileMetaData->setCString(metadataKey, genre); 3011 } else if (flags == 1) { 3012 // custom genre string 3013 buffer[size] = '\0'; 3014 3015 mFileMetaData->setCString( 3016 metadataKey, (const char *)buffer + 8); 3017 } 3018 } else { 3019 buffer[size] = '\0'; 3020 3021 mFileMetaData->setCString( 3022 metadataKey, (const char *)buffer + 8); 3023 } 3024 } 3025 3026 delete[] buffer; 3027 buffer = NULL; 3028 3029 return OK; 3030} 3031 3032status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) { 3033 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) { 3034 return ERROR_MALFORMED; 3035 } 3036 3037 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 3038 if (buffer == NULL) { 3039 return ERROR_MALFORMED; 3040 } 3041 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) { 3042 delete[] buffer; 3043 buffer = NULL; 3044 3045 return ERROR_IO; 3046 } 3047 3048 int32_t type = U32_AT(&buffer[0]); 3049 if ((type == FOURCC('n', 'c', 'l', 'x') && size >= 11) 3050 || (type == FOURCC('n', 'c', 'l', 'c') && size >= 10)) { 3051 int32_t primaries = U16_AT(&buffer[4]); 3052 int32_t transfer = U16_AT(&buffer[6]); 3053 int32_t coeffs = U16_AT(&buffer[8]); 3054 bool fullRange = (type == FOURCC('n', 'c', 'l', 'x')) && (buffer[10] & 128); 3055 3056 ColorAspects aspects; 3057 ColorUtils::convertIsoColorAspectsToCodecAspects( 3058 primaries, transfer, coeffs, fullRange, aspects); 3059 3060 // only store the first color specification 3061 if (!mLastTrack->meta->hasData(kKeyColorPrimaries)) { 3062 mLastTrack->meta->setInt32(kKeyColorPrimaries, aspects.mPrimaries); 3063 mLastTrack->meta->setInt32(kKeyTransferFunction, aspects.mTransfer); 3064 mLastTrack->meta->setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs); 3065 
mLastTrack->meta->setInt32(kKeyColorRange, aspects.mRange); 3066 } 3067 } 3068 3069 delete[] buffer; 3070 buffer = NULL; 3071 3072 return OK; 3073} 3074 3075status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) { 3076 if (size < 4 || size == SIZE_MAX) { 3077 return ERROR_MALFORMED; 3078 } 3079 3080 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 3081 if (buffer == NULL) { 3082 return ERROR_MALFORMED; 3083 } 3084 if (mDataSource->readAt( 3085 offset, buffer, size) != (ssize_t)size) { 3086 delete[] buffer; 3087 buffer = NULL; 3088 3089 return ERROR_IO; 3090 } 3091 3092 uint32_t metadataKey = 0; 3093 switch (mPath[depth]) { 3094 case FOURCC('t', 'i', 't', 'l'): 3095 { 3096 metadataKey = kKeyTitle; 3097 break; 3098 } 3099 case FOURCC('p', 'e', 'r', 'f'): 3100 { 3101 metadataKey = kKeyArtist; 3102 break; 3103 } 3104 case FOURCC('a', 'u', 't', 'h'): 3105 { 3106 metadataKey = kKeyWriter; 3107 break; 3108 } 3109 case FOURCC('g', 'n', 'r', 'e'): 3110 { 3111 metadataKey = kKeyGenre; 3112 break; 3113 } 3114 case FOURCC('a', 'l', 'b', 'm'): 3115 { 3116 if (buffer[size - 1] != '\0') { 3117 char tmp[4]; 3118 sprintf(tmp, "%u", buffer[size - 1]); 3119 3120 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 3121 } 3122 3123 metadataKey = kKeyAlbum; 3124 break; 3125 } 3126 case FOURCC('y', 'r', 'r', 'c'): 3127 { 3128 if (size < 6) { 3129 delete[] buffer; 3130 buffer = NULL; 3131 ALOGE("b/62133227"); 3132 android_errorWriteLog(0x534e4554, "62133227"); 3133 return ERROR_MALFORMED; 3134 } 3135 char tmp[5]; 3136 uint16_t year = U16_AT(&buffer[4]); 3137 3138 if (year < 10000) { 3139 sprintf(tmp, "%u", year); 3140 3141 mFileMetaData->setCString(kKeyYear, tmp); 3142 } 3143 break; 3144 } 3145 3146 default: 3147 break; 3148 } 3149 3150 if (metadataKey > 0) { 3151 bool isUTF8 = true; // Common case 3152 char16_t *framedata = NULL; 3153 int len16 = 0; // Number of UTF-16 characters 3154 3155 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 
0x00 3156 if (size < 6) { 3157 delete[] buffer; 3158 buffer = NULL; 3159 return ERROR_MALFORMED; 3160 } 3161 3162 if (size - 6 >= 4) { 3163 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator 3164 framedata = (char16_t *)(buffer + 6); 3165 if (0xfffe == *framedata) { 3166 // endianness marker (BOM) doesn't match host endianness 3167 for (int i = 0; i < len16; i++) { 3168 framedata[i] = bswap_16(framedata[i]); 3169 } 3170 // BOM is now swapped to 0xfeff, we will execute next block too 3171 } 3172 3173 if (0xfeff == *framedata) { 3174 // Remove the BOM 3175 framedata++; 3176 len16--; 3177 isUTF8 = false; 3178 } 3179 // else normal non-zero-length UTF-8 string 3180 // we can't handle UTF-16 without BOM as there is no other 3181 // indication of encoding. 3182 } 3183 3184 if (isUTF8) { 3185 buffer[size] = 0; 3186 mFileMetaData->setCString(metadataKey, (const char *)buffer + 6); 3187 } else { 3188 // Convert from UTF-16 string to UTF-8 string. 3189 String8 tmpUTF8str(framedata, len16); 3190 mFileMetaData->setCString(metadataKey, tmpUTF8str.string()); 3191 } 3192 } 3193 3194 delete[] buffer; 3195 buffer = NULL; 3196 3197 return OK; 3198} 3199 3200void MPEG4Extractor::parseID3v2MetaData(off64_t offset) { 3201 ID3 id3(mDataSource, true /* ignorev1 */, offset); 3202 3203 if (id3.isValid()) { 3204 struct Map { 3205 int key; 3206 const char *tag1; 3207 const char *tag2; 3208 }; 3209 static const Map kMap[] = { 3210 { kKeyAlbum, "TALB", "TAL" }, 3211 { kKeyArtist, "TPE1", "TP1" }, 3212 { kKeyAlbumArtist, "TPE2", "TP2" }, 3213 { kKeyComposer, "TCOM", "TCM" }, 3214 { kKeyGenre, "TCON", "TCO" }, 3215 { kKeyTitle, "TIT2", "TT2" }, 3216 { kKeyYear, "TYE", "TYER" }, 3217 { kKeyAuthor, "TXT", "TEXT" }, 3218 { kKeyCDTrackNumber, "TRK", "TRCK" }, 3219 { kKeyDiscNumber, "TPA", "TPOS" }, 3220 { kKeyCompilation, "TCP", "TCMP" }, 3221 }; 3222 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 3223 3224 for (size_t i = 0; i < kNumMapEntries; ++i) { 3225 if 
(!mFileMetaData->hasData(kMap[i].key)) { 3226 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 3227 if (it->done()) { 3228 delete it; 3229 it = new ID3::Iterator(id3, kMap[i].tag2); 3230 } 3231 3232 if (it->done()) { 3233 delete it; 3234 continue; 3235 } 3236 3237 String8 s; 3238 it->getString(&s); 3239 delete it; 3240 3241 mFileMetaData->setCString(kMap[i].key, s); 3242 } 3243 } 3244 3245 size_t dataSize; 3246 String8 mime; 3247 const void *data = id3.getAlbumArt(&dataSize, &mime); 3248 3249 if (data) { 3250 mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 3251 mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string()); 3252 } 3253 } 3254} 3255 3256sp<IMediaSource> MPEG4Extractor::getTrack(size_t index) { 3257 status_t err; 3258 if ((err = readMetaData()) != OK) { 3259 return NULL; 3260 } 3261 3262 Track *track = mFirstTrack; 3263 while (index > 0) { 3264 if (track == NULL) { 3265 return NULL; 3266 } 3267 3268 track = track->next; 3269 --index; 3270 } 3271 3272 if (track == NULL) { 3273 return NULL; 3274 } 3275 3276 3277 Trex *trex = NULL; 3278 int32_t trackId; 3279 if (track->meta->findInt32(kKeyTrackID, &trackId)) { 3280 for (size_t i = 0; i < mTrex.size(); i++) { 3281 Trex *t = &mTrex.editItemAt(i); 3282 if (t->track_ID == (uint32_t) trackId) { 3283 trex = t; 3284 break; 3285 } 3286 } 3287 } else { 3288 ALOGE("b/21657957"); 3289 return NULL; 3290 } 3291 3292 ALOGV("getTrack called, pssh: %zu", mPssh.size()); 3293 3294 const char *mime; 3295 if (!track->meta->findCString(kKeyMIMEType, &mime)) { 3296 return NULL; 3297 } 3298 3299 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 3300 uint32_t type; 3301 const void *data; 3302 size_t size; 3303 if (!track->meta->findData(kKeyAVCC, &type, &data, &size)) { 3304 return NULL; 3305 } 3306 3307 const uint8_t *ptr = (const uint8_t *)data; 3308 3309 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1 3310 return NULL; 3311 } 3312 } else if (!strcasecmp(mime, 
MEDIA_MIMETYPE_VIDEO_HEVC)) { 3313 uint32_t type; 3314 const void *data; 3315 size_t size; 3316 if (!track->meta->findData(kKeyHVCC, &type, &data, &size)) { 3317 return NULL; 3318 } 3319 3320 const uint8_t *ptr = (const uint8_t *)data; 3321 3322 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1 3323 return NULL; 3324 } 3325 } 3326 3327 sp<MPEG4Source> source = new MPEG4Source(this, 3328 track->meta, mDataSource, track->timescale, track->sampleTable, 3329 mSidxEntries, trex, mMoofOffset); 3330 if (source->init() != OK) { 3331 return NULL; 3332 } 3333 return source; 3334} 3335 3336// static 3337status_t MPEG4Extractor::verifyTrack(Track *track) { 3338 const char *mime; 3339 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 3340 3341 uint32_t type; 3342 const void *data; 3343 size_t size; 3344 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 3345 if (!track->meta->findData(kKeyAVCC, &type, &data, &size) 3346 || type != kTypeAVCC) { 3347 return ERROR_MALFORMED; 3348 } 3349 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 3350 if (!track->meta->findData(kKeyHVCC, &type, &data, &size) 3351 || type != kTypeHVCC) { 3352 return ERROR_MALFORMED; 3353 } 3354 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 3355 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2) 3356 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 3357 if (!track->meta->findData(kKeyESDS, &type, &data, &size) 3358 || type != kTypeESDS) { 3359 return ERROR_MALFORMED; 3360 } 3361 } 3362 3363 if (track->sampleTable == NULL || !track->sampleTable->isValid()) { 3364 // Make sure we have all the metadata we need. 
// Audio object type values compared against the object type parsed from the
// codec specific data in updateAudioTrackInfoFromESDS_MPEG4Audio().
// Only the enumerators referenced by that function are enabled; the remaining
// values are kept as comments for reference.
typedef enum {
    //AOT_NONE = -1,
    //AOT_NULL_OBJECT = 0,
    //AOT_AAC_MAIN = 1, /**< Main profile */
    AOT_AAC_LC = 2, /**< Low Complexity object */
    //AOT_AAC_SSR = 3,
    //AOT_AAC_LTP = 4,
    AOT_SBR = 5,
    //AOT_AAC_SCAL = 6,
    //AOT_TWIN_VQ = 7,
    //AOT_CELP = 8,
    //AOT_HVXC = 9,
    //AOT_RSVD_10 = 10, /**< (reserved) */
    //AOT_RSVD_11 = 11, /**< (reserved) */
    //AOT_TTSI = 12, /**< TTSI Object */
    //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */
    //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */
    //AOT_GEN_MIDI = 15, /**< General MIDI object */
    //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */
    AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */
    //AOT_RSVD_18 = 18, /**< (reserved) */
    //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */
    AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */
    //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */
    AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */
    AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */
    //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */
    //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */
    //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */
    //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */
    //AOT_RSVD_28 = 28, /**< might become SSC */
    AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */
    //AOT_MPEGS = 30, /**< MPEG Surround */

    AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */

    //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */
    //AOT_MP3ONMP4_L2 = 33, /**< MPEG-Layer2 in mp4 */
    //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */
    //AOT_RSVD_35 = 35, /**< might become DST */
    //AOT_RSVD_36 = 36, /**< might become ALS */
    //AOT_AAC_SLS = 37, /**< AAC + SLS */
    //AOT_SLS = 38, /**< SLS */
    //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */

    //AOT_USAC = 42, /**< USAC */
    //AOT_SAOC = 43, /**< SAOC */
    //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */

    //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */
} AUDIO_OBJECT_TYPE;
3471 3472 return OK; 3473 } 3474 3475 if (csd_size < 2) { 3476 return ERROR_MALFORMED; 3477 } 3478 3479 static uint32_t kSamplingRate[] = { 3480 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 3481 16000, 12000, 11025, 8000, 7350 3482 }; 3483 3484 ABitReader br(csd, csd_size); 3485 uint32_t objectType = br.getBits(5); 3486 3487 if (objectType == 31) { // AAC-ELD => additional 6 bits 3488 objectType = 32 + br.getBits(6); 3489 } 3490 3491 if (mLastTrack == NULL) 3492 return ERROR_MALFORMED; 3493 3494 //keep AOT type 3495 mLastTrack->meta->setInt32(kKeyAACAOT, objectType); 3496 3497 uint32_t freqIndex = br.getBits(4); 3498 3499 int32_t sampleRate = 0; 3500 int32_t numChannels = 0; 3501 if (freqIndex == 15) { 3502 if (br.numBitsLeft() < 28) return ERROR_MALFORMED; 3503 sampleRate = br.getBits(24); 3504 numChannels = br.getBits(4); 3505 } else { 3506 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3507 numChannels = br.getBits(4); 3508 3509 if (freqIndex == 13 || freqIndex == 14) { 3510 return ERROR_MALFORMED; 3511 } 3512 3513 sampleRate = kSamplingRate[freqIndex]; 3514 } 3515 3516 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 3517 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3518 uint32_t extFreqIndex = br.getBits(4); 3519 int32_t extSampleRate __unused; 3520 if (extFreqIndex == 15) { 3521 if (csd_size < 8) { 3522 return ERROR_MALFORMED; 3523 } 3524 if (br.numBitsLeft() < 24) return ERROR_MALFORMED; 3525 extSampleRate = br.getBits(24); 3526 } else { 3527 if (extFreqIndex == 13 || extFreqIndex == 14) { 3528 return ERROR_MALFORMED; 3529 } 3530 extSampleRate = kSamplingRate[extFreqIndex]; 3531 } 3532 //TODO: save the extension sampling rate value in meta data => 3533 // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate); 3534 } 3535 3536 switch (numChannels) { 3537 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration 3538 case 0: 3539 case 1:// FC 3540 case 2:// FL FR 
3541 case 3:// FC, FL FR 3542 case 4:// FC, FL FR, RC 3543 case 5:// FC, FL FR, SL SR 3544 case 6:// FC, FL FR, SL SR, LFE 3545 //numChannels already contains the right value 3546 break; 3547 case 11:// FC, FL FR, SL SR, RC, LFE 3548 numChannels = 7; 3549 break; 3550 case 7: // FC, FCL FCR, FL FR, SL SR, LFE 3551 case 12:// FC, FL FR, SL SR, RL RR, LFE 3552 case 14:// FC, FL FR, SL SR, LFE, FHL FHR 3553 numChannels = 8; 3554 break; 3555 default: 3556 return ERROR_UNSUPPORTED; 3557 } 3558 3559 { 3560 if (objectType == AOT_SBR || objectType == AOT_PS) { 3561 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3562 objectType = br.getBits(5); 3563 3564 if (objectType == AOT_ESCAPE) { 3565 if (br.numBitsLeft() < 6) return ERROR_MALFORMED; 3566 objectType = 32 + br.getBits(6); 3567 } 3568 } 3569 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || 3570 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || 3571 objectType == AOT_ER_BSAC) { 3572 if (br.numBitsLeft() < 2) return ERROR_MALFORMED; 3573 const int32_t frameLengthFlag __unused = br.getBits(1); 3574 3575 const int32_t dependsOnCoreCoder = br.getBits(1); 3576 3577 if (dependsOnCoreCoder ) { 3578 if (br.numBitsLeft() < 14) return ERROR_MALFORMED; 3579 const int32_t coreCoderDelay __unused = br.getBits(14); 3580 } 3581 3582 int32_t extensionFlag = -1; 3583 if (br.numBitsLeft() > 0) { 3584 extensionFlag = br.getBits(1); 3585 } else { 3586 switch (objectType) { 3587 // 14496-3 4.5.1.1 extensionFlag 3588 case AOT_AAC_LC: 3589 extensionFlag = 0; 3590 break; 3591 case AOT_ER_AAC_LC: 3592 case AOT_ER_AAC_SCAL: 3593 case AOT_ER_BSAC: 3594 case AOT_ER_AAC_LD: 3595 extensionFlag = 1; 3596 break; 3597 default: 3598 return ERROR_MALFORMED; 3599 break; 3600 } 3601 ALOGW("csd missing extension flag; assuming %d for object type %u.", 3602 extensionFlag, objectType); 3603 } 3604 3605 if (numChannels == 0) { 3606 int32_t channelsEffectiveNum = 0; 3607 int32_t channelsNum = 0; 3608 if (br.numBitsLeft() < 32) { 
3609 return ERROR_MALFORMED; 3610 } 3611 const int32_t ElementInstanceTag __unused = br.getBits(4); 3612 const int32_t Profile __unused = br.getBits(2); 3613 const int32_t SamplingFrequencyIndex __unused = br.getBits(4); 3614 const int32_t NumFrontChannelElements = br.getBits(4); 3615 const int32_t NumSideChannelElements = br.getBits(4); 3616 const int32_t NumBackChannelElements = br.getBits(4); 3617 const int32_t NumLfeChannelElements = br.getBits(2); 3618 const int32_t NumAssocDataElements __unused = br.getBits(3); 3619 const int32_t NumValidCcElements __unused = br.getBits(4); 3620 3621 const int32_t MonoMixdownPresent = br.getBits(1); 3622 3623 if (MonoMixdownPresent != 0) { 3624 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3625 const int32_t MonoMixdownElementNumber __unused = br.getBits(4); 3626 } 3627 3628 if (br.numBitsLeft() < 1) return ERROR_MALFORMED; 3629 const int32_t StereoMixdownPresent = br.getBits(1); 3630 if (StereoMixdownPresent != 0) { 3631 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3632 const int32_t StereoMixdownElementNumber __unused = br.getBits(4); 3633 } 3634 3635 if (br.numBitsLeft() < 1) return ERROR_MALFORMED; 3636 const int32_t MatrixMixdownIndexPresent = br.getBits(1); 3637 if (MatrixMixdownIndexPresent != 0) { 3638 if (br.numBitsLeft() < 3) return ERROR_MALFORMED; 3639 const int32_t MatrixMixdownIndex __unused = br.getBits(2); 3640 const int32_t PseudoSurroundEnable __unused = br.getBits(1); 3641 } 3642 3643 int i; 3644 for (i=0; i < NumFrontChannelElements; i++) { 3645 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3646 const int32_t FrontElementIsCpe = br.getBits(1); 3647 const int32_t FrontElementTagSelect __unused = br.getBits(4); 3648 channelsNum += FrontElementIsCpe ? 
2 : 1; 3649 } 3650 3651 for (i=0; i < NumSideChannelElements; i++) { 3652 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3653 const int32_t SideElementIsCpe = br.getBits(1); 3654 const int32_t SideElementTagSelect __unused = br.getBits(4); 3655 channelsNum += SideElementIsCpe ? 2 : 1; 3656 } 3657 3658 for (i=0; i < NumBackChannelElements; i++) { 3659 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3660 const int32_t BackElementIsCpe = br.getBits(1); 3661 const int32_t BackElementTagSelect __unused = br.getBits(4); 3662 channelsNum += BackElementIsCpe ? 2 : 1; 3663 } 3664 channelsEffectiveNum = channelsNum; 3665 3666 for (i=0; i < NumLfeChannelElements; i++) { 3667 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3668 const int32_t LfeElementTagSelect __unused = br.getBits(4); 3669 channelsNum += 1; 3670 } 3671 ALOGV("mpeg4 audio channelsNum = %d", channelsNum); 3672 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); 3673 numChannels = channelsNum; 3674 } 3675 } 3676 } 3677 3678 if (numChannels == 0) { 3679 return ERROR_UNSUPPORTED; 3680 } 3681 3682 if (mLastTrack == NULL) 3683 return ERROR_MALFORMED; 3684 3685 int32_t prevSampleRate; 3686 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); 3687 3688 if (prevSampleRate != sampleRate) { 3689 ALOGV("mpeg4 audio sample rate different from previous setting. " 3690 "was: %d, now: %d", prevSampleRate, sampleRate); 3691 } 3692 3693 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 3694 3695 int32_t prevChannelCount; 3696 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); 3697 3698 if (prevChannelCount != numChannels) { 3699 ALOGV("mpeg4 audio channel count different from previous setting. 
" 3700 "was: %d, now: %d", prevChannelCount, numChannels); 3701 } 3702 3703 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); 3704 3705 return OK; 3706} 3707 3708//////////////////////////////////////////////////////////////////////////////// 3709 3710MPEG4Source::MPEG4Source( 3711 const sp<MPEG4Extractor> &owner, 3712 const sp<MetaData> &format, 3713 const sp<DataSource> &dataSource, 3714 int32_t timeScale, 3715 const sp<SampleTable> &sampleTable, 3716 Vector<SidxEntry> &sidx, 3717 const Trex *trex, 3718 off64_t firstMoofOffset) 3719 : mOwner(owner), 3720 mFormat(format), 3721 mDataSource(dataSource), 3722 mTimescale(timeScale), 3723 mSampleTable(sampleTable), 3724 mCurrentSampleIndex(0), 3725 mCurrentFragmentIndex(0), 3726 mSegments(sidx), 3727 mTrex(trex), 3728 mFirstMoofOffset(firstMoofOffset), 3729 mCurrentMoofOffset(firstMoofOffset), 3730 mNextMoofOffset(-1), 3731 mCurrentTime(0), 3732 mCurrentSampleInfoAllocSize(0), 3733 mCurrentSampleInfoSizes(NULL), 3734 mCurrentSampleInfoOffsetsAllocSize(0), 3735 mCurrentSampleInfoOffsets(NULL), 3736 mIsAVC(false), 3737 mIsHEVC(false), 3738 mNALLengthSize(0), 3739 mStarted(false), 3740 mGroup(NULL), 3741 mBuffer(NULL), 3742 mWantsNALFragments(false), 3743 mSrcBuffer(NULL) { 3744 3745 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo)); 3746 3747 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode); 3748 mDefaultIVSize = 0; 3749 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 3750 uint32_t keytype; 3751 const void *key; 3752 size_t keysize; 3753 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 3754 CHECK(keysize <= 16); 3755 memset(mCryptoKey, 0, 16); 3756 memcpy(mCryptoKey, key, keysize); 3757 } 3758 3759 const char *mime; 3760 bool success = mFormat->findCString(kKeyMIMEType, &mime); 3761 CHECK(success); 3762 3763 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 3764 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC); 3765 3766 if (mIsAVC) { 3767 uint32_t 
type; 3768 const void *data; 3769 size_t size; 3770 CHECK(format->findData(kKeyAVCC, &type, &data, &size)); 3771 3772 const uint8_t *ptr = (const uint8_t *)data; 3773 3774 CHECK(size >= 7); 3775 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3776 3777 // The number of bytes used to encode the length of a NAL unit. 3778 mNALLengthSize = 1 + (ptr[4] & 3); 3779 } else if (mIsHEVC) { 3780 uint32_t type; 3781 const void *data; 3782 size_t size; 3783 CHECK(format->findData(kKeyHVCC, &type, &data, &size)); 3784 3785 const uint8_t *ptr = (const uint8_t *)data; 3786 3787 CHECK(size >= 22); 3788 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3789 3790 mNALLengthSize = 1 + (ptr[14 + 7] & 3); 3791 } 3792 3793 CHECK(format->findInt32(kKeyTrackID, &mTrackId)); 3794 3795} 3796 3797status_t MPEG4Source::init() { 3798 if (mFirstMoofOffset != 0) { 3799 off64_t offset = mFirstMoofOffset; 3800 return parseChunk(&offset); 3801 } 3802 return OK; 3803} 3804 3805MPEG4Source::~MPEG4Source() { 3806 if (mStarted) { 3807 stop(); 3808 } 3809 free(mCurrentSampleInfoSizes); 3810 free(mCurrentSampleInfoOffsets); 3811} 3812 3813status_t MPEG4Source::start(MetaData *params) { 3814 Mutex::Autolock autoLock(mLock); 3815 3816 CHECK(!mStarted); 3817 3818 int32_t val; 3819 if (params && params->findInt32(kKeyWantsNALFragments, &val) 3820 && val != 0) { 3821 mWantsNALFragments = true; 3822 } else { 3823 mWantsNALFragments = false; 3824 } 3825 3826 int32_t tmp; 3827 CHECK(mFormat->findInt32(kKeyMaxInputSize, &tmp)); 3828 size_t max_size = tmp; 3829 3830 // A somewhat arbitrary limit that should be sufficient for 8k video frames 3831 // If you see the message below for a valid input stream: increase the limit 3832 const size_t kMaxBufferSize = 64 * 1024 * 1024; 3833 if (max_size > kMaxBufferSize) { 3834 ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize); 3835 return ERROR_MALFORMED; 3836 } 3837 if (max_size == 0) { 3838 ALOGE("zero max input size"); 3839 return 
ERROR_MALFORMED; 3840 } 3841 3842 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize. 3843 const size_t kMaxBuffers = 8; 3844 const size_t buffers = min(kMaxBufferSize / max_size, kMaxBuffers); 3845 mGroup = new MediaBufferGroup(buffers, max_size); 3846 mSrcBuffer = new (std::nothrow) uint8_t[max_size]; 3847 if (mSrcBuffer == NULL) { 3848 // file probably specified a bad max size 3849 delete mGroup; 3850 mGroup = NULL; 3851 return ERROR_MALFORMED; 3852 } 3853 3854 mStarted = true; 3855 3856 return OK; 3857} 3858 3859status_t MPEG4Source::stop() { 3860 Mutex::Autolock autoLock(mLock); 3861 3862 CHECK(mStarted); 3863 3864 if (mBuffer != NULL) { 3865 mBuffer->release(); 3866 mBuffer = NULL; 3867 } 3868 3869 delete[] mSrcBuffer; 3870 mSrcBuffer = NULL; 3871 3872 delete mGroup; 3873 mGroup = NULL; 3874 3875 mStarted = false; 3876 mCurrentSampleIndex = 0; 3877 3878 return OK; 3879} 3880 3881status_t MPEG4Source::parseChunk(off64_t *offset) { 3882 uint32_t hdr[2]; 3883 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3884 return ERROR_IO; 3885 } 3886 uint64_t chunk_size = ntohl(hdr[0]); 3887 uint32_t chunk_type = ntohl(hdr[1]); 3888 off64_t data_offset = *offset + 8; 3889 3890 if (chunk_size == 1) { 3891 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 3892 return ERROR_IO; 3893 } 3894 chunk_size = ntoh64(chunk_size); 3895 data_offset += 8; 3896 3897 if (chunk_size < 16) { 3898 // The smallest valid chunk is 16 bytes long in this case. 3899 return ERROR_MALFORMED; 3900 } 3901 } else if (chunk_size < 8) { 3902 // The smallest valid chunk is 8 bytes long. 
3903 return ERROR_MALFORMED; 3904 } 3905 3906 char chunk[5]; 3907 MakeFourCCString(chunk_type, chunk); 3908 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset); 3909 3910 off64_t chunk_data_size = *offset + chunk_size - data_offset; 3911 3912 switch(chunk_type) { 3913 3914 case FOURCC('t', 'r', 'a', 'f'): 3915 case FOURCC('m', 'o', 'o', 'f'): { 3916 off64_t stop_offset = *offset + chunk_size; 3917 *offset = data_offset; 3918 while (*offset < stop_offset) { 3919 status_t err = parseChunk(offset); 3920 if (err != OK) { 3921 return err; 3922 } 3923 } 3924 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3925 // *offset points to the box following this moof. Find the next moof from there. 3926 3927 while (true) { 3928 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3929 // no more box to the end of file. 3930 break; 3931 } 3932 chunk_size = ntohl(hdr[0]); 3933 chunk_type = ntohl(hdr[1]); 3934 if (chunk_size == 1) { 3935 // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box 3936 // which is defined in 4.2 Object Structure. 3937 // When chunk_size==1, 8 bytes follows as "largesize". 3938 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 3939 return ERROR_IO; 3940 } 3941 chunk_size = ntoh64(chunk_size); 3942 if (chunk_size < 16) { 3943 // The smallest valid chunk is 16 bytes long in this case. 3944 return ERROR_MALFORMED; 3945 } 3946 } else if (chunk_size == 0) { 3947 // next box extends to end of file. 3948 } else if (chunk_size < 8) { 3949 // The smallest valid chunk is 8 bytes long in this case. 
3950 return ERROR_MALFORMED; 3951 } 3952 3953 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3954 mNextMoofOffset = *offset; 3955 break; 3956 } else if (chunk_size == 0) { 3957 break; 3958 } 3959 *offset += chunk_size; 3960 } 3961 } 3962 break; 3963 } 3964 3965 case FOURCC('t', 'f', 'h', 'd'): { 3966 status_t err; 3967 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 3968 return err; 3969 } 3970 *offset += chunk_size; 3971 break; 3972 } 3973 3974 case FOURCC('t', 'r', 'u', 'n'): { 3975 status_t err; 3976 if (mLastParsedTrackId == mTrackId) { 3977 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 3978 return err; 3979 } 3980 } 3981 3982 *offset += chunk_size; 3983 break; 3984 } 3985 3986 case FOURCC('s', 'a', 'i', 'z'): { 3987 status_t err; 3988 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 3989 return err; 3990 } 3991 *offset += chunk_size; 3992 break; 3993 } 3994 case FOURCC('s', 'a', 'i', 'o'): { 3995 status_t err; 3996 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 3997 return err; 3998 } 3999 *offset += chunk_size; 4000 break; 4001 } 4002 4003 case FOURCC('m', 'd', 'a', 't'): { 4004 // parse DRM info if present 4005 ALOGV("MPEG4Source::parseChunk mdat"); 4006 // if saiz/saoi was previously observed, do something with the sampleinfos 4007 *offset += chunk_size; 4008 break; 4009 } 4010 4011 default: { 4012 *offset += chunk_size; 4013 break; 4014 } 4015 } 4016 return OK; 4017} 4018 4019status_t MPEG4Source::parseSampleAuxiliaryInformationSizes( 4020 off64_t offset, off64_t /* size */) { 4021 ALOGV("parseSampleAuxiliaryInformationSizes"); 4022 // 14496-12 8.7.12 4023 uint8_t version; 4024 if (mDataSource->readAt( 4025 offset, &version, sizeof(version)) 4026 < (ssize_t)sizeof(version)) { 4027 return ERROR_IO; 4028 } 4029 4030 if (version != 0) { 4031 return ERROR_UNSUPPORTED; 4032 } 4033 offset++; 4034 4035 uint32_t flags; 4036 if 
(!mDataSource->getUInt24(offset, &flags)) { 4037 return ERROR_IO; 4038 } 4039 offset += 3; 4040 4041 if (flags & 1) { 4042 uint32_t tmp; 4043 if (!mDataSource->getUInt32(offset, &tmp)) { 4044 return ERROR_MALFORMED; 4045 } 4046 mCurrentAuxInfoType = tmp; 4047 offset += 4; 4048 if (!mDataSource->getUInt32(offset, &tmp)) { 4049 return ERROR_MALFORMED; 4050 } 4051 mCurrentAuxInfoTypeParameter = tmp; 4052 offset += 4; 4053 } 4054 4055 uint8_t defsize; 4056 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 4057 return ERROR_MALFORMED; 4058 } 4059 mCurrentDefaultSampleInfoSize = defsize; 4060 offset++; 4061 4062 uint32_t smplcnt; 4063 if (!mDataSource->getUInt32(offset, &smplcnt)) { 4064 return ERROR_MALFORMED; 4065 } 4066 mCurrentSampleInfoCount = smplcnt; 4067 offset += 4; 4068 4069 if (mCurrentDefaultSampleInfoSize != 0) { 4070 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 4071 return OK; 4072 } 4073 if (smplcnt > mCurrentSampleInfoAllocSize) { 4074 uint8_t * newPtr = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 4075 if (newPtr == NULL) { 4076 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt); 4077 return NO_MEMORY; 4078 } 4079 mCurrentSampleInfoSizes = newPtr; 4080 mCurrentSampleInfoAllocSize = smplcnt; 4081 } 4082 4083 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 4084 return OK; 4085} 4086 4087status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets( 4088 off64_t offset, off64_t /* size */) { 4089 ALOGV("parseSampleAuxiliaryInformationOffsets"); 4090 // 14496-12 8.7.13 4091 uint8_t version; 4092 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 4093 return ERROR_IO; 4094 } 4095 offset++; 4096 4097 uint32_t flags; 4098 if (!mDataSource->getUInt24(offset, &flags)) { 4099 return ERROR_IO; 4100 } 4101 offset += 3; 4102 4103 uint32_t entrycount; 4104 if (!mDataSource->getUInt32(offset, &entrycount)) { 4105 return ERROR_IO; 4106 } 4107 offset += 4; 4108 if 
(entrycount == 0) { 4109 return OK; 4110 } 4111 if (entrycount > UINT32_MAX / 8) { 4112 return ERROR_MALFORMED; 4113 } 4114 4115 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 4116 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8); 4117 if (newPtr == NULL) { 4118 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoOffsetsAllocSize, entrycount * 8); 4119 return NO_MEMORY; 4120 } 4121 mCurrentSampleInfoOffsets = newPtr; 4122 mCurrentSampleInfoOffsetsAllocSize = entrycount; 4123 } 4124 mCurrentSampleInfoOffsetCount = entrycount; 4125 4126 if (mCurrentSampleInfoOffsets == NULL) { 4127 return OK; 4128 } 4129 4130 for (size_t i = 0; i < entrycount; i++) { 4131 if (version == 0) { 4132 uint32_t tmp; 4133 if (!mDataSource->getUInt32(offset, &tmp)) { 4134 return ERROR_IO; 4135 } 4136 mCurrentSampleInfoOffsets[i] = tmp; 4137 offset += 4; 4138 } else { 4139 uint64_t tmp; 4140 if (!mDataSource->getUInt64(offset, &tmp)) { 4141 return ERROR_IO; 4142 } 4143 mCurrentSampleInfoOffsets[i] = tmp; 4144 offset += 8; 4145 } 4146 } 4147 4148 // parse clear/encrypted data 4149 4150 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 4151 4152 drmoffset += mCurrentMoofOffset; 4153 int ivlength; 4154 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 4155 4156 // only 0, 8 and 16 byte initialization vectors are supported 4157 if (ivlength != 0 && ivlength != 8 && ivlength != 16) { 4158 ALOGW("unsupported IV length: %d", ivlength); 4159 return ERROR_MALFORMED; 4160 } 4161 // read CencSampleAuxiliaryDataFormats 4162 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) { 4163 if (i >= mCurrentSamples.size()) { 4164 ALOGW("too few samples"); 4165 break; 4166 } 4167 Sample *smpl = &mCurrentSamples.editItemAt(i); 4168 4169 memset(smpl->iv, 0, 16); 4170 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) { 4171 return ERROR_IO; 4172 } 4173 4174 drmoffset += ivlength; 4175 4176 int32_t smplinfosize = 
mCurrentDefaultSampleInfoSize; 4177 if (smplinfosize == 0) { 4178 smplinfosize = mCurrentSampleInfoSizes[i]; 4179 } 4180 if (smplinfosize > ivlength) { 4181 uint16_t numsubsamples; 4182 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) { 4183 return ERROR_IO; 4184 } 4185 drmoffset += 2; 4186 for (size_t j = 0; j < numsubsamples; j++) { 4187 uint16_t numclear; 4188 uint32_t numencrypted; 4189 if (!mDataSource->getUInt16(drmoffset, &numclear)) { 4190 return ERROR_IO; 4191 } 4192 drmoffset += 2; 4193 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) { 4194 return ERROR_IO; 4195 } 4196 drmoffset += 4; 4197 smpl->clearsizes.add(numclear); 4198 smpl->encryptedsizes.add(numencrypted); 4199 } 4200 } else { 4201 smpl->clearsizes.add(0); 4202 smpl->encryptedsizes.add(smpl->size); 4203 } 4204 } 4205 4206 4207 return OK; 4208} 4209 4210status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 4211 4212 if (size < 8) { 4213 return -EINVAL; 4214 } 4215 4216 uint32_t flags; 4217 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 4218 return ERROR_MALFORMED; 4219 } 4220 4221 if (flags & 0xff000000) { 4222 return -EINVAL; 4223 } 4224 4225 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 4226 return ERROR_MALFORMED; 4227 } 4228 4229 if (mLastParsedTrackId != mTrackId) { 4230 // this is not the right track, skip it 4231 return OK; 4232 } 4233 4234 mTrackFragmentHeaderInfo.mFlags = flags; 4235 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 4236 offset += 8; 4237 size -= 8; 4238 4239 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 4240 4241 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 4242 if (size < 8) { 4243 return -EINVAL; 4244 } 4245 4246 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) { 4247 return ERROR_MALFORMED; 4248 } 4249 offset += 8; 4250 size -= 8; 4251 } 4252 4253 if (flags & 
TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 4254 if (size < 4) { 4255 return -EINVAL; 4256 } 4257 4258 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 4259 return ERROR_MALFORMED; 4260 } 4261 offset += 4; 4262 size -= 4; 4263 } 4264 4265 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 4266 if (size < 4) { 4267 return -EINVAL; 4268 } 4269 4270 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 4271 return ERROR_MALFORMED; 4272 } 4273 offset += 4; 4274 size -= 4; 4275 } 4276 4277 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 4278 if (size < 4) { 4279 return -EINVAL; 4280 } 4281 4282 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 4283 return ERROR_MALFORMED; 4284 } 4285 offset += 4; 4286 size -= 4; 4287 } 4288 4289 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 4290 if (size < 4) { 4291 return -EINVAL; 4292 } 4293 4294 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 4295 return ERROR_MALFORMED; 4296 } 4297 offset += 4; 4298 size -= 4; 4299 } 4300 4301 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 4302 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 4303 } 4304 4305 mTrackFragmentHeaderInfo.mDataOffset = 0; 4306 return OK; 4307} 4308 4309status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 4310 4311 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 4312 if (size < 8) { 4313 return -EINVAL; 4314 } 4315 4316 enum { 4317 kDataOffsetPresent = 0x01, 4318 kFirstSampleFlagsPresent = 0x04, 4319 kSampleDurationPresent = 0x100, 4320 kSampleSizePresent = 0x200, 4321 kSampleFlagsPresent = 0x400, 4322 kSampleCompositionTimeOffsetPresent = 0x800, 4323 }; 4324 4325 uint32_t flags; 4326 if (!mDataSource->getUInt32(offset, &flags)) { 4327 return ERROR_MALFORMED; 4328 } 4329 // |version| only affects 
SampleCompositionTimeOffset field. 4330 // If version == 0, SampleCompositionTimeOffset is uint32_t; 4331 // Otherwise, SampleCompositionTimeOffset is int32_t. 4332 // Sample.compositionOffset is defined as int32_t. 4333 uint8_t version = flags >> 24; 4334 flags &= 0xffffff; 4335 ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags); 4336 4337 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 4338 // These two shall not be used together. 4339 return -EINVAL; 4340 } 4341 4342 uint32_t sampleCount; 4343 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 4344 return ERROR_MALFORMED; 4345 } 4346 offset += 8; 4347 size -= 8; 4348 4349 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 4350 4351 uint32_t firstSampleFlags = 0; 4352 4353 if (flags & kDataOffsetPresent) { 4354 if (size < 4) { 4355 return -EINVAL; 4356 } 4357 4358 int32_t dataOffsetDelta; 4359 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 4360 return ERROR_MALFORMED; 4361 } 4362 4363 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 4364 4365 offset += 4; 4366 size -= 4; 4367 } 4368 4369 if (flags & kFirstSampleFlagsPresent) { 4370 if (size < 4) { 4371 return -EINVAL; 4372 } 4373 4374 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 4375 return ERROR_MALFORMED; 4376 } 4377 offset += 4; 4378 size -= 4; 4379 } 4380 4381 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 4382 sampleCtsOffset = 0; 4383 4384 size_t bytesPerSample = 0; 4385 if (flags & kSampleDurationPresent) { 4386 bytesPerSample += 4; 4387 } else if (mTrackFragmentHeaderInfo.mFlags 4388 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 4389 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 4390 } else if (mTrex) { 4391 sampleDuration = mTrex->default_sample_duration; 4392 } 4393 4394 if (flags & kSampleSizePresent) { 4395 bytesPerSample += 4; 4396 } else if (mTrackFragmentHeaderInfo.mFlags 4397 & 
TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 4398 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 4399 } else { 4400 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 4401 } 4402 4403 if (flags & kSampleFlagsPresent) { 4404 bytesPerSample += 4; 4405 } else if (mTrackFragmentHeaderInfo.mFlags 4406 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 4407 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 4408 } else { 4409 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 4410 } 4411 4412 if (flags & kSampleCompositionTimeOffsetPresent) { 4413 bytesPerSample += 4; 4414 } else { 4415 sampleCtsOffset = 0; 4416 } 4417 4418 if (size < (off64_t)(sampleCount * bytesPerSample)) { 4419 return -EINVAL; 4420 } 4421 4422 Sample tmp; 4423 for (uint32_t i = 0; i < sampleCount; ++i) { 4424 if (flags & kSampleDurationPresent) { 4425 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 4426 return ERROR_MALFORMED; 4427 } 4428 offset += 4; 4429 } 4430 4431 if (flags & kSampleSizePresent) { 4432 if (!mDataSource->getUInt32(offset, &sampleSize)) { 4433 return ERROR_MALFORMED; 4434 } 4435 offset += 4; 4436 } 4437 4438 if (flags & kSampleFlagsPresent) { 4439 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 4440 return ERROR_MALFORMED; 4441 } 4442 offset += 4; 4443 } 4444 4445 if (flags & kSampleCompositionTimeOffsetPresent) { 4446 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 4447 return ERROR_MALFORMED; 4448 } 4449 offset += 4; 4450 } 4451 4452 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, " 4453 " flags 0x%08x", i + 1, 4454 dataOffset, sampleSize, sampleDuration, 4455 (flags & kFirstSampleFlagsPresent) && i == 0 4456 ? 
firstSampleFlags : sampleFlags); 4457 tmp.offset = dataOffset; 4458 tmp.size = sampleSize; 4459 tmp.duration = sampleDuration; 4460 tmp.compositionOffset = sampleCtsOffset; 4461 mCurrentSamples.add(tmp); 4462 4463 dataOffset += sampleSize; 4464 } 4465 4466 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 4467 4468 return OK; 4469} 4470 4471sp<MetaData> MPEG4Source::getFormat() { 4472 Mutex::Autolock autoLock(mLock); 4473 4474 return mFormat; 4475} 4476 4477size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 4478 switch (mNALLengthSize) { 4479 case 1: 4480 return *data; 4481 case 2: 4482 return U16_AT(data); 4483 case 3: 4484 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 4485 case 4: 4486 return U32_AT(data); 4487 } 4488 4489 // This cannot happen, mNALLengthSize springs to life by adding 1 to 4490 // a 2-bit integer. 4491 CHECK(!"Should not be here."); 4492 4493 return 0; 4494} 4495 4496status_t MPEG4Source::read( 4497 MediaBuffer **out, const ReadOptions *options) { 4498 Mutex::Autolock autoLock(mLock); 4499 4500 CHECK(mStarted); 4501 4502 if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) { 4503 *out = nullptr; 4504 return WOULD_BLOCK; 4505 } 4506 4507 if (mFirstMoofOffset > 0) { 4508 return fragmentedRead(out, options); 4509 } 4510 4511 *out = NULL; 4512 4513 int64_t targetSampleTimeUs = -1; 4514 4515 int64_t seekTimeUs; 4516 ReadOptions::SeekMode mode; 4517 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 4518 uint32_t findFlags = 0; 4519 switch (mode) { 4520 case ReadOptions::SEEK_PREVIOUS_SYNC: 4521 findFlags = SampleTable::kFlagBefore; 4522 break; 4523 case ReadOptions::SEEK_NEXT_SYNC: 4524 findFlags = SampleTable::kFlagAfter; 4525 break; 4526 case ReadOptions::SEEK_CLOSEST_SYNC: 4527 case ReadOptions::SEEK_CLOSEST: 4528 findFlags = SampleTable::kFlagClosest; 4529 break; 4530 default: 4531 CHECK(!"Should not be here."); 4532 break; 4533 } 4534 4535 uint32_t sampleIndex; 4536 status_t err = 
mSampleTable->findSampleAtTime( 4537 seekTimeUs, 1000000, mTimescale, 4538 &sampleIndex, findFlags); 4539 4540 if (mode == ReadOptions::SEEK_CLOSEST) { 4541 // We found the closest sample already, now we want the sync 4542 // sample preceding it (or the sample itself of course), even 4543 // if the subsequent sync sample is closer. 4544 findFlags = SampleTable::kFlagBefore; 4545 } 4546 4547 uint32_t syncSampleIndex; 4548 if (err == OK) { 4549 err = mSampleTable->findSyncSampleNear( 4550 sampleIndex, &syncSampleIndex, findFlags); 4551 } 4552 4553 uint32_t sampleTime; 4554 if (err == OK) { 4555 err = mSampleTable->getMetaDataForSample( 4556 sampleIndex, NULL, NULL, &sampleTime); 4557 } 4558 4559 if (err != OK) { 4560 if (err == ERROR_OUT_OF_RANGE) { 4561 // An attempt to seek past the end of the stream would 4562 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 4563 // this all the way to the MediaPlayer would cause abnormal 4564 // termination. Legacy behaviour appears to be to behave as if 4565 // we had seeked to the end of stream, ending normally. 
4566 err = ERROR_END_OF_STREAM; 4567 } 4568 ALOGV("end of stream"); 4569 return err; 4570 } 4571 4572 if (mode == ReadOptions::SEEK_CLOSEST) { 4573 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 4574 } 4575 4576#if 0 4577 uint32_t syncSampleTime; 4578 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 4579 syncSampleIndex, NULL, NULL, &syncSampleTime)); 4580 4581 ALOGI("seek to time %lld us => sample at time %lld us, " 4582 "sync sample at time %lld us", 4583 seekTimeUs, 4584 sampleTime * 1000000ll / mTimescale, 4585 syncSampleTime * 1000000ll / mTimescale); 4586#endif 4587 4588 mCurrentSampleIndex = syncSampleIndex; 4589 if (mBuffer != NULL) { 4590 mBuffer->release(); 4591 mBuffer = NULL; 4592 } 4593 4594 // fall through 4595 } 4596 4597 off64_t offset = 0; 4598 size_t size = 0; 4599 uint32_t cts, stts; 4600 bool isSyncSample; 4601 bool newBuffer = false; 4602 if (mBuffer == NULL) { 4603 newBuffer = true; 4604 4605 status_t err = 4606 mSampleTable->getMetaDataForSample( 4607 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts); 4608 4609 if (err != OK) { 4610 return err; 4611 } 4612 4613 err = mGroup->acquire_buffer(&mBuffer); 4614 4615 if (err != OK) { 4616 CHECK(mBuffer == NULL); 4617 return err; 4618 } 4619 if (size > mBuffer->size()) { 4620 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 4621 return ERROR_BUFFER_TOO_SMALL; 4622 } 4623 } 4624 4625 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) { 4626 if (newBuffer) { 4627 ssize_t num_bytes_read = 4628 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 4629 4630 if (num_bytes_read < (ssize_t)size) { 4631 mBuffer->release(); 4632 mBuffer = NULL; 4633 4634 return ERROR_IO; 4635 } 4636 4637 CHECK(mBuffer != NULL); 4638 mBuffer->set_range(0, size); 4639 mBuffer->meta_data()->clear(); 4640 mBuffer->meta_data()->setInt64( 4641 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4642 mBuffer->meta_data()->setInt64( 4643 kKeyDuration, ((int64_t)stts * 1000000) / 
mTimescale); 4644 4645 if (targetSampleTimeUs >= 0) { 4646 mBuffer->meta_data()->setInt64( 4647 kKeyTargetTime, targetSampleTimeUs); 4648 } 4649 4650 if (isSyncSample) { 4651 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4652 } 4653 4654 ++mCurrentSampleIndex; 4655 } 4656 4657 if (!mIsAVC && !mIsHEVC) { 4658 *out = mBuffer; 4659 mBuffer = NULL; 4660 4661 return OK; 4662 } 4663 4664 // Each NAL unit is split up into its constituent fragments and 4665 // each one of them returned in its own buffer. 4666 4667 CHECK(mBuffer->range_length() >= mNALLengthSize); 4668 4669 const uint8_t *src = 4670 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 4671 4672 size_t nal_size = parseNALSize(src); 4673 if (mNALLengthSize > SIZE_MAX - nal_size) { 4674 ALOGE("b/24441553, b/24445122"); 4675 } 4676 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 4677 ALOGE("incomplete NAL unit."); 4678 4679 mBuffer->release(); 4680 mBuffer = NULL; 4681 4682 return ERROR_MALFORMED; 4683 } 4684 4685 MediaBuffer *clone = mBuffer->clone(); 4686 CHECK(clone != NULL); 4687 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 4688 4689 CHECK(mBuffer != NULL); 4690 mBuffer->set_range( 4691 mBuffer->range_offset() + mNALLengthSize + nal_size, 4692 mBuffer->range_length() - mNALLengthSize - nal_size); 4693 4694 if (mBuffer->range_length() == 0) { 4695 mBuffer->release(); 4696 mBuffer = NULL; 4697 } 4698 4699 *out = clone; 4700 4701 return OK; 4702 } else { 4703 // Whole NAL units are returned but each fragment is prefixed by 4704 // the start code (0x00 00 00 01). 
4705 ssize_t num_bytes_read = 0; 4706 int32_t drm = 0; 4707 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 4708 if (usesDRM) { 4709 num_bytes_read = 4710 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 4711 } else { 4712 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 4713 } 4714 4715 if (num_bytes_read < (ssize_t)size) { 4716 mBuffer->release(); 4717 mBuffer = NULL; 4718 4719 return ERROR_IO; 4720 } 4721 4722 if (usesDRM) { 4723 CHECK(mBuffer != NULL); 4724 mBuffer->set_range(0, size); 4725 4726 } else { 4727 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4728 size_t srcOffset = 0; 4729 size_t dstOffset = 0; 4730 4731 while (srcOffset < size) { 4732 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 4733 size_t nalLength = 0; 4734 if (!isMalFormed) { 4735 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4736 srcOffset += mNALLengthSize; 4737 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength); 4738 } 4739 4740 if (isMalFormed) { 4741 ALOGE("Video is malformed"); 4742 mBuffer->release(); 4743 mBuffer = NULL; 4744 return ERROR_MALFORMED; 4745 } 4746 4747 if (nalLength == 0) { 4748 continue; 4749 } 4750 4751 if (dstOffset > SIZE_MAX - 4 || 4752 dstOffset + 4 > SIZE_MAX - nalLength || 4753 dstOffset + 4 + nalLength > mBuffer->size()) { 4754 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size()); 4755 android_errorWriteLog(0x534e4554, "27208621"); 4756 mBuffer->release(); 4757 mBuffer = NULL; 4758 return ERROR_MALFORMED; 4759 } 4760 4761 dstData[dstOffset++] = 0; 4762 dstData[dstOffset++] = 0; 4763 dstData[dstOffset++] = 0; 4764 dstData[dstOffset++] = 1; 4765 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4766 srcOffset += nalLength; 4767 dstOffset += nalLength; 4768 } 4769 CHECK_EQ(srcOffset, size); 4770 CHECK(mBuffer != NULL); 4771 mBuffer->set_range(0, dstOffset); 4772 } 4773 4774 mBuffer->meta_data()->clear(); 4775 mBuffer->meta_data()->setInt64( 4776 
kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4777 mBuffer->meta_data()->setInt64( 4778 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 4779 4780 if (targetSampleTimeUs >= 0) { 4781 mBuffer->meta_data()->setInt64( 4782 kKeyTargetTime, targetSampleTimeUs); 4783 } 4784 4785 if (mIsAVC) { 4786 uint32_t layerId = FindAVCLayerId( 4787 (const uint8_t *)mBuffer->data(), mBuffer->range_length()); 4788 mBuffer->meta_data()->setInt32(kKeyTemporalLayerId, layerId); 4789 } 4790 4791 if (isSyncSample) { 4792 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4793 } 4794 4795 ++mCurrentSampleIndex; 4796 4797 *out = mBuffer; 4798 mBuffer = NULL; 4799 4800 return OK; 4801 } 4802} 4803 4804status_t MPEG4Source::fragmentedRead( 4805 MediaBuffer **out, const ReadOptions *options) { 4806 4807 ALOGV("MPEG4Source::fragmentedRead"); 4808 4809 CHECK(mStarted); 4810 4811 *out = NULL; 4812 4813 int64_t targetSampleTimeUs = -1; 4814 4815 int64_t seekTimeUs; 4816 ReadOptions::SeekMode mode; 4817 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 4818 4819 int numSidxEntries = mSegments.size(); 4820 if (numSidxEntries != 0) { 4821 int64_t totalTime = 0; 4822 off64_t totalOffset = mFirstMoofOffset; 4823 for (int i = 0; i < numSidxEntries; i++) { 4824 const SidxEntry *se = &mSegments[i]; 4825 if (totalTime + se->mDurationUs > seekTimeUs) { 4826 // The requested time is somewhere in this segment 4827 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) || 4828 (mode == ReadOptions::SEEK_CLOSEST_SYNC && 4829 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 4830 // requested next sync, or closest sync and it was closer to the end of 4831 // this segment 4832 totalTime += se->mDurationUs; 4833 totalOffset += se->mSize; 4834 } 4835 break; 4836 } 4837 totalTime += se->mDurationUs; 4838 totalOffset += se->mSize; 4839 } 4840 mCurrentMoofOffset = totalOffset; 4841 mNextMoofOffset = -1; 4842 mCurrentSamples.clear(); 4843 mCurrentSampleIndex = 0; 
4844 status_t err = parseChunk(&totalOffset); 4845 if (err != OK) { 4846 return err; 4847 } 4848 mCurrentTime = totalTime * mTimescale / 1000000ll; 4849 } else { 4850 // without sidx boxes, we can only seek to 0 4851 mCurrentMoofOffset = mFirstMoofOffset; 4852 mNextMoofOffset = -1; 4853 mCurrentSamples.clear(); 4854 mCurrentSampleIndex = 0; 4855 off64_t tmp = mCurrentMoofOffset; 4856 status_t err = parseChunk(&tmp); 4857 if (err != OK) { 4858 return err; 4859 } 4860 mCurrentTime = 0; 4861 } 4862 4863 if (mBuffer != NULL) { 4864 mBuffer->release(); 4865 mBuffer = NULL; 4866 } 4867 4868 // fall through 4869 } 4870 4871 off64_t offset = 0; 4872 size_t size = 0; 4873 uint32_t cts = 0; 4874 bool isSyncSample = false; 4875 bool newBuffer = false; 4876 if (mBuffer == NULL) { 4877 newBuffer = true; 4878 4879 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4880 // move to next fragment if there is one 4881 if (mNextMoofOffset <= mCurrentMoofOffset) { 4882 return ERROR_END_OF_STREAM; 4883 } 4884 off64_t nextMoof = mNextMoofOffset; 4885 mCurrentMoofOffset = nextMoof; 4886 mCurrentSamples.clear(); 4887 mCurrentSampleIndex = 0; 4888 status_t err = parseChunk(&nextMoof); 4889 if (err != OK) { 4890 return err; 4891 } 4892 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4893 return ERROR_END_OF_STREAM; 4894 } 4895 } 4896 4897 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4898 offset = smpl->offset; 4899 size = smpl->size; 4900 cts = mCurrentTime + smpl->compositionOffset; 4901 mCurrentTime += smpl->duration; 4902 isSyncSample = (mCurrentSampleIndex == 0); // XXX 4903 4904 status_t err = mGroup->acquire_buffer(&mBuffer); 4905 4906 if (err != OK) { 4907 CHECK(mBuffer == NULL); 4908 ALOGV("acquire_buffer returned %d", err); 4909 return err; 4910 } 4911 if (size > mBuffer->size()) { 4912 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 4913 return ERROR_BUFFER_TOO_SMALL; 4914 } 4915 } 4916 4917 const Sample *smpl = 
&mCurrentSamples[mCurrentSampleIndex]; 4918 const sp<MetaData> bufmeta = mBuffer->meta_data(); 4919 bufmeta->clear(); 4920 if (smpl->encryptedsizes.size()) { 4921 // store clear/encrypted lengths in metadata 4922 bufmeta->setData(kKeyPlainSizes, 0, 4923 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 4924 bufmeta->setData(kKeyEncryptedSizes, 0, 4925 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 4926 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size? 4927 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 4928 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode); 4929 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16); 4930 } 4931 4932 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) { 4933 if (newBuffer) { 4934 if (!isInRange((size_t)0u, mBuffer->size(), size)) { 4935 mBuffer->release(); 4936 mBuffer = NULL; 4937 4938 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size); 4939 return ERROR_MALFORMED; 4940 } 4941 4942 ssize_t num_bytes_read = 4943 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 4944 4945 if (num_bytes_read < (ssize_t)size) { 4946 mBuffer->release(); 4947 mBuffer = NULL; 4948 4949 ALOGE("i/o error"); 4950 return ERROR_IO; 4951 } 4952 4953 CHECK(mBuffer != NULL); 4954 mBuffer->set_range(0, size); 4955 mBuffer->meta_data()->setInt64( 4956 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4957 mBuffer->meta_data()->setInt64( 4958 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4959 4960 if (targetSampleTimeUs >= 0) { 4961 mBuffer->meta_data()->setInt64( 4962 kKeyTargetTime, targetSampleTimeUs); 4963 } 4964 4965 if (mIsAVC) { 4966 uint32_t layerId = FindAVCLayerId( 4967 (const uint8_t *)mBuffer->data(), mBuffer->range_length()); 4968 mBuffer->meta_data()->setInt32(kKeyTemporalLayerId, layerId); 4969 } 4970 4971 if (isSyncSample) { 4972 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4973 } 4974 4975 ++mCurrentSampleIndex; 4976 } 4977 4978 if (!mIsAVC && !mIsHEVC) 
{ 4979 *out = mBuffer; 4980 mBuffer = NULL; 4981 4982 return OK; 4983 } 4984 4985 // Each NAL unit is split up into its constituent fragments and 4986 // each one of them returned in its own buffer. 4987 4988 CHECK(mBuffer->range_length() >= mNALLengthSize); 4989 4990 const uint8_t *src = 4991 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 4992 4993 size_t nal_size = parseNALSize(src); 4994 if (mNALLengthSize > SIZE_MAX - nal_size) { 4995 ALOGE("b/24441553, b/24445122"); 4996 } 4997 4998 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 4999 ALOGE("incomplete NAL unit."); 5000 5001 mBuffer->release(); 5002 mBuffer = NULL; 5003 5004 return ERROR_MALFORMED; 5005 } 5006 5007 MediaBuffer *clone = mBuffer->clone(); 5008 CHECK(clone != NULL); 5009 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 5010 5011 CHECK(mBuffer != NULL); 5012 mBuffer->set_range( 5013 mBuffer->range_offset() + mNALLengthSize + nal_size, 5014 mBuffer->range_length() - mNALLengthSize - nal_size); 5015 5016 if (mBuffer->range_length() == 0) { 5017 mBuffer->release(); 5018 mBuffer = NULL; 5019 } 5020 5021 *out = clone; 5022 5023 return OK; 5024 } else { 5025 ALOGV("whole NAL"); 5026 // Whole NAL units are returned but each fragment is prefixed by 5027 // the start code (0x00 00 00 01). 
5028 ssize_t num_bytes_read = 0; 5029 int32_t drm = 0; 5030 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 5031 void *data = NULL; 5032 bool isMalFormed = false; 5033 if (usesDRM) { 5034 if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) { 5035 isMalFormed = true; 5036 } else { 5037 data = mBuffer->data(); 5038 } 5039 } else { 5040 int32_t max_size; 5041 if (mFormat == NULL 5042 || !mFormat->findInt32(kKeyMaxInputSize, &max_size) 5043 || !isInRange((size_t)0u, (size_t)max_size, size)) { 5044 isMalFormed = true; 5045 } else { 5046 data = mSrcBuffer; 5047 } 5048 } 5049 5050 if (isMalFormed || data == NULL) { 5051 ALOGE("isMalFormed size %zu", size); 5052 if (mBuffer != NULL) { 5053 mBuffer->release(); 5054 mBuffer = NULL; 5055 } 5056 return ERROR_MALFORMED; 5057 } 5058 num_bytes_read = mDataSource->readAt(offset, data, size); 5059 5060 if (num_bytes_read < (ssize_t)size) { 5061 mBuffer->release(); 5062 mBuffer = NULL; 5063 5064 ALOGE("i/o error"); 5065 return ERROR_IO; 5066 } 5067 5068 if (usesDRM) { 5069 CHECK(mBuffer != NULL); 5070 mBuffer->set_range(0, size); 5071 5072 } else { 5073 uint8_t *dstData = (uint8_t *)mBuffer->data(); 5074 size_t srcOffset = 0; 5075 size_t dstOffset = 0; 5076 5077 while (srcOffset < size) { 5078 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 5079 size_t nalLength = 0; 5080 if (!isMalFormed) { 5081 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 5082 srcOffset += mNALLengthSize; 5083 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength) 5084 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u) 5085 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength); 5086 } 5087 5088 if (isMalFormed) { 5089 ALOGE("Video is malformed; nalLength %zu", nalLength); 5090 mBuffer->release(); 5091 mBuffer = NULL; 5092 return ERROR_MALFORMED; 5093 } 5094 5095 if (nalLength == 0) { 5096 continue; 5097 } 5098 5099 if (dstOffset > SIZE_MAX - 4 || 5100 dstOffset 
+ 4 > SIZE_MAX - nalLength || 5101 dstOffset + 4 + nalLength > mBuffer->size()) { 5102 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size()); 5103 android_errorWriteLog(0x534e4554, "26365349"); 5104 mBuffer->release(); 5105 mBuffer = NULL; 5106 return ERROR_MALFORMED; 5107 } 5108 5109 dstData[dstOffset++] = 0; 5110 dstData[dstOffset++] = 0; 5111 dstData[dstOffset++] = 0; 5112 dstData[dstOffset++] = 1; 5113 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 5114 srcOffset += nalLength; 5115 dstOffset += nalLength; 5116 } 5117 CHECK_EQ(srcOffset, size); 5118 CHECK(mBuffer != NULL); 5119 mBuffer->set_range(0, dstOffset); 5120 } 5121 5122 mBuffer->meta_data()->setInt64( 5123 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 5124 mBuffer->meta_data()->setInt64( 5125 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 5126 5127 if (targetSampleTimeUs >= 0) { 5128 mBuffer->meta_data()->setInt64( 5129 kKeyTargetTime, targetSampleTimeUs); 5130 } 5131 5132 if (isSyncSample) { 5133 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 5134 } 5135 5136 ++mCurrentSampleIndex; 5137 5138 *out = mBuffer; 5139 mBuffer = NULL; 5140 5141 return OK; 5142 } 5143} 5144 5145MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 5146 const char *mimePrefix) { 5147 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 5148 const char *mime; 5149 if (track->meta != NULL 5150 && track->meta->findCString(kKeyMIMEType, &mime) 5151 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 5152 return track; 5153 } 5154 } 5155 5156 return NULL; 5157} 5158 5159void MPEG4Extractor::populateMetrics() { 5160 ALOGV("MPEG4Extractor::populateMetrics"); 5161} 5162 5163static bool LegacySniffMPEG4( 5164 const sp<DataSource> &source, String8 *mimeType, float *confidence) { 5165 uint8_t header[8]; 5166 5167 ssize_t n = source->readAt(4, header, sizeof(header)); 5168 if (n < (ssize_t)sizeof(header)) { 5169 return false; 5170 } 5171 5172 if (!memcmp(header, 
"ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 5173 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) 5174 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 5175 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 5176 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 5177 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) { 5178 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 5179 *confidence = 0.4; 5180 5181 return true; 5182 } 5183 5184 return false; 5185} 5186 5187static bool isCompatibleBrand(uint32_t fourcc) { 5188 static const uint32_t kCompatibleBrands[] = { 5189 FOURCC('i', 's', 'o', 'm'), 5190 FOURCC('i', 's', 'o', '2'), 5191 FOURCC('a', 'v', 'c', '1'), 5192 FOURCC('h', 'v', 'c', '1'), 5193 FOURCC('h', 'e', 'v', '1'), 5194 FOURCC('3', 'g', 'p', '4'), 5195 FOURCC('m', 'p', '4', '1'), 5196 FOURCC('m', 'p', '4', '2'), 5197 5198 // Won't promise that the following file types can be played. 5199 // Just give these file types a chance. 5200 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 5201 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 5202 5203 FOURCC('3', 'g', '2', 'a'), // 3GPP2 5204 FOURCC('3', 'g', '2', 'b'), 5205 }; 5206 5207 for (size_t i = 0; 5208 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 5209 ++i) { 5210 if (kCompatibleBrands[i] == fourcc) { 5211 return true; 5212 } 5213 } 5214 5215 return false; 5216} 5217 5218// Attempt to actually parse the 'ftyp' atom and determine if a suitable 5219// compatible brand is present. 5220// Also try to identify where this file's metadata ends 5221// (end of the 'moov' atom) and report it to the caller as part of 5222// the metadata. 5223static bool BetterSniffMPEG4( 5224 const sp<DataSource> &source, String8 *mimeType, float *confidence, 5225 sp<AMessage> *meta) { 5226 // We scan up to 128 bytes to identify this file as an MP4. 
5227 static const off64_t kMaxScanOffset = 128ll; 5228 5229 off64_t offset = 0ll; 5230 bool foundGoodFileType = false; 5231 off64_t moovAtomEndOffset = -1ll; 5232 bool done = false; 5233 5234 while (!done && offset < kMaxScanOffset) { 5235 uint32_t hdr[2]; 5236 if (source->readAt(offset, hdr, 8) < 8) { 5237 return false; 5238 } 5239 5240 uint64_t chunkSize = ntohl(hdr[0]); 5241 uint32_t chunkType = ntohl(hdr[1]); 5242 off64_t chunkDataOffset = offset + 8; 5243 5244 if (chunkSize == 1) { 5245 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 5246 return false; 5247 } 5248 5249 chunkSize = ntoh64(chunkSize); 5250 chunkDataOffset += 8; 5251 5252 if (chunkSize < 16) { 5253 // The smallest valid chunk is 16 bytes long in this case. 5254 return false; 5255 } 5256 5257 } else if (chunkSize < 8) { 5258 // The smallest valid chunk is 8 bytes long. 5259 return false; 5260 } 5261 5262 // (data_offset - offset) is either 8 or 16 5263 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset); 5264 if (chunkDataSize < 0) { 5265 ALOGE("b/23540914"); 5266 return ERROR_MALFORMED; 5267 } 5268 5269 char chunkstring[5]; 5270 MakeFourCCString(chunkType, chunkstring); 5271 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset); 5272 switch (chunkType) { 5273 case FOURCC('f', 't', 'y', 'p'): 5274 { 5275 if (chunkDataSize < 8) { 5276 return false; 5277 } 5278 5279 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 5280 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 5281 if (i == 1) { 5282 // Skip this index, it refers to the minorVersion, 5283 // not a brand. 
5284 continue; 5285 } 5286 5287 uint32_t brand; 5288 if (source->readAt( 5289 chunkDataOffset + 4 * i, &brand, 4) < 4) { 5290 return false; 5291 } 5292 5293 brand = ntohl(brand); 5294 5295 if (isCompatibleBrand(brand)) { 5296 foundGoodFileType = true; 5297 break; 5298 } 5299 } 5300 5301 if (!foundGoodFileType) { 5302 return false; 5303 } 5304 5305 break; 5306 } 5307 5308 case FOURCC('m', 'o', 'o', 'v'): 5309 { 5310 moovAtomEndOffset = offset + chunkSize; 5311 5312 done = true; 5313 break; 5314 } 5315 5316 default: 5317 break; 5318 } 5319 5320 offset += chunkSize; 5321 } 5322 5323 if (!foundGoodFileType) { 5324 return false; 5325 } 5326 5327 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 5328 *confidence = 0.4f; 5329 5330 if (moovAtomEndOffset >= 0) { 5331 *meta = new AMessage; 5332 (*meta)->setInt64("meta-data-size", moovAtomEndOffset); 5333 5334 ALOGV("found metadata size: %lld", (long long)moovAtomEndOffset); 5335 } 5336 5337 return true; 5338} 5339 5340bool SniffMPEG4( 5341 const sp<DataSource> &source, String8 *mimeType, float *confidence, 5342 sp<AMessage> *meta) { 5343 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) { 5344 return true; 5345 } 5346 5347 if (LegacySniffMPEG4(source, mimeType, confidence)) { 5348 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 5349 return true; 5350 } 5351 5352 return false; 5353} 5354 5355} // namespace android 5356