MPEG4Extractor.cpp revision 08a5a3a473354c965c32190f8b68549d19e08912
1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17//#define LOG_NDEBUG 0 18#define LOG_TAG "MPEG4Extractor" 19 20#include <ctype.h> 21#include <inttypes.h> 22#include <memory> 23#include <stdint.h> 24#include <stdlib.h> 25#include <string.h> 26 27#include <utils/Log.h> 28 29#include "MPEG4Extractor.h" 30#include "SampleTable.h" 31#include "ItemTable.h" 32#include "include/ESDS.h" 33 34#include <media/MediaSource.h> 35#include <media/stagefright/foundation/ABitReader.h> 36#include <media/stagefright/foundation/ABuffer.h> 37#include <media/stagefright/foundation/ADebug.h> 38#include <media/stagefright/foundation/AMessage.h> 39#include <media/stagefright/foundation/AUtils.h> 40#include <media/stagefright/foundation/ByteUtils.h> 41#include <media/stagefright/foundation/ColorUtils.h> 42#include <media/stagefright/foundation/avc_utils.h> 43#include <media/stagefright/foundation/hexdump.h> 44#include <media/stagefright/MediaBuffer.h> 45#include <media/stagefright/MediaBufferGroup.h> 46#include <media/stagefright/MediaDefs.h> 47#include <media/stagefright/MetaData.h> 48#include <utils/String8.h> 49 50#include <byteswap.h> 51#include "include/ID3.h" 52 53#ifndef UINT32_MAX 54#define UINT32_MAX (4294967295U) 55#endif 56 57namespace android { 58 59enum { 60 // max track header chunk to return 61 kMaxTrackHeaderSize = 32, 62 63 // maximum size of an atom. Some atoms can be bigger according to the spec, 64 // but we only allow up to this size. 65 kMaxAtomSize = 64 * 1024 * 1024, 66}; 67 68class MPEG4Source : public MediaSource { 69public: 70 // Caller retains ownership of both "dataSource" and "sampleTable". 71 MPEG4Source(const sp<MPEG4Extractor> &owner, 72 const sp<MetaData> &format, 73 const sp<DataSource> &dataSource, 74 int32_t timeScale, 75 const sp<SampleTable> &sampleTable, 76 Vector<SidxEntry> &sidx, 77 const Trex *trex, 78 off64_t firstMoofOffset, 79 const sp<ItemTable> &itemTable); 80 virtual status_t init(); 81 82 virtual status_t start(MetaData *params = NULL); 83 virtual status_t stop(); 84 85 virtual sp<MetaData> getFormat(); 86 87 virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL); 88 virtual bool supportNonblockingRead() { return true; } 89 virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL); 90 91protected: 92 virtual ~MPEG4Source(); 93 94private: 95 Mutex mLock; 96 97 // keep the MPEG4Extractor around, since we're referencing its data 98 sp<MPEG4Extractor> mOwner; 99 sp<MetaData> mFormat; 100 sp<DataSource> mDataSource; 101 int32_t mTimescale; 102 sp<SampleTable> mSampleTable; 103 uint32_t mCurrentSampleIndex; 104 uint32_t mCurrentFragmentIndex; 105 Vector<SidxEntry> &mSegments; 106 const Trex *mTrex; 107 off64_t mFirstMoofOffset; 108 off64_t mCurrentMoofOffset; 109 off64_t mNextMoofOffset; 110 uint32_t mCurrentTime; 111 int32_t mLastParsedTrackId; 112 int32_t mTrackId; 113 114 int32_t mCryptoMode; // passed in from extractor 115 int32_t mDefaultIVSize; // passed in from extractor 116 uint8_t mCryptoKey[16]; // passed in from extractor 117 uint32_t mCurrentAuxInfoType; 118 uint32_t mCurrentAuxInfoTypeParameter; 119 int32_t mCurrentDefaultSampleInfoSize; 120 uint32_t mCurrentSampleInfoCount; 121 uint32_t mCurrentSampleInfoAllocSize; 122 uint8_t* mCurrentSampleInfoSizes; 123 uint32_t mCurrentSampleInfoOffsetCount; 124 uint32_t mCurrentSampleInfoOffsetsAllocSize; 125 uint64_t* mCurrentSampleInfoOffsets; 126 127 bool mIsAVC; 128 bool mIsHEVC; 129 size_t mNALLengthSize; 130 131 bool mStarted; 132 133 MediaBufferGroup *mGroup; 134 135 MediaBuffer *mBuffer; 136 137 bool mWantsNALFragments; 138 139 uint8_t *mSrcBuffer; 140 141 bool mIsHEIF; 142 sp<ItemTable> mItemTable; 143 144 size_t parseNALSize(const uint8_t *data) const; 145 status_t parseChunk(off64_t *offset); 146 status_t parseTrackFragmentHeader(off64_t offset, off64_t size); 147 status_t parseTrackFragmentRun(off64_t offset, off64_t size); 148 status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size); 149 status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size); 150 151 struct TrackFragmentHeaderInfo { 152 enum Flags { 153 kBaseDataOffsetPresent = 0x01, 154 kSampleDescriptionIndexPresent = 0x02, 155 kDefaultSampleDurationPresent = 0x08, 156 kDefaultSampleSizePresent = 0x10, 157 kDefaultSampleFlagsPresent = 0x20, 158 kDurationIsEmpty = 0x10000, 159 }; 160 161 uint32_t mTrackID; 162 uint32_t mFlags; 163 uint64_t mBaseDataOffset; 164 uint32_t mSampleDescriptionIndex; 165 uint32_t mDefaultSampleDuration; 166 uint32_t mDefaultSampleSize; 167 uint32_t mDefaultSampleFlags; 168 169 uint64_t mDataOffset; 170 }; 171 TrackFragmentHeaderInfo mTrackFragmentHeaderInfo; 172 173 struct Sample { 174 off64_t offset; 175 size_t size; 176 uint32_t duration; 177 int32_t compositionOffset; 178 uint8_t iv[16]; 179 Vector<size_t> clearsizes; 180 Vector<size_t> encryptedsizes; 181 }; 182 Vector<Sample> mCurrentSamples; 183 184 MPEG4Source(const MPEG4Source &); 185 MPEG4Source &operator=(const MPEG4Source &); 186}; 187 188// This custom data source wraps an existing one and satisfies requests 189// falling entirely within a cached range from the cache while forwarding 190// all remaining requests to the wrapped datasource. 191// This is used to cache the full sampletable metadata for a single track, 192// possibly wrapping multiple times to cover all tracks, i.e. 193// Each MPEG4DataSource caches the sampletable metadata for a single track. 194 195struct MPEG4DataSource : public DataSource { 196 explicit MPEG4DataSource(const sp<DataSource> &source); 197 198 virtual status_t initCheck() const; 199 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 200 virtual status_t getSize(off64_t *size); 201 virtual uint32_t flags(); 202 203 status_t setCachedRange(off64_t offset, size_t size); 204 205protected: 206 virtual ~MPEG4DataSource(); 207 208private: 209 Mutex mLock; 210 211 sp<DataSource> mSource; 212 off64_t mCachedOffset; 213 size_t mCachedSize; 214 uint8_t *mCache; 215 216 void clearCache(); 217 218 MPEG4DataSource(const MPEG4DataSource &); 219 MPEG4DataSource &operator=(const MPEG4DataSource &); 220}; 221 222MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source) 223 : mSource(source), 224 mCachedOffset(0), 225 mCachedSize(0), 226 mCache(NULL) { 227} 228 229MPEG4DataSource::~MPEG4DataSource() { 230 clearCache(); 231} 232 233void MPEG4DataSource::clearCache() { 234 if (mCache) { 235 free(mCache); 236 mCache = NULL; 237 } 238 239 mCachedOffset = 0; 240 mCachedSize = 0; 241} 242 243status_t MPEG4DataSource::initCheck() const { 244 return mSource->initCheck(); 245} 246 247ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { 248 Mutex::Autolock autoLock(mLock); 249 250 if (isInRange(mCachedOffset, mCachedSize, offset, size)) { 251 memcpy(data, &mCache[offset - mCachedOffset], size); 252 return size; 253 } 254 255 return mSource->readAt(offset, data, size); 256} 257 258status_t MPEG4DataSource::getSize(off64_t *size) { 259 return mSource->getSize(size); 260} 261 262uint32_t MPEG4DataSource::flags() { 263 return mSource->flags(); 264} 265 266status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { 267 Mutex::Autolock autoLock(mLock); 268 269 clearCache(); 270 271 mCache = (uint8_t *)malloc(size); 272 273 if (mCache == NULL) { 274 return -ENOMEM; 275 } 276 277 mCachedOffset = offset; 278 mCachedSize = size; 279 280 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize); 281 282 if (err < (ssize_t)size) { 283 clearCache(); 284 285 return ERROR_IO; 286 } 287 288 return OK; 289} 290 291//////////////////////////////////////////////////////////////////////////////// 292 293static const bool kUseHexDump = false; 294 295static const char *FourCC2MIME(uint32_t fourcc) { 296 switch (fourcc) { 297 case FOURCC('m', 'p', '4', 'a'): 298 return MEDIA_MIMETYPE_AUDIO_AAC; 299 300 case FOURCC('s', 'a', 'm', 'r'): 301 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 302 303 case FOURCC('s', 'a', 'w', 'b'): 304 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 305 306 case FOURCC('m', 'p', '4', 'v'): 307 return MEDIA_MIMETYPE_VIDEO_MPEG4; 308 309 case FOURCC('s', '2', '6', '3'): 310 case FOURCC('h', '2', '6', '3'): 311 case FOURCC('H', '2', '6', '3'): 312 return MEDIA_MIMETYPE_VIDEO_H263; 313 314 case FOURCC('a', 'v', 'c', '1'): 315 return MEDIA_MIMETYPE_VIDEO_AVC; 316 317 case FOURCC('h', 'v', 'c', '1'): 318 case FOURCC('h', 'e', 'v', '1'): 319 return MEDIA_MIMETYPE_VIDEO_HEVC; 320 default: 321 CHECK(!"should not be here."); 322 return NULL; 323 } 324} 325 326static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 327 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 328 // AMR NB audio is always mono, 8kHz 329 *channels = 1; 330 *rate = 8000; 331 return true; 332 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 333 // AMR WB audio is always mono, 16kHz 334 *channels = 1; 335 *rate = 16000; 336 return true; 337 } 338 return false; 339} 340 341MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source) 342 : mMoofOffset(0), 343 mMoofFound(false), 344 mMdatFound(false), 345 mDataSource(source), 346 mInitCheck(NO_INIT), 347 mHeaderTimescale(0), 348 mIsQT(false), 349 mIsHEIF(false), 350 mFirstTrack(NULL), 351 mLastTrack(NULL), 352 mFileMetaData(new MetaData), 353 mFirstSINF(NULL), 354 mIsDrm(false) { 355} 356 357MPEG4Extractor::~MPEG4Extractor() { 358 release(); 359} 360 361void MPEG4Extractor::release() { 362 Track *track = mFirstTrack; 363 while (track) { 364 Track *next = track->next; 365 366 delete track; 367 track = next; 368 } 369 mFirstTrack = mLastTrack = NULL; 370 371 SINF *sinf = mFirstSINF; 372 while (sinf) { 373 SINF *next = sinf->next; 374 delete[] sinf->IPMPData; 375 delete sinf; 376 sinf = next; 377 } 378 mFirstSINF = NULL; 379 380 for (size_t i = 0; i < mPssh.size(); i++) { 381 delete [] mPssh[i].data; 382 } 383 mPssh.clear(); 384 385 if (mDataSource != NULL) { 386 mDataSource->close(); 387 mDataSource.clear(); 388 } 389} 390 391uint32_t MPEG4Extractor::flags() const { 392 return CAN_PAUSE | 393 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 394 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 395} 396 397sp<MetaData> MPEG4Extractor::getMetaData() { 398 status_t err; 399 if ((err = readMetaData()) != OK) { 400 return new MetaData; 401 } 402 403 return mFileMetaData; 404} 405 406size_t MPEG4Extractor::countTracks() { 407 status_t err; 408 if ((err = readMetaData()) != OK) { 409 ALOGV("MPEG4Extractor::countTracks: no tracks"); 410 return 0; 411 } 412 413 size_t n = 0; 414 Track *track = mFirstTrack; 415 while (track) { 416 ++n; 417 track = track->next; 418 } 419 420 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n); 421 return n; 422} 423 424sp<MetaData> MPEG4Extractor::getTrackMetaData( 425 size_t index, uint32_t flags) { 426 status_t err; 427 if ((err = readMetaData()) != OK) { 428 return NULL; 429 } 430 431 Track *track = mFirstTrack; 432 while (index > 0) { 433 if (track == NULL) { 434 return NULL; 435 } 436 437 track = track->next; 438 --index; 439 } 440 441 if (track == NULL) { 442 return NULL; 443 } 444 445 int64_t duration; 446 int32_t samplerate; 447 if (track->has_elst && mHeaderTimescale != 0 && 448 track->meta->findInt64(kKeyDuration, &duration) && 449 track->meta->findInt32(kKeySampleRate, &samplerate)) { 450 451 track->has_elst = false; 452 453 if (track->elst_segment_duration > INT64_MAX) { 454 goto editlistoverflow; 455 } 456 int64_t segment_duration = track->elst_segment_duration; 457 int64_t media_time = track->elst_media_time; 458 int64_t halfscale = mHeaderTimescale / 2; 459 460 int64_t delay; 461 // delay = ((media_time * samplerate) + halfscale) / mHeaderTimescale; 462 if (__builtin_mul_overflow(media_time, samplerate, &delay) || 463 __builtin_add_overflow(delay, halfscale, &delay) || 464 (delay /= mHeaderTimescale, false) || 465 delay > INT32_MAX || 466 delay < INT32_MIN) { 467 goto editlistoverflow; 468 } 469 track->meta->setInt32(kKeyEncoderDelay, delay); 470 471 int64_t scaled_duration; 472 // scaled_duration = ((duration * mHeaderTimescale) + 500000) / 1000000; 473 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration) || 474 __builtin_add_overflow(scaled_duration, 500000, &scaled_duration)) { 475 goto editlistoverflow; 476 } 477 scaled_duration /= 1000000; 478 479 int64_t segment_end; 480 int64_t padding; 481 if (__builtin_add_overflow(segment_duration, media_time, &segment_end) || 482 __builtin_sub_overflow(scaled_duration, segment_end, &padding)) { 483 goto editlistoverflow; 484 } 485 486 if (padding < 0) { 487 // track duration from media header (which is what kKeyDuration is) might 488 // be slightly shorter than the segment duration, which would make the 489 // padding negative. Clamp to zero. 490 padding = 0; 491 } 492 493 int64_t paddingsamples; 494 // paddingsamples = ((padding * samplerate) + halfscale) / mHeaderTimescale; 495 if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) || 496 __builtin_add_overflow(paddingsamples, halfscale, &paddingsamples) || 497 (paddingsamples /= mHeaderTimescale, false) || 498 paddingsamples > INT32_MAX) { 499 goto editlistoverflow; 500 } 501 track->meta->setInt32(kKeyEncoderPadding, paddingsamples); 502 } 503 editlistoverflow: 504 505 if ((flags & kIncludeExtensiveMetaData) 506 && !track->includes_expensive_metadata) { 507 track->includes_expensive_metadata = true; 508 509 const char *mime; 510 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 511 if (!strncasecmp("video/", mime, 6)) { 512 // MPEG2 tracks do not provide CSD, so read the stream header 513 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) { 514 off64_t offset; 515 size_t size; 516 if (track->sampleTable->getMetaDataForSample( 517 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) { 518 if (size > kMaxTrackHeaderSize) { 519 size = kMaxTrackHeaderSize; 520 } 521 uint8_t header[kMaxTrackHeaderSize]; 522 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) { 523 track->meta->setData(kKeyStreamHeader, 'mdat', header, size); 524 } 525 } 526 } 527 528 if (mMoofOffset > 0) { 529 int64_t duration; 530 if (track->meta->findInt64(kKeyDuration, &duration)) { 531 // nothing fancy, just pick a frame near 1/4th of the duration 532 track->meta->setInt64( 533 kKeyThumbnailTime, duration / 4); 534 } 535 } else { 536 uint32_t sampleIndex; 537 uint32_t sampleTime; 538 if (track->timescale != 0 && 539 track->sampleTable->findThumbnailSample(&sampleIndex) == OK 540 && track->sampleTable->getMetaDataForSample( 541 sampleIndex, NULL /* offset */, NULL /* size */, 542 &sampleTime) == OK) { 543 track->meta->setInt64( 544 kKeyThumbnailTime, 545 ((int64_t)sampleTime * 1000000) / track->timescale); 546 } 547 } 548 } 549 } 550 551 return track->meta; 552} 553 554status_t MPEG4Extractor::readMetaData() { 555 if (mInitCheck != NO_INIT) { 556 return mInitCheck; 557 } 558 559 off64_t offset = 0; 560 status_t err; 561 bool sawMoovOrSidx = false; 562 563 while (!((sawMoovOrSidx && (mMdatFound || mMoofFound)) || 564 (mIsHEIF && (mItemTable != NULL) && mItemTable->isValid()))) { 565 off64_t orig_offset = offset; 566 err = parseChunk(&offset, 0); 567 568 if (err != OK && err != UNKNOWN_ERROR) { 569 break; 570 } else if (offset <= orig_offset) { 571 // only continue parsing if the offset was advanced, 572 // otherwise we might end up in an infinite loop 573 ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset); 574 err = ERROR_MALFORMED; 575 break; 576 } else if (err == UNKNOWN_ERROR) { 577 sawMoovOrSidx = true; 578 } 579 } 580 581 if (mInitCheck == OK) { 582 if (findTrackByMimePrefix("video/") != NULL) { 583 mFileMetaData->setCString( 584 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 585 } else if (findTrackByMimePrefix("audio/") != NULL) { 586 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); 587 } else { 588 mFileMetaData->setCString(kKeyMIMEType, "application/octet-stream"); 589 } 590 } else { 591 mInitCheck = err; 592 } 593 594 CHECK_NE(err, (status_t)NO_INIT); 595 596 // copy pssh data into file metadata 597 uint64_t psshsize = 0; 598 for (size_t i = 0; i < mPssh.size(); i++) { 599 psshsize += 20 + mPssh[i].datalen; 600 } 601 if (psshsize > 0 && psshsize <= UINT32_MAX) { 602 char *buf = (char*)malloc(psshsize); 603 if (!buf) { 604 ALOGE("b/28471206"); 605 return NO_MEMORY; 606 } 607 char *ptr = buf; 608 for (size_t i = 0; i < mPssh.size(); i++) { 609 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 610 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 611 ptr += (20 + mPssh[i].datalen); 612 } 613 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize); 614 free(buf); 615 } 616 617 if (mIsHEIF) { 618 sp<MetaData> meta = mItemTable->getImageMeta(); 619 if (meta == NULL) { 620 return ERROR_MALFORMED; 621 } 622 623 Track *track = mLastTrack; 624 if (track != NULL) { 625 ALOGW("track is set before metadata is fully processed"); 626 } else { 627 track = new Track; 628 track->next = NULL; 629 mFirstTrack = mLastTrack = track; 630 } 631 632 track->meta = meta; 633 track->meta->setInt32(kKeyTrackID, 0); 634 track->includes_expensive_metadata = false; 635 track->skipTrack = false; 636 track->timescale = 0; 637 } 638 639 return mInitCheck; 640} 641 642char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { 643 if (mFirstSINF == NULL) { 644 return NULL; 645 } 646 647 SINF *sinf = mFirstSINF; 648 while (sinf && (trackID != sinf->trackID)) { 649 sinf = sinf->next; 650 } 651 652 if (sinf == NULL) { 653 return NULL; 654 } 655 656 *len = sinf->len; 657 return sinf->IPMPData; 658} 659 660// Reads an encoded integer 7 bits at a time until it encounters the high bit clear. 661static int32_t readSize(off64_t offset, 662 const sp<DataSource> &DataSource, uint8_t *numOfBytes) { 663 uint32_t size = 0; 664 uint8_t data; 665 bool moreData = true; 666 *numOfBytes = 0; 667 668 while (moreData) { 669 if (DataSource->readAt(offset, &data, 1) < 1) { 670 return -1; 671 } 672 offset ++; 673 moreData = (data >= 128) ? true : false; 674 size = (size << 7) | (data & 0x7f); // Take last 7 bits 675 (*numOfBytes) ++; 676 } 677 678 return size; 679} 680 681status_t MPEG4Extractor::parseDrmSINF( 682 off64_t * /* offset */, off64_t data_offset) { 683 uint8_t updateIdTag; 684 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 685 return ERROR_IO; 686 } 687 data_offset ++; 688 689 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 690 return ERROR_MALFORMED; 691 } 692 693 uint8_t numOfBytes; 694 int32_t size = readSize(data_offset, mDataSource, &numOfBytes); 695 if (size < 0) { 696 return ERROR_IO; 697 } 698 data_offset += numOfBytes; 699 700 while(size >= 11 ) { 701 uint8_t descriptorTag; 702 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { 703 return ERROR_IO; 704 } 705 data_offset ++; 706 707 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { 708 return ERROR_MALFORMED; 709 } 710 711 uint8_t buffer[8]; 712 //ObjectDescriptorID and ObjectDescriptor url flag 713 if (mDataSource->readAt(data_offset, buffer, 2) < 2) { 714 return ERROR_IO; 715 } 716 data_offset += 2; 717 718 if ((buffer[1] >> 5) & 0x0001) { //url flag is set 719 return ERROR_MALFORMED; 720 } 721 722 if (mDataSource->readAt(data_offset, buffer, 8) < 8) { 723 return ERROR_IO; 724 } 725 data_offset += 8; 726 727 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) 728 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { 729 return ERROR_MALFORMED; 730 } 731 732 SINF *sinf = new SINF; 733 sinf->trackID = U16_AT(&buffer[3]); 734 sinf->IPMPDescriptorID = buffer[7]; 735 sinf->next = mFirstSINF; 736 mFirstSINF = sinf; 737 738 size -= (8 + 2 + 1); 739 } 740 741 if (size != 0) { 742 return ERROR_MALFORMED; 743 } 744 745 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 746 return ERROR_IO; 747 } 748 data_offset ++; 749 750 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 751 return ERROR_MALFORMED; 752 } 753 754 size = readSize(data_offset, mDataSource, &numOfBytes); 755 if (size < 0) { 756 return ERROR_IO; 757 } 758 data_offset += numOfBytes; 759 760 while (size > 0) { 761 uint8_t tag; 762 int32_t dataLen; 763 if (mDataSource->readAt(data_offset, &tag, 1) < 1) { 764 return ERROR_IO; 765 } 766 data_offset ++; 767 768 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { 769 uint8_t id; 770 dataLen = readSize(data_offset, mDataSource, &numOfBytes); 771 if (dataLen < 0) { 772 return ERROR_IO; 773 } else if (dataLen < 4) { 774 return ERROR_MALFORMED; 775 } 776 data_offset += numOfBytes; 777 778 if (mDataSource->readAt(data_offset, &id, 1) < 1) { 779 return ERROR_IO; 780 } 781 data_offset ++; 782 783 SINF *sinf = mFirstSINF; 784 while (sinf && (sinf->IPMPDescriptorID != id)) { 785 sinf = sinf->next; 786 } 787 if (sinf == NULL) { 788 return ERROR_MALFORMED; 789 } 790 sinf->len = dataLen - 3; 791 sinf->IPMPData = new (std::nothrow) char[sinf->len]; 792 if (sinf->IPMPData == NULL) { 793 return ERROR_MALFORMED; 794 } 795 data_offset += 2; 796 797 if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) { 798 return ERROR_IO; 799 } 800 data_offset += sinf->len; 801 802 size -= (dataLen + numOfBytes + 1); 803 } 804 } 805 806 if (size != 0) { 807 return ERROR_MALFORMED; 808 } 809 810 return UNKNOWN_ERROR; // Return a dummy error. 811} 812 813struct PathAdder { 814 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 815 : mPath(path) { 816 mPath->push(chunkType); 817 } 818 819 ~PathAdder() { 820 mPath->pop(); 821 } 822 823private: 824 Vector<uint32_t> *mPath; 825 826 PathAdder(const PathAdder &); 827 PathAdder &operator=(const PathAdder &); 828}; 829 830static bool underMetaDataPath(const Vector<uint32_t> &path) { 831 return path.size() >= 5 832 && path[0] == FOURCC('m', 'o', 'o', 'v') 833 && path[1] == FOURCC('u', 'd', 't', 'a') 834 && path[2] == FOURCC('m', 'e', 't', 'a') 835 && path[3] == FOURCC('i', 'l', 's', 't'); 836} 837 838static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) { 839 return path.size() >= 2 840 && path[0] == FOURCC('m', 'o', 'o', 'v') 841 && path[1] == FOURCC('m', 'e', 't', 'a') 842 && (depth == 2 843 || (depth == 3 844 && (path[2] == FOURCC('h', 'd', 'l', 'r') 845 || path[2] == FOURCC('i', 'l', 's', 't') 846 || path[2] == FOURCC('k', 'e', 'y', 's')))); 847} 848 849// Given a time in seconds since Jan 1 1904, produce a human-readable string. 850static bool convertTimeToDate(int64_t time_1904, String8 *s) { 851 // delta between mpeg4 time and unix epoch time 852 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600); 853 if (time_1904 < INT64_MIN + delta) { 854 return false; 855 } 856 time_t time_1970 = time_1904 - delta; 857 858 char tmp[32]; 859 struct tm* tm = gmtime(&time_1970); 860 if (tm != NULL && 861 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) { 862 s->setTo(tmp); 863 return true; 864 } 865 return false; 866} 867 868status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 869 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth); 870 871 if (*offset < 0) { 872 ALOGE("b/23540914"); 873 return ERROR_MALFORMED; 874 } 875 if (depth > 100) { 876 ALOGE("b/27456299"); 877 return ERROR_MALFORMED; 878 } 879 uint32_t hdr[2]; 880 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 881 return ERROR_IO; 882 } 883 uint64_t chunk_size = ntohl(hdr[0]); 884 int32_t chunk_type = ntohl(hdr[1]); 885 off64_t data_offset = *offset + 8; 886 887 if (chunk_size == 1) { 888 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 889 return ERROR_IO; 890 } 891 chunk_size = ntoh64(chunk_size); 892 data_offset += 8; 893 894 if (chunk_size < 16) { 895 // The smallest valid chunk is 16 bytes long in this case. 896 return ERROR_MALFORMED; 897 } 898 } else if (chunk_size == 0) { 899 if (depth == 0) { 900 // atom extends to end of file 901 off64_t sourceSize; 902 if (mDataSource->getSize(&sourceSize) == OK) { 903 chunk_size = (sourceSize - *offset); 904 } else { 905 // XXX could we just pick a "sufficiently large" value here? 906 ALOGE("atom size is 0, and data source has no size"); 907 return ERROR_MALFORMED; 908 } 909 } else { 910 // not allowed for non-toplevel atoms, skip it 911 *offset += 4; 912 return OK; 913 } 914 } else if (chunk_size < 8) { 915 // The smallest valid chunk is 8 bytes long. 916 ALOGE("invalid chunk size: %" PRIu64, chunk_size); 917 return ERROR_MALFORMED; 918 } 919 920 char chunk[5]; 921 MakeFourCCString(chunk_type, chunk); 922 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth); 923 924 if (kUseHexDump) { 925 static const char kWhitespace[] = " "; 926 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 927 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size); 928 929 char buffer[256]; 930 size_t n = chunk_size; 931 if (n > sizeof(buffer)) { 932 n = sizeof(buffer); 933 } 934 if (mDataSource->readAt(*offset, buffer, n) 935 < (ssize_t)n) { 936 return ERROR_IO; 937 } 938 939 hexdump(buffer, n); 940 } 941 942 PathAdder autoAdder(&mPath, chunk_type); 943 944 // (data_offset - *offset) is either 8 or 16 945 off64_t chunk_data_size = chunk_size - (data_offset - *offset); 946 if (chunk_data_size < 0) { 947 ALOGE("b/23540914"); 948 return ERROR_MALFORMED; 949 } 950 if (chunk_type != FOURCC('m', 'd', 'a', 't') && chunk_data_size > kMaxAtomSize) { 951 char errMsg[100]; 952 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size); 953 ALOGE("%s (b/28615448)", errMsg); 954 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg)); 955 return ERROR_MALFORMED; 956 } 957 958 if (chunk_type != FOURCC('c', 'p', 'r', 't') 959 && chunk_type != FOURCC('c', 'o', 'v', 'r') 960 && mPath.size() == 5 && underMetaDataPath(mPath)) { 961 off64_t stop_offset = *offset + chunk_size; 962 *offset = data_offset; 963 while (*offset < stop_offset) { 964 status_t err = parseChunk(offset, depth + 1); 965 if (err != OK) { 966 return err; 967 } 968 } 969 970 if (*offset != stop_offset) { 971 return ERROR_MALFORMED; 972 } 973 974 return OK; 975 } 976 977 switch(chunk_type) { 978 case FOURCC('m', 'o', 'o', 'v'): 979 case FOURCC('t', 'r', 'a', 'k'): 980 case FOURCC('m', 'd', 'i', 'a'): 981 case FOURCC('m', 'i', 'n', 'f'): 982 case FOURCC('d', 'i', 'n', 'f'): 983 case FOURCC('s', 't', 'b', 'l'): 984 case FOURCC('m', 'v', 'e', 'x'): 985 case FOURCC('m', 'o', 'o', 'f'): 986 case FOURCC('t', 'r', 'a', 'f'): 987 case FOURCC('m', 'f', 'r', 'a'): 988 case FOURCC('u', 'd', 't', 'a'): 989 case FOURCC('i', 'l', 's', 't'): 990 case FOURCC('s', 'i', 'n', 'f'): 991 case FOURCC('s', 'c', 'h', 'i'): 992 case FOURCC('e', 'd', 't', 's'): 993 case FOURCC('w', 'a', 'v', 'e'): 994 { 995 if (chunk_type == FOURCC('m', 'o', 'o', 'v') && depth != 0) { 996 ALOGE("moov: depth %d", depth); 997 return ERROR_MALFORMED; 998 } 999 1000 if (chunk_type == FOURCC('m', 'o', 'o', 'v') && mInitCheck == OK) { 1001 ALOGE("duplicate moov"); 1002 return ERROR_MALFORMED; 1003 } 1004 1005 if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) { 1006 // store the offset of the first segment 1007 mMoofFound = true; 1008 mMoofOffset = *offset; 1009 } 1010 1011 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 1012 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size); 1013 1014 if (mDataSource->flags() 1015 & (DataSource::kWantsPrefetching 1016 | DataSource::kIsCachingDataSource)) { 1017 sp<MPEG4DataSource> cachedSource = 1018 new MPEG4DataSource(mDataSource); 1019 1020 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { 1021 mDataSource = cachedSource; 1022 } 1023 } 1024 1025 if (mLastTrack == NULL) { 1026 return ERROR_MALFORMED; 1027 } 1028 1029 mLastTrack->sampleTable = new SampleTable(mDataSource); 1030 } 1031 1032 bool isTrack = false; 1033 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 1034 if (depth != 1) { 1035 ALOGE("trak: depth %d", depth); 1036 return ERROR_MALFORMED; 1037 } 1038 isTrack = true; 1039 1040 Track *track = new Track; 1041 track->next = NULL; 1042 if (mLastTrack) { 1043 mLastTrack->next = track; 1044 } else { 1045 mFirstTrack = track; 1046 } 1047 mLastTrack = track; 1048 1049 track->meta = new MetaData; 1050 track->includes_expensive_metadata = false; 1051 track->skipTrack = false; 1052 track->timescale = 0; 1053 track->meta->setCString(kKeyMIMEType, "application/octet-stream"); 1054 track->has_elst = false; 1055 } 1056 1057 off64_t stop_offset = *offset + chunk_size; 1058 *offset = data_offset; 1059 while (*offset < stop_offset) { 1060 status_t err = parseChunk(offset, depth + 1); 1061 if (err != OK) { 1062 if (isTrack) { 1063 mLastTrack->skipTrack = true; 1064 break; 1065 } 1066 return err; 1067 } 1068 } 1069 1070 if (*offset != stop_offset) { 1071 return ERROR_MALFORMED; 1072 } 1073 1074 if (isTrack) { 1075 int32_t trackId; 1076 // There must be exact one track header per track. 1077 if (!mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) { 1078 mLastTrack->skipTrack = true; 1079 } 1080 1081 status_t err = verifyTrack(mLastTrack); 1082 if (err != OK) { 1083 mLastTrack->skipTrack = true; 1084 } 1085 1086 if (mLastTrack->skipTrack) { 1087 Track *cur = mFirstTrack; 1088 1089 if (cur == mLastTrack) { 1090 delete cur; 1091 mFirstTrack = mLastTrack = NULL; 1092 } else { 1093 while (cur && cur->next != mLastTrack) { 1094 cur = cur->next; 1095 } 1096 if (cur) { 1097 cur->next = NULL; 1098 } 1099 delete mLastTrack; 1100 mLastTrack = cur; 1101 } 1102 1103 return OK; 1104 } 1105 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 1106 mInitCheck = OK; 1107 1108 if (!mIsDrm) { 1109 return UNKNOWN_ERROR; // Return a dummy error. 1110 } else { 1111 return OK; 1112 } 1113 } 1114 break; 1115 } 1116 1117 case FOURCC('e', 'l', 's', 't'): 1118 { 1119 *offset += chunk_size; 1120 1121 if (!mLastTrack) { 1122 return ERROR_MALFORMED; 1123 } 1124 1125 // See 14496-12 8.6.6 1126 uint8_t version; 1127 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 1128 return ERROR_IO; 1129 } 1130 1131 uint32_t entry_count; 1132 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 1133 return ERROR_IO; 1134 } 1135 1136 if (entry_count != 1) { 1137 // we only support a single entry at the moment, for gapless playback 1138 ALOGW("ignoring edit list with %d entries", entry_count); 1139 } else { 1140 off64_t entriesoffset = data_offset + 8; 1141 uint64_t segment_duration; 1142 int64_t media_time; 1143 1144 if (version == 1) { 1145 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 1146 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 1147 return ERROR_IO; 1148 } 1149 } else if (version == 0) { 1150 uint32_t sd; 1151 int32_t mt; 1152 if (!mDataSource->getUInt32(entriesoffset, &sd) || 1153 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 1154 return ERROR_IO; 1155 } 1156 segment_duration = sd; 1157 media_time = mt; 1158 } else { 1159 return ERROR_IO; 1160 } 1161 1162 // save these for later, because the elst atom might precede 1163 // the atoms that actually gives us the duration and sample rate 1164 // needed to calculate the padding and delay values 1165 mLastTrack->has_elst = true; 1166 mLastTrack->elst_media_time = media_time; 1167 mLastTrack->elst_segment_duration = segment_duration; 1168 } 1169 break; 1170 } 1171 1172 case FOURCC('f', 'r', 'm', 'a'): 1173 { 1174 *offset += chunk_size; 1175 1176 uint32_t original_fourcc; 1177 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 1178 return ERROR_IO; 1179 } 1180 original_fourcc = ntohl(original_fourcc); 1181 ALOGV("read original format: %d", original_fourcc); 1182 1183 if (mLastTrack == NULL) { 1184 return ERROR_MALFORMED; 1185 } 1186 1187 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 1188 uint32_t num_channels = 0; 1189 uint32_t sample_rate = 0; 1190 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 1191 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1192 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1193 } 1194 break; 1195 } 1196 1197 case FOURCC('t', 'e', 'n', 'c'): 1198 { 1199 *offset += chunk_size; 1200 1201 if (chunk_size < 32) { 1202 return ERROR_MALFORMED; 1203 } 1204 1205 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1206 // default IV size, 16 bytes default KeyID 1207 // (ISO 23001-7) 1208 char buf[4]; 1209 memset(buf, 0, 4); 1210 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1211 return ERROR_IO; 1212 } 1213 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1214 if (defaultAlgorithmId > 1) { 1215 // only 0 (clear) and 1 (AES-128) are valid 1216 return ERROR_MALFORMED; 1217 } 1218 1219 memset(buf, 0, 4); 1220 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1221 return ERROR_IO; 1222 } 1223 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 1224 1225 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 1226 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 1227 // only unencrypted data must have 0 IV size 1228 return ERROR_MALFORMED; 1229 } else if (defaultIVSize != 0 && 1230 defaultIVSize != 8 && 1231 defaultIVSize != 16) { 1232 // only supported sizes are 0, 8 and 16 1233 return ERROR_MALFORMED; 1234 } 1235 1236 uint8_t defaultKeyId[16]; 1237 1238 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1239 return ERROR_IO; 1240 } 1241 1242 if (mLastTrack == NULL) 1243 return ERROR_MALFORMED; 1244 1245 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId); 1246 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1247 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1248 break; 1249 } 1250 1251 case FOURCC('t', 'k', 'h', 'd'): 1252 { 1253 *offset += chunk_size; 1254 1255 status_t err; 1256 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1257 return err; 1258 } 1259 1260 break; 1261 } 1262 1263 case FOURCC('p', 's', 's', 'h'): 1264 { 1265 *offset += chunk_size; 1266 1267 PsshInfo pssh; 1268 1269 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1270 return ERROR_IO; 1271 } 1272 1273 uint32_t psshdatalen = 0; 1274 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1275 return ERROR_IO; 1276 } 1277 pssh.datalen = ntohl(psshdatalen); 1278 ALOGV("pssh data size: %d", pssh.datalen); 1279 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) { 1280 // pssh data length exceeds size of containing box 1281 return ERROR_MALFORMED; 1282 } 1283 1284 pssh.data = new (std::nothrow) uint8_t[pssh.datalen]; 1285 if (pssh.data == NULL) { 1286 return ERROR_MALFORMED; 1287 } 1288 ALOGV("allocated pssh @ %p", pssh.data); 1289 ssize_t requested = (ssize_t) pssh.datalen; 1290 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1291 delete[] pssh.data; 1292 return ERROR_IO; 1293 } 1294 mPssh.push_back(pssh); 1295 1296 break; 1297 } 1298 1299 case FOURCC('m', 'd', 'h', 'd'): 1300 { 1301 *offset += chunk_size; 1302 1303 if (chunk_data_size < 4 || mLastTrack == NULL) { 1304 return ERROR_MALFORMED; 1305 } 1306 1307 uint8_t version; 1308 if (mDataSource->readAt( 1309 data_offset, &version, sizeof(version)) 1310 < (ssize_t)sizeof(version)) { 1311 return ERROR_IO; 1312 } 1313 1314 off64_t timescale_offset; 1315 1316 if (version == 1) { 1317 timescale_offset = data_offset + 4 + 16; 1318 } else if (version == 0) { 1319 timescale_offset = data_offset + 4 + 8; 1320 } else { 1321 return ERROR_IO; 1322 } 1323 1324 uint32_t timescale; 1325 if (mDataSource->readAt( 1326 timescale_offset, ×cale, sizeof(timescale)) 1327 < (ssize_t)sizeof(timescale)) { 1328 return ERROR_IO; 1329 } 1330 1331 if (!timescale) { 1332 ALOGE("timescale should not be ZERO."); 1333 return ERROR_MALFORMED; 1334 } 1335 1336 mLastTrack->timescale = ntohl(timescale); 1337 1338 // 14496-12 says all ones means indeterminate, but some files seem to use 1339 // 0 instead. We treat both the same. 1340 int64_t duration = 0; 1341 if (version == 1) { 1342 if (mDataSource->readAt( 1343 timescale_offset + 4, &duration, sizeof(duration)) 1344 < (ssize_t)sizeof(duration)) { 1345 return ERROR_IO; 1346 } 1347 if (duration != -1) { 1348 duration = ntoh64(duration); 1349 } 1350 } else { 1351 uint32_t duration32; 1352 if (mDataSource->readAt( 1353 timescale_offset + 4, &duration32, sizeof(duration32)) 1354 < (ssize_t)sizeof(duration32)) { 1355 return ERROR_IO; 1356 } 1357 if (duration32 != 0xffffffff) { 1358 duration = ntohl(duration32); 1359 } 1360 } 1361 if (duration != 0 && mLastTrack->timescale != 0) { 1362 mLastTrack->meta->setInt64( 1363 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1364 } 1365 1366 uint8_t lang[2]; 1367 off64_t lang_offset; 1368 if (version == 1) { 1369 lang_offset = timescale_offset + 4 + 8; 1370 } else if (version == 0) { 1371 lang_offset = timescale_offset + 4 + 4; 1372 } else { 1373 return ERROR_IO; 1374 } 1375 1376 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1377 < (ssize_t)sizeof(lang)) { 1378 return ERROR_IO; 1379 } 1380 1381 // To get the ISO-639-2/T three character language code 1382 // 1 bit pad followed by 3 5-bits characters. Each character 1383 // is packed as the difference between its ASCII value and 0x60. 1384 char lang_code[4]; 1385 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1386 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1387 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1388 lang_code[3] = '\0'; 1389 1390 mLastTrack->meta->setCString( 1391 kKeyMediaLanguage, lang_code); 1392 1393 break; 1394 } 1395 1396 case FOURCC('s', 't', 's', 'd'): 1397 { 1398 uint8_t buffer[8]; 1399 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1400 return ERROR_MALFORMED; 1401 } 1402 1403 if (mDataSource->readAt( 1404 data_offset, buffer, 8) < 8) { 1405 return ERROR_IO; 1406 } 1407 1408 if (U32_AT(buffer) != 0) { 1409 // Should be version 0, flags 0. 1410 return ERROR_MALFORMED; 1411 } 1412 1413 uint32_t entry_count = U32_AT(&buffer[4]); 1414 1415 if (entry_count > 1) { 1416 // For 3GPP timed text, there could be multiple tx3g boxes contain 1417 // multiple text display formats. These formats will be used to 1418 // display the timed text. 1419 // For encrypted files, there may also be more than one entry. 1420 const char *mime; 1421 1422 if (mLastTrack == NULL) 1423 return ERROR_MALFORMED; 1424 1425 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1426 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1427 strcasecmp(mime, "application/octet-stream")) { 1428 // For now we only support a single type of media per track. 1429 mLastTrack->skipTrack = true; 1430 *offset += chunk_size; 1431 break; 1432 } 1433 } 1434 off64_t stop_offset = *offset + chunk_size; 1435 *offset = data_offset + 8; 1436 for (uint32_t i = 0; i < entry_count; ++i) { 1437 status_t err = parseChunk(offset, depth + 1); 1438 if (err != OK) { 1439 return err; 1440 } 1441 } 1442 1443 if (*offset != stop_offset) { 1444 return ERROR_MALFORMED; 1445 } 1446 break; 1447 } 1448 case FOURCC('m', 'e', 't', 't'): 1449 { 1450 *offset += chunk_size; 1451 1452 if (mLastTrack == NULL) 1453 return ERROR_MALFORMED; 1454 1455 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1456 if (buffer->data() == NULL) { 1457 return NO_MEMORY; 1458 } 1459 1460 if (mDataSource->readAt( 1461 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1462 return ERROR_IO; 1463 } 1464 1465 String8 mimeFormat((const char *)(buffer->data()), chunk_data_size); 1466 mLastTrack->meta->setCString(kKeyMIMEType, mimeFormat.string()); 1467 1468 break; 1469 } 1470 1471 case FOURCC('m', 'p', '4', 'a'): 1472 case FOURCC('e', 'n', 'c', 'a'): 1473 case FOURCC('s', 'a', 'm', 'r'): 1474 case FOURCC('s', 'a', 'w', 'b'): 1475 { 1476 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a') 1477 && depth >= 1 && mPath[depth - 1] == FOURCC('w', 'a', 'v', 'e')) { 1478 // Ignore mp4a embedded in QT wave atom 1479 *offset += chunk_size; 1480 break; 1481 } 1482 1483 uint8_t buffer[8 + 20]; 1484 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1485 // Basic AudioSampleEntry size. 1486 return ERROR_MALFORMED; 1487 } 1488 1489 if (mDataSource->readAt( 1490 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1491 return ERROR_IO; 1492 } 1493 1494 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1495 uint16_t version = U16_AT(&buffer[8]); 1496 uint32_t num_channels = U16_AT(&buffer[16]); 1497 1498 uint16_t sample_size = U16_AT(&buffer[18]); 1499 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1500 1501 if (mLastTrack == NULL) 1502 return ERROR_MALFORMED; 1503 1504 off64_t stop_offset = *offset + chunk_size; 1505 *offset = data_offset + sizeof(buffer); 1506 1507 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')) { 1508 if (version == 1) { 1509 if (mDataSource->readAt(*offset, buffer, 16) < 16) { 1510 return ERROR_IO; 1511 } 1512 1513#if 0 1514 U32_AT(buffer); // samples per packet 1515 U32_AT(&buffer[4]); // bytes per packet 1516 U32_AT(&buffer[8]); // bytes per frame 1517 U32_AT(&buffer[12]); // bytes per sample 1518#endif 1519 *offset += 16; 1520 } else if (version == 2) { 1521 uint8_t v2buffer[36]; 1522 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) { 1523 return ERROR_IO; 1524 } 1525 1526#if 0 1527 U32_AT(v2buffer); // size of struct only 1528 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate 1529 num_channels = U32_AT(&v2buffer[12]); // num audio channels 1530 U32_AT(&v2buffer[16]); // always 0x7f000000 1531 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel 1532 U32_AT(&v2buffer[24]); // format specifc flags 1533 U32_AT(&v2buffer[28]); // const bytes per audio packet 1534 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet 1535#endif 1536 *offset += 36; 1537 } 1538 } 1539 1540 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1541 // if the chunk type is enca, we'll get the type from the sinf/frma box later 1542 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1543 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1544 } 1545 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1546 chunk, num_channels, sample_size, sample_rate); 1547 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1548 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1549 1550 while (*offset < stop_offset) { 1551 status_t err = parseChunk(offset, depth + 1); 1552 if (err != OK) { 1553 return err; 1554 } 1555 } 1556 1557 if (*offset != stop_offset) { 1558 return ERROR_MALFORMED; 1559 } 1560 break; 1561 } 1562 1563 case FOURCC('m', 'p', '4', 'v'): 1564 case FOURCC('e', 'n', 'c', 'v'): 1565 case FOURCC('s', '2', '6', '3'): 1566 case FOURCC('H', '2', '6', '3'): 1567 case FOURCC('h', '2', '6', '3'): 1568 case FOURCC('a', 'v', 'c', '1'): 1569 case FOURCC('h', 'v', 'c', '1'): 1570 case FOURCC('h', 'e', 'v', '1'): 1571 { 1572 uint8_t buffer[78]; 1573 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1574 // Basic VideoSampleEntry size. 1575 return ERROR_MALFORMED; 1576 } 1577 1578 if (mDataSource->readAt( 1579 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1580 return ERROR_IO; 1581 } 1582 1583 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1584 uint16_t width = U16_AT(&buffer[6 + 18]); 1585 uint16_t height = U16_AT(&buffer[6 + 20]); 1586 1587 // The video sample is not standard-compliant if it has invalid dimension. 1588 // Use some default width and height value, and 1589 // let the decoder figure out the actual width and height (and thus 1590 // be prepared for INFO_FOMRAT_CHANGED event). 1591 if (width == 0) width = 352; 1592 if (height == 0) height = 288; 1593 1594 // printf("*** coding='%s' width=%d height=%d\n", 1595 // chunk, width, height); 1596 1597 if (mLastTrack == NULL) 1598 return ERROR_MALFORMED; 1599 1600 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1601 // if the chunk type is encv, we'll get the type from the sinf/frma box later 1602 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1603 } 1604 mLastTrack->meta->setInt32(kKeyWidth, width); 1605 mLastTrack->meta->setInt32(kKeyHeight, height); 1606 1607 off64_t stop_offset = *offset + chunk_size; 1608 *offset = data_offset + sizeof(buffer); 1609 while (*offset < stop_offset) { 1610 status_t err = parseChunk(offset, depth + 1); 1611 if (err != OK) { 1612 return err; 1613 } 1614 } 1615 1616 if (*offset != stop_offset) { 1617 return ERROR_MALFORMED; 1618 } 1619 break; 1620 } 1621 1622 case FOURCC('s', 't', 'c', 'o'): 1623 case FOURCC('c', 'o', '6', '4'): 1624 { 1625 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) { 1626 return ERROR_MALFORMED; 1627 } 1628 1629 status_t err = 1630 mLastTrack->sampleTable->setChunkOffsetParams( 1631 chunk_type, data_offset, chunk_data_size); 1632 1633 *offset += chunk_size; 1634 1635 if (err != OK) { 1636 return err; 1637 } 1638 1639 break; 1640 } 1641 1642 case FOURCC('s', 't', 's', 'c'): 1643 { 1644 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1645 return ERROR_MALFORMED; 1646 1647 status_t err = 1648 mLastTrack->sampleTable->setSampleToChunkParams( 1649 data_offset, chunk_data_size); 1650 1651 *offset += chunk_size; 1652 1653 if (err != OK) { 1654 return err; 1655 } 1656 1657 break; 1658 } 1659 1660 case FOURCC('s', 't', 's', 'z'): 1661 case FOURCC('s', 't', 'z', '2'): 1662 { 1663 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) { 1664 return ERROR_MALFORMED; 1665 } 1666 1667 status_t err = 1668 mLastTrack->sampleTable->setSampleSizeParams( 1669 chunk_type, data_offset, chunk_data_size); 1670 1671 *offset += chunk_size; 1672 1673 if (err != OK) { 1674 return err; 1675 } 1676 1677 size_t max_size; 1678 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1679 1680 if (err != OK) { 1681 return err; 1682 } 1683 1684 if (max_size != 0) { 1685 // Assume that a given buffer only contains at most 10 chunks, 1686 // each chunk originally prefixed with a 2 byte length will 1687 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1688 // and thus will grow by 2 bytes per chunk. 1689 if (max_size > SIZE_MAX - 10 * 2) { 1690 ALOGE("max sample size too big: %zu", max_size); 1691 return ERROR_MALFORMED; 1692 } 1693 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1694 } else { 1695 // No size was specified. Pick a conservatively large size. 1696 uint32_t width, height; 1697 if (!mLastTrack->meta->findInt32(kKeyWidth, (int32_t*)&width) || 1698 !mLastTrack->meta->findInt32(kKeyHeight,(int32_t*) &height)) { 1699 ALOGE("No width or height, assuming worst case 1080p"); 1700 width = 1920; 1701 height = 1080; 1702 } else { 1703 // A resolution was specified, check that it's not too big. The values below 1704 // were chosen so that the calculations below don't cause overflows, they're 1705 // not indicating that resolutions up to 32kx32k are actually supported. 1706 if (width > 32768 || height > 32768) { 1707 ALOGE("can't support %u x %u video", width, height); 1708 return ERROR_MALFORMED; 1709 } 1710 } 1711 1712 const char *mime; 1713 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1714 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC) 1715 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 1716 // AVC & HEVC requires compression ratio of at least 2, and uses 1717 // macroblocks 1718 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1719 } else { 1720 // For all other formats there is no minimum compression 1721 // ratio. Use compression ratio of 1. 1722 max_size = width * height * 3 / 2; 1723 } 1724 // HACK: allow 10% overhead 1725 // TODO: read sample size from traf atom for fragmented MPEG4. 1726 max_size += max_size / 10; 1727 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size); 1728 } 1729 1730 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1731 // mimetype) previously obtained, so don't cache them. 1732 const char *mime; 1733 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1734 // Calculate average frame rate. 1735 if (!strncasecmp("video/", mime, 6)) { 1736 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1737 if (nSamples == 0) { 1738 int32_t trackId; 1739 if (mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) { 1740 for (size_t i = 0; i < mTrex.size(); i++) { 1741 Trex *t = &mTrex.editItemAt(i); 1742 if (t->track_ID == (uint32_t) trackId) { 1743 if (t->default_sample_duration > 0) { 1744 int32_t frameRate = 1745 mLastTrack->timescale / t->default_sample_duration; 1746 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1747 } 1748 break; 1749 } 1750 } 1751 } 1752 } else { 1753 int64_t durationUs; 1754 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { 1755 if (durationUs > 0) { 1756 int32_t frameRate = (nSamples * 1000000LL + 1757 (durationUs >> 1)) / durationUs; 1758 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1759 } 1760 } 1761 } 1762 } 1763 1764 break; 1765 } 1766 1767 case FOURCC('s', 't', 't', 's'): 1768 { 1769 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1770 return ERROR_MALFORMED; 1771 1772 *offset += chunk_size; 1773 1774 status_t err = 1775 mLastTrack->sampleTable->setTimeToSampleParams( 1776 data_offset, chunk_data_size); 1777 1778 if (err != OK) { 1779 return err; 1780 } 1781 1782 break; 1783 } 1784 1785 case FOURCC('c', 't', 't', 's'): 1786 { 1787 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1788 return ERROR_MALFORMED; 1789 1790 *offset += chunk_size; 1791 1792 status_t err = 1793 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1794 data_offset, chunk_data_size); 1795 1796 if (err != OK) { 1797 return err; 1798 } 1799 1800 break; 1801 } 1802 1803 case FOURCC('s', 't', 's', 's'): 1804 { 1805 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1806 return ERROR_MALFORMED; 1807 1808 *offset += chunk_size; 1809 1810 status_t err = 1811 mLastTrack->sampleTable->setSyncSampleParams( 1812 data_offset, chunk_data_size); 1813 1814 if (err != OK) { 1815 return err; 1816 } 1817 1818 break; 1819 } 1820 1821 // \xA9xyz 1822 case FOURCC(0xA9, 'x', 'y', 'z'): 1823 { 1824 *offset += chunk_size; 1825 1826 // Best case the total data length inside "\xA9xyz" box would 1827 // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/", 1828 // where "\x00\x05" is the text string length with value = 5, 1829 // "\0x15\xc7" is the language code = en, and "+0+0/" is a 1830 // location (string) value with longitude = 0 and latitude = 0. 1831 // Since some devices encountered in the wild omit the trailing 1832 // slash, we'll allow that. 1833 if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing / 1834 return ERROR_MALFORMED; 1835 } 1836 1837 uint16_t len; 1838 if (!mDataSource->getUInt16(data_offset, &len)) { 1839 return ERROR_IO; 1840 } 1841 1842 // allow "+0+0" without trailing slash 1843 if (len < 4 || len > chunk_data_size - 4) { 1844 return ERROR_MALFORMED; 1845 } 1846 // The location string following the language code is formatted 1847 // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709). 1848 // Allocate 2 extra bytes, in case we need to add a trailing slash, 1849 // and to add a terminating 0. 1850 std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]()); 1851 if (!buffer) { 1852 return NO_MEMORY; 1853 } 1854 1855 if (mDataSource->readAt( 1856 data_offset + 4, &buffer[0], len) < len) { 1857 return ERROR_IO; 1858 } 1859 1860 len = strlen(&buffer[0]); 1861 if (len < 4) { 1862 return ERROR_MALFORMED; 1863 } 1864 // Add a trailing slash if there wasn't one. 1865 if (buffer[len - 1] != '/') { 1866 buffer[len] = '/'; 1867 } 1868 mFileMetaData->setCString(kKeyLocation, &buffer[0]); 1869 break; 1870 } 1871 1872 case FOURCC('e', 's', 'd', 's'): 1873 { 1874 *offset += chunk_size; 1875 1876 if (chunk_data_size < 4) { 1877 return ERROR_MALFORMED; 1878 } 1879 1880 uint8_t buffer[256]; 1881 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1882 return ERROR_BUFFER_TOO_SMALL; 1883 } 1884 1885 if (mDataSource->readAt( 1886 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1887 return ERROR_IO; 1888 } 1889 1890 if (U32_AT(buffer) != 0) { 1891 // Should be version 0, flags 0. 1892 return ERROR_MALFORMED; 1893 } 1894 1895 if (mLastTrack == NULL) 1896 return ERROR_MALFORMED; 1897 1898 mLastTrack->meta->setData( 1899 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1900 1901 if (mPath.size() >= 2 1902 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1903 // Information from the ESDS must be relied on for proper 1904 // setup of sample rate and channel count for MPEG4 Audio. 1905 // The generic header appears to only contain generic 1906 // information... 1907 1908 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1909 &buffer[4], chunk_data_size - 4); 1910 1911 if (err != OK) { 1912 return err; 1913 } 1914 } 1915 if (mPath.size() >= 2 1916 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) { 1917 // Check if the video is MPEG2 1918 ESDS esds(&buffer[4], chunk_data_size - 4); 1919 1920 uint8_t objectTypeIndication; 1921 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) { 1922 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) { 1923 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2); 1924 } 1925 } 1926 } 1927 break; 1928 } 1929 1930 case FOURCC('b', 't', 'r', 't'): 1931 { 1932 *offset += chunk_size; 1933 if (mLastTrack == NULL) { 1934 return ERROR_MALFORMED; 1935 } 1936 1937 uint8_t buffer[12]; 1938 if (chunk_data_size != sizeof(buffer)) { 1939 return ERROR_MALFORMED; 1940 } 1941 1942 if (mDataSource->readAt( 1943 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1944 return ERROR_IO; 1945 } 1946 1947 uint32_t maxBitrate = U32_AT(&buffer[4]); 1948 uint32_t avgBitrate = U32_AT(&buffer[8]); 1949 if (maxBitrate > 0 && maxBitrate < INT32_MAX) { 1950 mLastTrack->meta->setInt32(kKeyMaxBitRate, (int32_t)maxBitrate); 1951 } 1952 if (avgBitrate > 0 && avgBitrate < INT32_MAX) { 1953 mLastTrack->meta->setInt32(kKeyBitRate, (int32_t)avgBitrate); 1954 } 1955 break; 1956 } 1957 1958 case FOURCC('a', 'v', 'c', 'C'): 1959 { 1960 *offset += chunk_size; 1961 1962 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1963 1964 if (buffer->data() == NULL) { 1965 ALOGE("b/28471206"); 1966 return NO_MEMORY; 1967 } 1968 1969 if (mDataSource->readAt( 1970 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1971 return ERROR_IO; 1972 } 1973 1974 if (mLastTrack == NULL) 1975 return ERROR_MALFORMED; 1976 1977 mLastTrack->meta->setData( 1978 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size); 1979 1980 break; 1981 } 1982 case FOURCC('h', 'v', 'c', 'C'): 1983 { 1984 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1985 1986 if (buffer->data() == NULL) { 1987 ALOGE("b/28471206"); 1988 return NO_MEMORY; 1989 } 1990 1991 if (mDataSource->readAt( 1992 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1993 return ERROR_IO; 1994 } 1995 1996 if (mLastTrack == NULL) 1997 return ERROR_MALFORMED; 1998 1999 mLastTrack->meta->setData( 2000 kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size); 2001 2002 *offset += chunk_size; 2003 break; 2004 } 2005 2006 case FOURCC('d', '2', '6', '3'): 2007 { 2008 *offset += chunk_size; 2009 /* 2010 * d263 contains a fixed 7 bytes part: 2011 * vendor - 4 bytes 2012 * version - 1 byte 2013 * level - 1 byte 2014 * profile - 1 byte 2015 * optionally, "d263" box itself may contain a 16-byte 2016 * bit rate box (bitr) 2017 * average bit rate - 4 bytes 2018 * max bit rate - 4 bytes 2019 */ 2020 char buffer[23]; 2021 if (chunk_data_size != 7 && 2022 chunk_data_size != 23) { 2023 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size); 2024 return ERROR_MALFORMED; 2025 } 2026 2027 if (mDataSource->readAt( 2028 data_offset, buffer, chunk_data_size) < chunk_data_size) { 2029 return ERROR_IO; 2030 } 2031 2032 if (mLastTrack == NULL) 2033 return ERROR_MALFORMED; 2034 2035 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 2036 2037 break; 2038 } 2039 2040 case FOURCC('m', 'e', 't', 'a'): 2041 { 2042 off64_t stop_offset = *offset + chunk_size; 2043 *offset = data_offset; 2044 bool isParsingMetaKeys = underQTMetaPath(mPath, 2); 2045 if (!isParsingMetaKeys) { 2046 uint8_t buffer[4]; 2047 if (chunk_data_size < (off64_t)sizeof(buffer)) { 2048 *offset = stop_offset; 2049 return ERROR_MALFORMED; 2050 } 2051 2052 if (mDataSource->readAt( 2053 data_offset, buffer, 4) < 4) { 2054 *offset = stop_offset; 2055 return ERROR_IO; 2056 } 2057 2058 if (U32_AT(buffer) != 0) { 2059 // Should be version 0, flags 0. 2060 2061 // If it's not, let's assume this is one of those 2062 // apparently malformed chunks that don't have flags 2063 // and completely different semantics than what's 2064 // in the MPEG4 specs and skip it. 2065 *offset = stop_offset; 2066 return OK; 2067 } 2068 *offset += sizeof(buffer); 2069 } 2070 2071 while (*offset < stop_offset) { 2072 status_t err = parseChunk(offset, depth + 1); 2073 if (err != OK) { 2074 return err; 2075 } 2076 } 2077 2078 if (*offset != stop_offset) { 2079 return ERROR_MALFORMED; 2080 } 2081 break; 2082 } 2083 2084 case FOURCC('i', 'l', 'o', 'c'): 2085 case FOURCC('i', 'i', 'n', 'f'): 2086 case FOURCC('i', 'p', 'r', 'p'): 2087 case FOURCC('p', 'i', 't', 'm'): 2088 case FOURCC('i', 'd', 'a', 't'): 2089 case FOURCC('i', 'r', 'e', 'f'): 2090 case FOURCC('i', 'p', 'r', 'o'): 2091 { 2092 if (mIsHEIF) { 2093 if (mItemTable == NULL) { 2094 mItemTable = new ItemTable(mDataSource); 2095 } 2096 status_t err = mItemTable->parse( 2097 chunk_type, data_offset, chunk_data_size); 2098 if (err != OK) { 2099 return err; 2100 } 2101 } 2102 *offset += chunk_size; 2103 break; 2104 } 2105 2106 case FOURCC('m', 'e', 'a', 'n'): 2107 case FOURCC('n', 'a', 'm', 'e'): 2108 case FOURCC('d', 'a', 't', 'a'): 2109 { 2110 *offset += chunk_size; 2111 2112 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 2113 status_t err = parseITunesMetaData(data_offset, chunk_data_size); 2114 2115 if (err != OK) { 2116 return err; 2117 } 2118 } 2119 2120 break; 2121 } 2122 2123 case FOURCC('m', 'v', 'h', 'd'): 2124 { 2125 *offset += chunk_size; 2126 2127 if (depth != 1) { 2128 ALOGE("mvhd: depth %d", depth); 2129 return ERROR_MALFORMED; 2130 } 2131 if (chunk_data_size < 32) { 2132 return ERROR_MALFORMED; 2133 } 2134 2135 uint8_t header[32]; 2136 if (mDataSource->readAt( 2137 data_offset, header, sizeof(header)) 2138 < (ssize_t)sizeof(header)) { 2139 return ERROR_IO; 2140 } 2141 2142 uint64_t creationTime; 2143 uint64_t duration = 0; 2144 if (header[0] == 1) { 2145 creationTime = U64_AT(&header[4]); 2146 mHeaderTimescale = U32_AT(&header[20]); 2147 duration = U64_AT(&header[24]); 2148 if (duration == 0xffffffffffffffff) { 2149 duration = 0; 2150 } 2151 } else if (header[0] != 0) { 2152 return ERROR_MALFORMED; 2153 } else { 2154 creationTime = U32_AT(&header[4]); 2155 mHeaderTimescale = U32_AT(&header[12]); 2156 uint32_t d32 = U32_AT(&header[16]); 2157 if (d32 == 0xffffffff) { 2158 d32 = 0; 2159 } 2160 duration = d32; 2161 } 2162 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) { 2163 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 2164 } 2165 2166 String8 s; 2167 if (convertTimeToDate(creationTime, &s)) { 2168 mFileMetaData->setCString(kKeyDate, s.string()); 2169 } 2170 2171 2172 break; 2173 } 2174 2175 case FOURCC('m', 'e', 'h', 'd'): 2176 { 2177 *offset += chunk_size; 2178 2179 if (chunk_data_size < 8) { 2180 return ERROR_MALFORMED; 2181 } 2182 2183 uint8_t flags[4]; 2184 if (mDataSource->readAt( 2185 data_offset, flags, sizeof(flags)) 2186 < (ssize_t)sizeof(flags)) { 2187 return ERROR_IO; 2188 } 2189 2190 uint64_t duration = 0; 2191 if (flags[0] == 1) { 2192 // 64 bit 2193 if (chunk_data_size < 12) { 2194 return ERROR_MALFORMED; 2195 } 2196 mDataSource->getUInt64(data_offset + 4, &duration); 2197 if (duration == 0xffffffffffffffff) { 2198 duration = 0; 2199 } 2200 } else if (flags[0] == 0) { 2201 // 32 bit 2202 uint32_t d32; 2203 mDataSource->getUInt32(data_offset + 4, &d32); 2204 if (d32 == 0xffffffff) { 2205 d32 = 0; 2206 } 2207 duration = d32; 2208 } else { 2209 return ERROR_MALFORMED; 2210 } 2211 2212 if (duration != 0 && mHeaderTimescale != 0) { 2213 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 2214 } 2215 2216 break; 2217 } 2218 2219 case FOURCC('m', 'd', 'a', 't'): 2220 { 2221 ALOGV("mdat chunk, drm: %d", mIsDrm); 2222 2223 mMdatFound = true; 2224 2225 if (!mIsDrm) { 2226 *offset += chunk_size; 2227 break; 2228 } 2229 2230 if (chunk_size < 8) { 2231 return ERROR_MALFORMED; 2232 } 2233 2234 return parseDrmSINF(offset, data_offset); 2235 } 2236 2237 case FOURCC('h', 'd', 'l', 'r'): 2238 { 2239 *offset += chunk_size; 2240 2241 if (underQTMetaPath(mPath, 3)) { 2242 break; 2243 } 2244 2245 uint32_t buffer; 2246 if (mDataSource->readAt( 2247 data_offset + 8, &buffer, 4) < 4) { 2248 return ERROR_IO; 2249 } 2250 2251 uint32_t type = ntohl(buffer); 2252 // For the 3GPP file format, the handler-type within the 'hdlr' box 2253 // shall be 'text'. We also want to support 'sbtl' handler type 2254 // for a practical reason as various MPEG4 containers use it. 2255 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 2256 if (mLastTrack != NULL) { 2257 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 2258 } 2259 } 2260 2261 break; 2262 } 2263 2264 case FOURCC('k', 'e', 'y', 's'): 2265 { 2266 *offset += chunk_size; 2267 2268 if (underQTMetaPath(mPath, 3)) { 2269 status_t err = parseQTMetaKey(data_offset, chunk_data_size); 2270 if (err != OK) { 2271 return err; 2272 } 2273 } 2274 break; 2275 } 2276 2277 case FOURCC('t', 'r', 'e', 'x'): 2278 { 2279 *offset += chunk_size; 2280 2281 if (chunk_data_size < 24) { 2282 return ERROR_IO; 2283 } 2284 Trex trex; 2285 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) || 2286 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) || 2287 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) || 2288 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) || 2289 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) { 2290 return ERROR_IO; 2291 } 2292 mTrex.add(trex); 2293 break; 2294 } 2295 2296 case FOURCC('t', 'x', '3', 'g'): 2297 { 2298 if (mLastTrack == NULL) 2299 return ERROR_MALFORMED; 2300 2301 uint32_t type; 2302 const void *data; 2303 size_t size = 0; 2304 if (!mLastTrack->meta->findData( 2305 kKeyTextFormatData, &type, &data, &size)) { 2306 size = 0; 2307 } 2308 2309 if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) { 2310 return ERROR_MALFORMED; 2311 } 2312 2313 uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size]; 2314 if (buffer == NULL) { 2315 return ERROR_MALFORMED; 2316 } 2317 2318 if (size > 0) { 2319 memcpy(buffer, data, size); 2320 } 2321 2322 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 2323 < chunk_size) { 2324 delete[] buffer; 2325 buffer = NULL; 2326 2327 // advance read pointer so we don't end up reading this again 2328 *offset += chunk_size; 2329 return ERROR_IO; 2330 } 2331 2332 mLastTrack->meta->setData( 2333 kKeyTextFormatData, 0, buffer, size + chunk_size); 2334 2335 delete[] buffer; 2336 2337 *offset += chunk_size; 2338 break; 2339 } 2340 2341 case FOURCC('c', 'o', 'v', 'r'): 2342 { 2343 *offset += chunk_size; 2344 2345 if (mFileMetaData != NULL) { 2346 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64, 2347 chunk_data_size, data_offset); 2348 2349 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) { 2350 return ERROR_MALFORMED; 2351 } 2352 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1); 2353 if (buffer->data() == NULL) { 2354 ALOGE("b/28471206"); 2355 return NO_MEMORY; 2356 } 2357 if (mDataSource->readAt( 2358 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) { 2359 return ERROR_IO; 2360 } 2361 const int kSkipBytesOfDataBox = 16; 2362 if (chunk_data_size <= kSkipBytesOfDataBox) { 2363 return ERROR_MALFORMED; 2364 } 2365 2366 mFileMetaData->setData( 2367 kKeyAlbumArt, MetaData::TYPE_NONE, 2368 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 2369 } 2370 2371 break; 2372 } 2373 2374 case FOURCC('c', 'o', 'l', 'r'): 2375 { 2376 *offset += chunk_size; 2377 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd') 2378 // ignore otherwise 2379 if (depth >= 2 && mPath[depth - 2] == FOURCC('s', 't', 's', 'd')) { 2380 status_t err = parseColorInfo(data_offset, chunk_data_size); 2381 if (err != OK) { 2382 return err; 2383 } 2384 } 2385 2386 break; 2387 } 2388 2389 case FOURCC('t', 'i', 't', 'l'): 2390 case FOURCC('p', 'e', 'r', 'f'): 2391 case FOURCC('a', 'u', 't', 'h'): 2392 case FOURCC('g', 'n', 'r', 'e'): 2393 case FOURCC('a', 'l', 'b', 'm'): 2394 case FOURCC('y', 'r', 'r', 'c'): 2395 { 2396 *offset += chunk_size; 2397 2398 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth); 2399 2400 if (err != OK) { 2401 return err; 2402 } 2403 2404 break; 2405 } 2406 2407 case FOURCC('I', 'D', '3', '2'): 2408 { 2409 *offset += chunk_size; 2410 2411 if (chunk_data_size < 6) { 2412 return ERROR_MALFORMED; 2413 } 2414 2415 parseID3v2MetaData(data_offset + 6); 2416 2417 break; 2418 } 2419 2420 case FOURCC('-', '-', '-', '-'): 2421 { 2422 mLastCommentMean.clear(); 2423 mLastCommentName.clear(); 2424 mLastCommentData.clear(); 2425 *offset += chunk_size; 2426 break; 2427 } 2428 2429 case FOURCC('s', 'i', 'd', 'x'): 2430 { 2431 status_t err = parseSegmentIndex(data_offset, chunk_data_size); 2432 if (err != OK) { 2433 return err; 2434 } 2435 *offset += chunk_size; 2436 return UNKNOWN_ERROR; // stop parsing after sidx 2437 } 2438 2439 case FOURCC('a', 'c', '-', '3'): 2440 { 2441 *offset += chunk_size; 2442 return parseAC3SampleEntry(data_offset); 2443 } 2444 2445 case FOURCC('f', 't', 'y', 'p'): 2446 { 2447 if (chunk_data_size < 8 || depth != 0) { 2448 return ERROR_MALFORMED; 2449 } 2450 2451 off64_t stop_offset = *offset + chunk_size; 2452 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4; 2453 std::set<uint32_t> brandSet; 2454 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 2455 if (i == 1) { 2456 // Skip this index, it refers to the minorVersion, 2457 // not a brand. 2458 continue; 2459 } 2460 2461 uint32_t brand; 2462 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) { 2463 return ERROR_MALFORMED; 2464 } 2465 2466 brand = ntohl(brand); 2467 brandSet.insert(brand); 2468 } 2469 2470 if (brandSet.count(FOURCC('q', 't', ' ', ' ')) > 0) { 2471 mIsQT = true; 2472 } else if (brandSet.count(FOURCC('m', 'i', 'f', '1')) > 0 2473 && brandSet.count(FOURCC('h', 'e', 'i', 'c')) > 0) { 2474 mIsHEIF = true; 2475 ALOGV("identified HEIF image"); 2476 } 2477 2478 *offset = stop_offset; 2479 2480 break; 2481 } 2482 2483 default: 2484 { 2485 // check if we're parsing 'ilst' for meta keys 2486 // if so, treat type as a number (key-id). 2487 if (underQTMetaPath(mPath, 3)) { 2488 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size); 2489 if (err != OK) { 2490 return err; 2491 } 2492 } 2493 2494 *offset += chunk_size; 2495 break; 2496 } 2497 } 2498 2499 return OK; 2500} 2501 2502status_t MPEG4Extractor::parseAC3SampleEntry(off64_t offset) { 2503 // skip 16 bytes: 2504 // + 6-byte reserved, 2505 // + 2-byte data reference index, 2506 // + 8-byte reserved 2507 offset += 16; 2508 uint16_t channelCount; 2509 if (!mDataSource->getUInt16(offset, &channelCount)) { 2510 return ERROR_MALFORMED; 2511 } 2512 // skip 8 bytes: 2513 // + 2-byte channelCount, 2514 // + 2-byte sample size, 2515 // + 4-byte reserved 2516 offset += 8; 2517 uint16_t sampleRate; 2518 if (!mDataSource->getUInt16(offset, &sampleRate)) { 2519 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read sample rate"); 2520 return ERROR_MALFORMED; 2521 } 2522 2523 // skip 4 bytes: 2524 // + 2-byte sampleRate, 2525 // + 2-byte reserved 2526 offset += 4; 2527 return parseAC3SpecificBox(offset, sampleRate); 2528} 2529 2530status_t MPEG4Extractor::parseAC3SpecificBox( 2531 off64_t offset, uint16_t sampleRate) { 2532 uint32_t size; 2533 // + 4-byte size 2534 // + 4-byte type 2535 // + 3-byte payload 2536 const uint32_t kAC3SpecificBoxSize = 11; 2537 if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) { 2538 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size"); 2539 return ERROR_MALFORMED; 2540 } 2541 2542 offset += 4; 2543 uint32_t type; 2544 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC('d', 'a', 'c', '3')) { 2545 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3"); 2546 return ERROR_MALFORMED; 2547 } 2548 2549 offset += 4; 2550 const uint32_t kAC3SpecificBoxPayloadSize = 3; 2551 uint8_t chunk[kAC3SpecificBoxPayloadSize]; 2552 if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) { 2553 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields"); 2554 return ERROR_MALFORMED; 2555 } 2556 2557 ABitReader br(chunk, sizeof(chunk)); 2558 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5}; 2559 static const unsigned sampleRateTable[] = {48000, 44100, 32000}; 2560 2561 unsigned fscod = br.getBits(2); 2562 if (fscod == 3) { 2563 ALOGE("Incorrect fscod (3) in AC3 header"); 2564 return ERROR_MALFORMED; 2565 } 2566 unsigned boxSampleRate = sampleRateTable[fscod]; 2567 if (boxSampleRate != sampleRate) { 2568 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d", 2569 boxSampleRate, sampleRate); 2570 return ERROR_MALFORMED; 2571 } 2572 2573 unsigned bsid = br.getBits(5); 2574 if (bsid > 8) { 2575 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?"); 2576 return ERROR_MALFORMED; 2577 } 2578 2579 // skip 2580 unsigned bsmod __unused = br.getBits(3); 2581 2582 unsigned acmod = br.getBits(3); 2583 unsigned lfeon = br.getBits(1); 2584 unsigned channelCount = channelCountTable[acmod] + lfeon; 2585 2586 if (mLastTrack == NULL) { 2587 return ERROR_MALFORMED; 2588 } 2589 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3); 2590 mLastTrack->meta->setInt32(kKeyChannelCount, channelCount); 2591 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 2592 return OK; 2593} 2594 2595status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 2596 ALOGV("MPEG4Extractor::parseSegmentIndex"); 2597 2598 if (size < 12) { 2599 return -EINVAL; 2600 } 2601 2602 uint32_t flags; 2603 if (!mDataSource->getUInt32(offset, &flags)) { 2604 return ERROR_MALFORMED; 2605 } 2606 2607 uint32_t version = flags >> 24; 2608 flags &= 0xffffff; 2609 2610 ALOGV("sidx version %d", version); 2611 2612 uint32_t referenceId; 2613 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 2614 return ERROR_MALFORMED; 2615 } 2616 2617 uint32_t timeScale; 2618 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 2619 return ERROR_MALFORMED; 2620 } 2621 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 2622 if (timeScale == 0) 2623 return ERROR_MALFORMED; 2624 2625 uint64_t earliestPresentationTime; 2626 uint64_t firstOffset; 2627 2628 offset += 12; 2629 size -= 12; 2630 2631 if (version == 0) { 2632 if (size < 8) { 2633 return -EINVAL; 2634 } 2635 uint32_t tmp; 2636 if (!mDataSource->getUInt32(offset, &tmp)) { 2637 return ERROR_MALFORMED; 2638 } 2639 earliestPresentationTime = tmp; 2640 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 2641 return ERROR_MALFORMED; 2642 } 2643 firstOffset = tmp; 2644 offset += 8; 2645 size -= 8; 2646 } else { 2647 if (size < 16) { 2648 return -EINVAL; 2649 } 2650 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 2651 return ERROR_MALFORMED; 2652 } 2653 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 2654 return ERROR_MALFORMED; 2655 } 2656 offset += 16; 2657 size -= 16; 2658 } 2659 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset); 2660 2661 if (size < 4) { 2662 return -EINVAL; 2663 } 2664 2665 uint16_t referenceCount; 2666 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 2667 return ERROR_MALFORMED; 2668 } 2669 offset += 4; 2670 size -= 4; 2671 ALOGV("refcount: %d", referenceCount); 2672 2673 if (size < referenceCount * 12) { 2674 return -EINVAL; 2675 } 2676 2677 uint64_t total_duration = 0; 2678 for (unsigned int i = 0; i < referenceCount; i++) { 2679 uint32_t d1, d2, d3; 2680 2681 if (!mDataSource->getUInt32(offset, &d1) || // size 2682 !mDataSource->getUInt32(offset + 4, &d2) || // duration 2683 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 2684 return ERROR_MALFORMED; 2685 } 2686 2687 if (d1 & 0x80000000) { 2688 ALOGW("sub-sidx boxes not supported yet"); 2689 } 2690 bool sap = d3 & 0x80000000; 2691 uint32_t saptype = (d3 >> 28) & 7; 2692 if (!sap || (saptype != 1 && saptype != 2)) { 2693 // type 1 and 2 are sync samples 2694 ALOGW("not a stream access point, or unsupported type: %08x", d3); 2695 } 2696 total_duration += d2; 2697 offset += 12; 2698 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 2699 SidxEntry se; 2700 se.mSize = d1 & 0x7fffffff; 2701 se.mDurationUs = 1000000LL * d2 / timeScale; 2702 mSidxEntries.add(se); 2703 } 2704 2705 uint64_t sidxDuration = total_duration * 1000000 / timeScale; 2706 2707 if (mLastTrack == NULL) 2708 return ERROR_MALFORMED; 2709 2710 int64_t metaDuration; 2711 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 2712 mLastTrack->meta->setInt64(kKeyDuration, sidxDuration); 2713 } 2714 return OK; 2715} 2716 2717status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) { 2718 if (size < 8) { 2719 return ERROR_MALFORMED; 2720 } 2721 2722 uint32_t count; 2723 if (!mDataSource->getUInt32(offset + 4, &count)) { 2724 return ERROR_MALFORMED; 2725 } 2726 2727 if (mMetaKeyMap.size() > 0) { 2728 ALOGW("'keys' atom seen again, discarding existing entries"); 2729 mMetaKeyMap.clear(); 2730 } 2731 2732 off64_t keyOffset = offset + 8; 2733 off64_t stopOffset = offset + size; 2734 for (size_t i = 1; i <= count; i++) { 2735 if (keyOffset + 8 > stopOffset) { 2736 return ERROR_MALFORMED; 2737 } 2738 2739 uint32_t keySize; 2740 if (!mDataSource->getUInt32(keyOffset, &keySize) 2741 || keySize < 8 2742 || keyOffset + keySize > stopOffset) { 2743 return ERROR_MALFORMED; 2744 } 2745 2746 uint32_t type; 2747 if (!mDataSource->getUInt32(keyOffset + 4, &type) 2748 || type != FOURCC('m', 'd', 't', 'a')) { 2749 return ERROR_MALFORMED; 2750 } 2751 2752 keySize -= 8; 2753 keyOffset += 8; 2754 2755 sp<ABuffer> keyData = new ABuffer(keySize); 2756 if (keyData->data() == NULL) { 2757 return ERROR_MALFORMED; 2758 } 2759 if (mDataSource->readAt( 2760 keyOffset, keyData->data(), keySize) < (ssize_t) keySize) { 2761 return ERROR_MALFORMED; 2762 } 2763 2764 AString key((const char *)keyData->data(), keySize); 2765 mMetaKeyMap.add(i, key); 2766 2767 keyOffset += keySize; 2768 } 2769 return OK; 2770} 2771 2772status_t MPEG4Extractor::parseQTMetaVal( 2773 int32_t keyId, off64_t offset, size_t size) { 2774 ssize_t index = mMetaKeyMap.indexOfKey(keyId); 2775 if (index < 0) { 2776 // corresponding key is not present, ignore 2777 return ERROR_MALFORMED; 2778 } 2779 2780 if (size <= 16) { 2781 return ERROR_MALFORMED; 2782 } 2783 uint32_t dataSize; 2784 if (!mDataSource->getUInt32(offset, &dataSize) 2785 || dataSize > size || dataSize <= 16) { 2786 return ERROR_MALFORMED; 2787 } 2788 uint32_t atomFourCC; 2789 if (!mDataSource->getUInt32(offset + 4, &atomFourCC) 2790 || atomFourCC != FOURCC('d', 'a', 't', 'a')) { 2791 return ERROR_MALFORMED; 2792 } 2793 uint32_t dataType; 2794 if (!mDataSource->getUInt32(offset + 8, &dataType) 2795 || ((dataType & 0xff000000) != 0)) { 2796 // not well-known type 2797 return ERROR_MALFORMED; 2798 } 2799 2800 dataSize -= 16; 2801 offset += 16; 2802 2803 if (dataType == 23 && dataSize >= 4) { 2804 // BE Float32 2805 uint32_t val; 2806 if (!mDataSource->getUInt32(offset, &val)) { 2807 return ERROR_MALFORMED; 2808 } 2809 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) { 2810 mFileMetaData->setFloat(kKeyCaptureFramerate, *(float *)&val); 2811 } 2812 } else if (dataType == 67 && dataSize >= 4) { 2813 // BE signed int32 2814 uint32_t val; 2815 if (!mDataSource->getUInt32(offset, &val)) { 2816 return ERROR_MALFORMED; 2817 } 2818 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) { 2819 mFileMetaData->setInt32(kKeyTemporalLayerCount, val); 2820 } 2821 } else { 2822 // add more keys if needed 2823 ALOGV("ignoring key: type %d, size %d", dataType, dataSize); 2824 } 2825 2826 return OK; 2827} 2828 2829status_t MPEG4Extractor::parseTrackHeader( 2830 off64_t data_offset, off64_t data_size) { 2831 if (data_size < 4) { 2832 return ERROR_MALFORMED; 2833 } 2834 2835 uint8_t version; 2836 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 2837 return ERROR_IO; 2838 } 2839 2840 size_t dynSize = (version == 1) ? 36 : 24; 2841 2842 uint8_t buffer[36 + 60]; 2843 2844 if (data_size != (off64_t)dynSize + 60) { 2845 return ERROR_MALFORMED; 2846 } 2847 2848 if (mDataSource->readAt( 2849 data_offset, buffer, data_size) < (ssize_t)data_size) { 2850 return ERROR_IO; 2851 } 2852 2853 uint64_t ctime __unused, mtime __unused, duration __unused; 2854 int32_t id; 2855 2856 if (version == 1) { 2857 ctime = U64_AT(&buffer[4]); 2858 mtime = U64_AT(&buffer[12]); 2859 id = U32_AT(&buffer[20]); 2860 duration = U64_AT(&buffer[28]); 2861 } else if (version == 0) { 2862 ctime = U32_AT(&buffer[4]); 2863 mtime = U32_AT(&buffer[8]); 2864 id = U32_AT(&buffer[12]); 2865 duration = U32_AT(&buffer[20]); 2866 } else { 2867 return ERROR_UNSUPPORTED; 2868 } 2869 2870 if (mLastTrack == NULL) 2871 return ERROR_MALFORMED; 2872 2873 mLastTrack->meta->setInt32(kKeyTrackID, id); 2874 2875 size_t matrixOffset = dynSize + 16; 2876 int32_t a00 = U32_AT(&buffer[matrixOffset]); 2877 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 2878 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 2879 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 2880 2881#if 0 2882 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 2883 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 2884 2885 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 2886 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 2887 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 2888 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 2889#endif 2890 2891 uint32_t rotationDegrees; 2892 2893 static const int32_t kFixedOne = 0x10000; 2894 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 2895 // Identity, no rotation 2896 rotationDegrees = 0; 2897 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 2898 rotationDegrees = 90; 2899 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 2900 rotationDegrees = 270; 2901 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { 2902 rotationDegrees = 180; 2903 } else { 2904 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 2905 rotationDegrees = 0; 2906 } 2907 2908 if (rotationDegrees != 0) { 2909 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); 2910 } 2911 2912 // Handle presentation display size, which could be different 2913 // from the image size indicated by kKeyWidth and kKeyHeight. 2914 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2915 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2916 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); 2917 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); 2918 2919 return OK; 2920} 2921 2922status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) { 2923 if (size == 0) { 2924 return OK; 2925 } 2926 2927 if (size < 4 || size == SIZE_MAX) { 2928 return ERROR_MALFORMED; 2929 } 2930 2931 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2932 if (buffer == NULL) { 2933 return ERROR_MALFORMED; 2934 } 2935 if (mDataSource->readAt( 2936 offset, buffer, size) != (ssize_t)size) { 2937 delete[] buffer; 2938 buffer = NULL; 2939 2940 return ERROR_IO; 2941 } 2942 2943 uint32_t flags = U32_AT(buffer); 2944 2945 uint32_t metadataKey = 0; 2946 char chunk[5]; 2947 MakeFourCCString(mPath[4], chunk); 2948 ALOGV("meta: %s @ %lld", chunk, (long long)offset); 2949 switch ((int32_t)mPath[4]) { 2950 case FOURCC(0xa9, 'a', 'l', 'b'): 2951 { 2952 metadataKey = kKeyAlbum; 2953 break; 2954 } 2955 case FOURCC(0xa9, 'A', 'R', 'T'): 2956 { 2957 metadataKey = kKeyArtist; 2958 break; 2959 } 2960 case FOURCC('a', 'A', 'R', 'T'): 2961 { 2962 metadataKey = kKeyAlbumArtist; 2963 break; 2964 } 2965 case FOURCC(0xa9, 'd', 'a', 'y'): 2966 { 2967 metadataKey = kKeyYear; 2968 break; 2969 } 2970 case FOURCC(0xa9, 'n', 'a', 'm'): 2971 { 2972 metadataKey = kKeyTitle; 2973 break; 2974 } 2975 case FOURCC(0xa9, 'w', 'r', 't'): 2976 { 2977 metadataKey = kKeyWriter; 2978 break; 2979 } 2980 case FOURCC('c', 'o', 'v', 'r'): 2981 { 2982 metadataKey = kKeyAlbumArt; 2983 break; 2984 } 2985 case FOURCC('g', 'n', 'r', 'e'): 2986 { 2987 metadataKey = kKeyGenre; 2988 break; 2989 } 2990 case FOURCC(0xa9, 'g', 'e', 'n'): 2991 { 2992 metadataKey = kKeyGenre; 2993 break; 2994 } 2995 case FOURCC('c', 'p', 'i', 'l'): 2996 { 2997 if (size == 9 && flags == 21) { 2998 char tmp[16]; 2999 sprintf(tmp, "%d", 3000 (int)buffer[size - 1]); 3001 3002 mFileMetaData->setCString(kKeyCompilation, tmp); 3003 } 3004 break; 3005 } 3006 case FOURCC('t', 'r', 'k', 'n'): 3007 { 3008 if (size == 16 && flags == 0) { 3009 char tmp[16]; 3010 uint16_t* pTrack = (uint16_t*)&buffer[10]; 3011 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 3012 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 3013 3014 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 3015 } 3016 break; 3017 } 3018 case FOURCC('d', 'i', 's', 'k'): 3019 { 3020 if ((size == 14 || size == 16) && flags == 0) { 3021 char tmp[16]; 3022 uint16_t* pDisc = (uint16_t*)&buffer[10]; 3023 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 3024 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 3025 3026 mFileMetaData->setCString(kKeyDiscNumber, tmp); 3027 } 3028 break; 3029 } 3030 case FOURCC('-', '-', '-', '-'): 3031 { 3032 buffer[size] = '\0'; 3033 switch (mPath[5]) { 3034 case FOURCC('m', 'e', 'a', 'n'): 3035 mLastCommentMean.setTo((const char *)buffer + 4); 3036 break; 3037 case FOURCC('n', 'a', 'm', 'e'): 3038 mLastCommentName.setTo((const char *)buffer + 4); 3039 break; 3040 case FOURCC('d', 'a', 't', 'a'): 3041 if (size < 8) { 3042 delete[] buffer; 3043 buffer = NULL; 3044 ALOGE("b/24346430"); 3045 return ERROR_MALFORMED; 3046 } 3047 mLastCommentData.setTo((const char *)buffer + 8); 3048 break; 3049 } 3050 3051 // Once we have a set of mean/name/data info, go ahead and process 3052 // it to see if its something we are interested in. Whether or not 3053 // were are interested in the specific tag, make sure to clear out 3054 // the set so we can be ready to process another tuple should one 3055 // show up later in the file. 3056 if ((mLastCommentMean.length() != 0) && 3057 (mLastCommentName.length() != 0) && 3058 (mLastCommentData.length() != 0)) { 3059 3060 if (mLastCommentMean == "com.apple.iTunes" 3061 && mLastCommentName == "iTunSMPB") { 3062 int32_t delay, padding; 3063 if (sscanf(mLastCommentData, 3064 " %*x %x %x %*x", &delay, &padding) == 2) { 3065 if (mLastTrack == NULL) { 3066 delete[] buffer; 3067 return ERROR_MALFORMED; 3068 } 3069 3070 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 3071 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); 3072 } 3073 } 3074 3075 mLastCommentMean.clear(); 3076 mLastCommentName.clear(); 3077 mLastCommentData.clear(); 3078 } 3079 break; 3080 } 3081 3082 default: 3083 break; 3084 } 3085 3086 if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) { 3087 if (metadataKey == kKeyAlbumArt) { 3088 mFileMetaData->setData( 3089 kKeyAlbumArt, MetaData::TYPE_NONE, 3090 buffer + 8, size - 8); 3091 } else if (metadataKey == kKeyGenre) { 3092 if (flags == 0) { 3093 // uint8_t genre code, iTunes genre codes are 3094 // the standard id3 codes, except they start 3095 // at 1 instead of 0 (e.g. Pop is 14, not 13) 3096 // We use standard id3 numbering, so subtract 1. 3097 int genrecode = (int)buffer[size - 1]; 3098 genrecode--; 3099 if (genrecode < 0) { 3100 genrecode = 255; // reserved for 'unknown genre' 3101 } 3102 char genre[10]; 3103 sprintf(genre, "%d", genrecode); 3104 3105 mFileMetaData->setCString(metadataKey, genre); 3106 } else if (flags == 1) { 3107 // custom genre string 3108 buffer[size] = '\0'; 3109 3110 mFileMetaData->setCString( 3111 metadataKey, (const char *)buffer + 8); 3112 } 3113 } else { 3114 buffer[size] = '\0'; 3115 3116 mFileMetaData->setCString( 3117 metadataKey, (const char *)buffer + 8); 3118 } 3119 } 3120 3121 delete[] buffer; 3122 buffer = NULL; 3123 3124 return OK; 3125} 3126 3127status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) { 3128 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) { 3129 return ERROR_MALFORMED; 3130 } 3131 3132 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 3133 if (buffer == NULL) { 3134 return ERROR_MALFORMED; 3135 } 3136 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) { 3137 delete[] buffer; 3138 buffer = NULL; 3139 3140 return ERROR_IO; 3141 } 3142 3143 int32_t type = U32_AT(&buffer[0]); 3144 if ((type == FOURCC('n', 'c', 'l', 'x') && size >= 11) 3145 || (type == FOURCC('n', 'c', 'l', 'c') && size >= 10)) { 3146 int32_t primaries = U16_AT(&buffer[4]); 3147 int32_t transfer = U16_AT(&buffer[6]); 3148 int32_t coeffs = U16_AT(&buffer[8]); 3149 bool fullRange = (type == FOURCC('n', 'c', 'l', 'x')) && (buffer[10] & 128); 3150 3151 ColorAspects aspects; 3152 ColorUtils::convertIsoColorAspectsToCodecAspects( 3153 primaries, transfer, coeffs, fullRange, aspects); 3154 3155 // only store the first color specification 3156 if (!mLastTrack->meta->hasData(kKeyColorPrimaries)) { 3157 mLastTrack->meta->setInt32(kKeyColorPrimaries, aspects.mPrimaries); 3158 mLastTrack->meta->setInt32(kKeyTransferFunction, aspects.mTransfer); 3159 mLastTrack->meta->setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs); 3160 mLastTrack->meta->setInt32(kKeyColorRange, aspects.mRange); 3161 } 3162 } 3163 3164 delete[] buffer; 3165 buffer = NULL; 3166 3167 return OK; 3168} 3169 3170status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) { 3171 if (size < 4 || size == SIZE_MAX) { 3172 return ERROR_MALFORMED; 3173 } 3174 3175 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 3176 if (buffer == NULL) { 3177 return ERROR_MALFORMED; 3178 } 3179 if (mDataSource->readAt( 3180 offset, buffer, size) != (ssize_t)size) { 3181 delete[] buffer; 3182 buffer = NULL; 3183 3184 return ERROR_IO; 3185 } 3186 3187 uint32_t metadataKey = 0; 3188 switch (mPath[depth]) { 3189 case FOURCC('t', 'i', 't', 'l'): 3190 { 3191 metadataKey = kKeyTitle; 3192 break; 3193 } 3194 case FOURCC('p', 'e', 'r', 'f'): 3195 { 3196 metadataKey = kKeyArtist; 3197 break; 3198 } 3199 case FOURCC('a', 'u', 't', 'h'): 3200 { 3201 metadataKey = kKeyWriter; 3202 break; 3203 } 3204 case FOURCC('g', 'n', 'r', 'e'): 3205 { 3206 metadataKey = kKeyGenre; 3207 break; 3208 } 3209 case FOURCC('a', 'l', 'b', 'm'): 3210 { 3211 if (buffer[size - 1] != '\0') { 3212 char tmp[4]; 3213 sprintf(tmp, "%u", buffer[size - 1]); 3214 3215 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 3216 } 3217 3218 metadataKey = kKeyAlbum; 3219 break; 3220 } 3221 case FOURCC('y', 'r', 'r', 'c'): 3222 { 3223 if (size < 6) { 3224 delete[] buffer; 3225 buffer = NULL; 3226 ALOGE("b/62133227"); 3227 android_errorWriteLog(0x534e4554, "62133227"); 3228 return ERROR_MALFORMED; 3229 } 3230 char tmp[5]; 3231 uint16_t year = U16_AT(&buffer[4]); 3232 3233 if (year < 10000) { 3234 sprintf(tmp, "%u", year); 3235 3236 mFileMetaData->setCString(kKeyYear, tmp); 3237 } 3238 break; 3239 } 3240 3241 default: 3242 break; 3243 } 3244 3245 if (metadataKey > 0) { 3246 bool isUTF8 = true; // Common case 3247 char16_t *framedata = NULL; 3248 int len16 = 0; // Number of UTF-16 characters 3249 3250 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00 3251 if (size < 6) { 3252 delete[] buffer; 3253 buffer = NULL; 3254 return ERROR_MALFORMED; 3255 } 3256 3257 if (size - 6 >= 4) { 3258 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator 3259 framedata = (char16_t *)(buffer + 6); 3260 if (0xfffe == *framedata) { 3261 // endianness marker (BOM) doesn't match host endianness 3262 for (int i = 0; i < len16; i++) { 3263 framedata[i] = bswap_16(framedata[i]); 3264 } 3265 // BOM is now swapped to 0xfeff, we will execute next block too 3266 } 3267 3268 if (0xfeff == *framedata) { 3269 // Remove the BOM 3270 framedata++; 3271 len16--; 3272 isUTF8 = false; 3273 } 3274 // else normal non-zero-length UTF-8 string 3275 // we can't handle UTF-16 without BOM as there is no other 3276 // indication of encoding. 3277 } 3278 3279 if (isUTF8) { 3280 buffer[size] = 0; 3281 mFileMetaData->setCString(metadataKey, (const char *)buffer + 6); 3282 } else { 3283 // Convert from UTF-16 string to UTF-8 string. 3284 String8 tmpUTF8str(framedata, len16); 3285 mFileMetaData->setCString(metadataKey, tmpUTF8str.string()); 3286 } 3287 } 3288 3289 delete[] buffer; 3290 buffer = NULL; 3291 3292 return OK; 3293} 3294 3295void MPEG4Extractor::parseID3v2MetaData(off64_t offset) { 3296 ID3 id3(mDataSource, true /* ignorev1 */, offset); 3297 3298 if (id3.isValid()) { 3299 struct Map { 3300 int key; 3301 const char *tag1; 3302 const char *tag2; 3303 }; 3304 static const Map kMap[] = { 3305 { kKeyAlbum, "TALB", "TAL" }, 3306 { kKeyArtist, "TPE1", "TP1" }, 3307 { kKeyAlbumArtist, "TPE2", "TP2" }, 3308 { kKeyComposer, "TCOM", "TCM" }, 3309 { kKeyGenre, "TCON", "TCO" }, 3310 { kKeyTitle, "TIT2", "TT2" }, 3311 { kKeyYear, "TYE", "TYER" }, 3312 { kKeyAuthor, "TXT", "TEXT" }, 3313 { kKeyCDTrackNumber, "TRK", "TRCK" }, 3314 { kKeyDiscNumber, "TPA", "TPOS" }, 3315 { kKeyCompilation, "TCP", "TCMP" }, 3316 }; 3317 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 3318 3319 for (size_t i = 0; i < kNumMapEntries; ++i) { 3320 if (!mFileMetaData->hasData(kMap[i].key)) { 3321 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 3322 if (it->done()) { 3323 delete it; 3324 it = new ID3::Iterator(id3, kMap[i].tag2); 3325 } 3326 3327 if (it->done()) { 3328 delete it; 3329 continue; 3330 } 3331 3332 String8 s; 3333 it->getString(&s); 3334 delete it; 3335 3336 mFileMetaData->setCString(kMap[i].key, s); 3337 } 3338 } 3339 3340 size_t dataSize; 3341 String8 mime; 3342 const void *data = id3.getAlbumArt(&dataSize, &mime); 3343 3344 if (data) { 3345 mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 3346 mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string()); 3347 } 3348 } 3349} 3350 3351sp<MediaSource> MPEG4Extractor::getTrack(size_t index) { 3352 status_t err; 3353 if ((err = readMetaData()) != OK) { 3354 return NULL; 3355 } 3356 3357 Track *track = mFirstTrack; 3358 while (index > 0) { 3359 if (track == NULL) { 3360 return NULL; 3361 } 3362 3363 track = track->next; 3364 --index; 3365 } 3366 3367 if (track == NULL) { 3368 return NULL; 3369 } 3370 3371 3372 Trex *trex = NULL; 3373 int32_t trackId; 3374 if (track->meta->findInt32(kKeyTrackID, &trackId)) { 3375 for (size_t i = 0; i < mTrex.size(); i++) { 3376 Trex *t = &mTrex.editItemAt(i); 3377 if (t->track_ID == (uint32_t) trackId) { 3378 trex = t; 3379 break; 3380 } 3381 } 3382 } else { 3383 ALOGE("b/21657957"); 3384 return NULL; 3385 } 3386 3387 ALOGV("getTrack called, pssh: %zu", mPssh.size()); 3388 3389 const char *mime; 3390 if (!track->meta->findCString(kKeyMIMEType, &mime)) { 3391 return NULL; 3392 } 3393 3394 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 3395 uint32_t type; 3396 const void *data; 3397 size_t size; 3398 if (!track->meta->findData(kKeyAVCC, &type, &data, &size)) { 3399 return NULL; 3400 } 3401 3402 const uint8_t *ptr = (const uint8_t *)data; 3403 3404 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1 3405 return NULL; 3406 } 3407 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 3408 uint32_t type; 3409 const void *data; 3410 size_t size; 3411 if (!track->meta->findData(kKeyHVCC, &type, &data, &size)) { 3412 return NULL; 3413 } 3414 3415 const uint8_t *ptr = (const uint8_t *)data; 3416 3417 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1 3418 return NULL; 3419 } 3420 } 3421 3422 sp<MPEG4Source> source = new MPEG4Source(this, 3423 track->meta, mDataSource, track->timescale, track->sampleTable, 3424 mSidxEntries, trex, mMoofOffset, mItemTable); 3425 if (source->init() != OK) { 3426 return NULL; 3427 } 3428 return source; 3429} 3430 3431// static 3432status_t MPEG4Extractor::verifyTrack(Track *track) { 3433 const char *mime; 3434 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 3435 3436 uint32_t type; 3437 const void *data; 3438 size_t size; 3439 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 3440 if (!track->meta->findData(kKeyAVCC, &type, &data, &size) 3441 || type != kTypeAVCC) { 3442 return ERROR_MALFORMED; 3443 } 3444 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 3445 if (!track->meta->findData(kKeyHVCC, &type, &data, &size) 3446 || type != kTypeHVCC) { 3447 return ERROR_MALFORMED; 3448 } 3449 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 3450 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2) 3451 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 3452 if (!track->meta->findData(kKeyESDS, &type, &data, &size) 3453 || type != kTypeESDS) { 3454 return ERROR_MALFORMED; 3455 } 3456 } 3457 3458 if (track->sampleTable == NULL || !track->sampleTable->isValid()) { 3459 // Make sure we have all the metadata we need. 3460 ALOGE("stbl atom missing/invalid."); 3461 return ERROR_MALFORMED; 3462 } 3463 3464 if (track->timescale == 0) { 3465 ALOGE("timescale invalid."); 3466 return ERROR_MALFORMED; 3467 } 3468 3469 return OK; 3470} 3471 3472typedef enum { 3473 //AOT_NONE = -1, 3474 //AOT_NULL_OBJECT = 0, 3475 //AOT_AAC_MAIN = 1, /**< Main profile */ 3476 AOT_AAC_LC = 2, /**< Low Complexity object */ 3477 //AOT_AAC_SSR = 3, 3478 //AOT_AAC_LTP = 4, 3479 AOT_SBR = 5, 3480 //AOT_AAC_SCAL = 6, 3481 //AOT_TWIN_VQ = 7, 3482 //AOT_CELP = 8, 3483 //AOT_HVXC = 9, 3484 //AOT_RSVD_10 = 10, /**< (reserved) */ 3485 //AOT_RSVD_11 = 11, /**< (reserved) */ 3486 //AOT_TTSI = 12, /**< TTSI Object */ 3487 //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */ 3488 //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */ 3489 //AOT_GEN_MIDI = 15, /**< General MIDI object */ 3490 //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */ 3491 AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */ 3492 //AOT_RSVD_18 = 18, /**< (reserved) */ 3493 //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */ 3494 AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */ 3495 //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */ 3496 AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */ 3497 AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */ 3498 //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */ 3499 //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */ 3500 //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */ 3501 //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */ 3502 //AOT_RSVD_28 = 28, /**< might become SSC */ 3503 AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */ 3504 //AOT_MPEGS = 30, /**< MPEG Surround */ 3505 3506 AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */ 3507 3508 //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */ 3509 //AOT_MP3ONMP4_L2 = 33, /**< MPEG-Layer2 in mp4 */ 3510 //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */ 3511 //AOT_RSVD_35 = 35, /**< might become DST */ 3512 //AOT_RSVD_36 = 36, /**< might become ALS */ 3513 //AOT_AAC_SLS = 37, /**< AAC + SLS */ 3514 //AOT_SLS = 38, /**< SLS */ 3515 //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */ 3516 3517 //AOT_USAC = 42, /**< USAC */ 3518 //AOT_SAOC = 43, /**< SAOC */ 3519 //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */ 3520 3521 //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */ 3522} AUDIO_OBJECT_TYPE; 3523 3524status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 3525 const void *esds_data, size_t esds_size) { 3526 ESDS esds(esds_data, esds_size); 3527 3528 uint8_t objectTypeIndication; 3529 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 3530 return ERROR_MALFORMED; 3531 } 3532 3533 if (objectTypeIndication == 0xe1) { 3534 // This isn't MPEG4 audio at all, it's QCELP 14k... 3535 if (mLastTrack == NULL) 3536 return ERROR_MALFORMED; 3537 3538 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 3539 return OK; 3540 } 3541 3542 if (objectTypeIndication == 0x6b) { 3543 // The media subtype is MP3 audio 3544 // Our software MP3 audio decoder may not be able to handle 3545 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 3546 ALOGE("MP3 track in MP4/3GPP file is not supported"); 3547 return ERROR_UNSUPPORTED; 3548 } 3549 3550 if (mLastTrack != NULL) { 3551 uint32_t maxBitrate = 0; 3552 uint32_t avgBitrate = 0; 3553 esds.getBitRate(&maxBitrate, &avgBitrate); 3554 if (maxBitrate > 0 && maxBitrate < INT32_MAX) { 3555 mLastTrack->meta->setInt32(kKeyMaxBitRate, (int32_t)maxBitrate); 3556 } 3557 if (avgBitrate > 0 && avgBitrate < INT32_MAX) { 3558 mLastTrack->meta->setInt32(kKeyBitRate, (int32_t)avgBitrate); 3559 } 3560 } 3561 3562 const uint8_t *csd; 3563 size_t csd_size; 3564 if (esds.getCodecSpecificInfo( 3565 (const void **)&csd, &csd_size) != OK) { 3566 return ERROR_MALFORMED; 3567 } 3568 3569 if (kUseHexDump) { 3570 printf("ESD of size %zu\n", csd_size); 3571 hexdump(csd, csd_size); 3572 } 3573 3574 if (csd_size == 0) { 3575 // There's no further information, i.e. no codec specific data 3576 // Let's assume that the information provided in the mpeg4 headers 3577 // is accurate and hope for the best. 3578 3579 return OK; 3580 } 3581 3582 if (csd_size < 2) { 3583 return ERROR_MALFORMED; 3584 } 3585 3586 static uint32_t kSamplingRate[] = { 3587 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 3588 16000, 12000, 11025, 8000, 7350 3589 }; 3590 3591 ABitReader br(csd, csd_size); 3592 uint32_t objectType = br.getBits(5); 3593 3594 if (objectType == 31) { // AAC-ELD => additional 6 bits 3595 objectType = 32 + br.getBits(6); 3596 } 3597 3598 if (mLastTrack == NULL) 3599 return ERROR_MALFORMED; 3600 3601 //keep AOT type 3602 mLastTrack->meta->setInt32(kKeyAACAOT, objectType); 3603 3604 uint32_t freqIndex = br.getBits(4); 3605 3606 int32_t sampleRate = 0; 3607 int32_t numChannels = 0; 3608 if (freqIndex == 15) { 3609 if (br.numBitsLeft() < 28) return ERROR_MALFORMED; 3610 sampleRate = br.getBits(24); 3611 numChannels = br.getBits(4); 3612 } else { 3613 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3614 numChannels = br.getBits(4); 3615 3616 if (freqIndex == 13 || freqIndex == 14) { 3617 return ERROR_MALFORMED; 3618 } 3619 3620 sampleRate = kSamplingRate[freqIndex]; 3621 } 3622 3623 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 3624 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3625 uint32_t extFreqIndex = br.getBits(4); 3626 int32_t extSampleRate __unused; 3627 if (extFreqIndex == 15) { 3628 if (csd_size < 8) { 3629 return ERROR_MALFORMED; 3630 } 3631 if (br.numBitsLeft() < 24) return ERROR_MALFORMED; 3632 extSampleRate = br.getBits(24); 3633 } else { 3634 if (extFreqIndex == 13 || extFreqIndex == 14) { 3635 return ERROR_MALFORMED; 3636 } 3637 extSampleRate = kSamplingRate[extFreqIndex]; 3638 } 3639 //TODO: save the extension sampling rate value in meta data => 3640 // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate); 3641 } 3642 3643 switch (numChannels) { 3644 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration 3645 case 0: 3646 case 1:// FC 3647 case 2:// FL FR 3648 case 3:// FC, FL FR 3649 case 4:// FC, FL FR, RC 3650 case 5:// FC, FL FR, SL SR 3651 case 6:// FC, FL FR, SL SR, LFE 3652 //numChannels already contains the right value 3653 break; 3654 case 11:// FC, FL FR, SL SR, RC, LFE 3655 numChannels = 7; 3656 break; 3657 case 7: // FC, FCL FCR, FL FR, SL SR, LFE 3658 case 12:// FC, FL FR, SL SR, RL RR, LFE 3659 case 14:// FC, FL FR, SL SR, LFE, FHL FHR 3660 numChannels = 8; 3661 break; 3662 default: 3663 return ERROR_UNSUPPORTED; 3664 } 3665 3666 { 3667 if (objectType == AOT_SBR || objectType == AOT_PS) { 3668 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3669 objectType = br.getBits(5); 3670 3671 if (objectType == AOT_ESCAPE) { 3672 if (br.numBitsLeft() < 6) return ERROR_MALFORMED; 3673 objectType = 32 + br.getBits(6); 3674 } 3675 } 3676 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || 3677 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || 3678 objectType == AOT_ER_BSAC) { 3679 if (br.numBitsLeft() < 2) return ERROR_MALFORMED; 3680 const int32_t frameLengthFlag __unused = br.getBits(1); 3681 3682 const int32_t dependsOnCoreCoder = br.getBits(1); 3683 3684 if (dependsOnCoreCoder ) { 3685 if (br.numBitsLeft() < 14) return ERROR_MALFORMED; 3686 const int32_t coreCoderDelay __unused = br.getBits(14); 3687 } 3688 3689 int32_t extensionFlag = -1; 3690 if (br.numBitsLeft() > 0) { 3691 extensionFlag = br.getBits(1); 3692 } else { 3693 switch (objectType) { 3694 // 14496-3 4.5.1.1 extensionFlag 3695 case AOT_AAC_LC: 3696 extensionFlag = 0; 3697 break; 3698 case AOT_ER_AAC_LC: 3699 case AOT_ER_AAC_SCAL: 3700 case AOT_ER_BSAC: 3701 case AOT_ER_AAC_LD: 3702 extensionFlag = 1; 3703 break; 3704 default: 3705 return ERROR_MALFORMED; 3706 break; 3707 } 3708 ALOGW("csd missing extension flag; assuming %d for object type %u.", 3709 extensionFlag, objectType); 3710 } 3711 3712 if (numChannels == 0) { 3713 int32_t channelsEffectiveNum = 0; 3714 int32_t channelsNum = 0; 3715 if (br.numBitsLeft() < 32) { 3716 return ERROR_MALFORMED; 3717 } 3718 const int32_t ElementInstanceTag __unused = br.getBits(4); 3719 const int32_t Profile __unused = br.getBits(2); 3720 const int32_t SamplingFrequencyIndex __unused = br.getBits(4); 3721 const int32_t NumFrontChannelElements = br.getBits(4); 3722 const int32_t NumSideChannelElements = br.getBits(4); 3723 const int32_t NumBackChannelElements = br.getBits(4); 3724 const int32_t NumLfeChannelElements = br.getBits(2); 3725 const int32_t NumAssocDataElements __unused = br.getBits(3); 3726 const int32_t NumValidCcElements __unused = br.getBits(4); 3727 3728 const int32_t MonoMixdownPresent = br.getBits(1); 3729 3730 if (MonoMixdownPresent != 0) { 3731 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3732 const int32_t MonoMixdownElementNumber __unused = br.getBits(4); 3733 } 3734 3735 if (br.numBitsLeft() < 1) return ERROR_MALFORMED; 3736 const int32_t StereoMixdownPresent = br.getBits(1); 3737 if (StereoMixdownPresent != 0) { 3738 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3739 const int32_t StereoMixdownElementNumber __unused = br.getBits(4); 3740 } 3741 3742 if (br.numBitsLeft() < 1) return ERROR_MALFORMED; 3743 const int32_t MatrixMixdownIndexPresent = br.getBits(1); 3744 if (MatrixMixdownIndexPresent != 0) { 3745 if (br.numBitsLeft() < 3) return ERROR_MALFORMED; 3746 const int32_t MatrixMixdownIndex __unused = br.getBits(2); 3747 const int32_t PseudoSurroundEnable __unused = br.getBits(1); 3748 } 3749 3750 int i; 3751 for (i=0; i < NumFrontChannelElements; i++) { 3752 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3753 const int32_t FrontElementIsCpe = br.getBits(1); 3754 const int32_t FrontElementTagSelect __unused = br.getBits(4); 3755 channelsNum += FrontElementIsCpe ? 2 : 1; 3756 } 3757 3758 for (i=0; i < NumSideChannelElements; i++) { 3759 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3760 const int32_t SideElementIsCpe = br.getBits(1); 3761 const int32_t SideElementTagSelect __unused = br.getBits(4); 3762 channelsNum += SideElementIsCpe ? 2 : 1; 3763 } 3764 3765 for (i=0; i < NumBackChannelElements; i++) { 3766 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3767 const int32_t BackElementIsCpe = br.getBits(1); 3768 const int32_t BackElementTagSelect __unused = br.getBits(4); 3769 channelsNum += BackElementIsCpe ? 2 : 1; 3770 } 3771 channelsEffectiveNum = channelsNum; 3772 3773 for (i=0; i < NumLfeChannelElements; i++) { 3774 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3775 const int32_t LfeElementTagSelect __unused = br.getBits(4); 3776 channelsNum += 1; 3777 } 3778 ALOGV("mpeg4 audio channelsNum = %d", channelsNum); 3779 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); 3780 numChannels = channelsNum; 3781 } 3782 } 3783 } 3784 3785 if (numChannels == 0) { 3786 return ERROR_UNSUPPORTED; 3787 } 3788 3789 if (mLastTrack == NULL) 3790 return ERROR_MALFORMED; 3791 3792 int32_t prevSampleRate; 3793 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); 3794 3795 if (prevSampleRate != sampleRate) { 3796 ALOGV("mpeg4 audio sample rate different from previous setting. " 3797 "was: %d, now: %d", prevSampleRate, sampleRate); 3798 } 3799 3800 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 3801 3802 int32_t prevChannelCount; 3803 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); 3804 3805 if (prevChannelCount != numChannels) { 3806 ALOGV("mpeg4 audio channel count different from previous setting. " 3807 "was: %d, now: %d", prevChannelCount, numChannels); 3808 } 3809 3810 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); 3811 3812 return OK; 3813} 3814 3815//////////////////////////////////////////////////////////////////////////////// 3816 3817MPEG4Source::MPEG4Source( 3818 const sp<MPEG4Extractor> &owner, 3819 const sp<MetaData> &format, 3820 const sp<DataSource> &dataSource, 3821 int32_t timeScale, 3822 const sp<SampleTable> &sampleTable, 3823 Vector<SidxEntry> &sidx, 3824 const Trex *trex, 3825 off64_t firstMoofOffset, 3826 const sp<ItemTable> &itemTable) 3827 : mOwner(owner), 3828 mFormat(format), 3829 mDataSource(dataSource), 3830 mTimescale(timeScale), 3831 mSampleTable(sampleTable), 3832 mCurrentSampleIndex(0), 3833 mCurrentFragmentIndex(0), 3834 mSegments(sidx), 3835 mTrex(trex), 3836 mFirstMoofOffset(firstMoofOffset), 3837 mCurrentMoofOffset(firstMoofOffset), 3838 mNextMoofOffset(-1), 3839 mCurrentTime(0), 3840 mCurrentSampleInfoAllocSize(0), 3841 mCurrentSampleInfoSizes(NULL), 3842 mCurrentSampleInfoOffsetsAllocSize(0), 3843 mCurrentSampleInfoOffsets(NULL), 3844 mIsAVC(false), 3845 mIsHEVC(false), 3846 mNALLengthSize(0), 3847 mStarted(false), 3848 mGroup(NULL), 3849 mBuffer(NULL), 3850 mWantsNALFragments(false), 3851 mSrcBuffer(NULL), 3852 mIsHEIF(itemTable != NULL), 3853 mItemTable(itemTable) { 3854 3855 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo)); 3856 3857 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode); 3858 mDefaultIVSize = 0; 3859 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 3860 uint32_t keytype; 3861 const void *key; 3862 size_t keysize; 3863 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 3864 CHECK(keysize <= 16); 3865 memset(mCryptoKey, 0, 16); 3866 memcpy(mCryptoKey, key, keysize); 3867 } 3868 3869 const char *mime; 3870 bool success = mFormat->findCString(kKeyMIMEType, &mime); 3871 CHECK(success); 3872 3873 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 3874 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC); 3875 3876 if (mIsAVC) { 3877 uint32_t type; 3878 const void *data; 3879 size_t size; 3880 CHECK(format->findData(kKeyAVCC, &type, &data, &size)); 3881 3882 const uint8_t *ptr = (const uint8_t *)data; 3883 3884 CHECK(size >= 7); 3885 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3886 3887 // The number of bytes used to encode the length of a NAL unit. 3888 mNALLengthSize = 1 + (ptr[4] & 3); 3889 } else if (mIsHEVC) { 3890 uint32_t type; 3891 const void *data; 3892 size_t size; 3893 CHECK(format->findData(kKeyHVCC, &type, &data, &size)); 3894 3895 const uint8_t *ptr = (const uint8_t *)data; 3896 3897 CHECK(size >= 22); 3898 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3899 3900 mNALLengthSize = 1 + (ptr[14 + 7] & 3); 3901 } 3902 3903 CHECK(format->findInt32(kKeyTrackID, &mTrackId)); 3904 3905} 3906 3907status_t MPEG4Source::init() { 3908 if (mFirstMoofOffset != 0) { 3909 off64_t offset = mFirstMoofOffset; 3910 return parseChunk(&offset); 3911 } 3912 return OK; 3913} 3914 3915MPEG4Source::~MPEG4Source() { 3916 if (mStarted) { 3917 stop(); 3918 } 3919 free(mCurrentSampleInfoSizes); 3920 free(mCurrentSampleInfoOffsets); 3921} 3922 3923status_t MPEG4Source::start(MetaData *params) { 3924 Mutex::Autolock autoLock(mLock); 3925 3926 CHECK(!mStarted); 3927 3928 int32_t val; 3929 if (params && params->findInt32(kKeyWantsNALFragments, &val) 3930 && val != 0) { 3931 mWantsNALFragments = true; 3932 } else { 3933 mWantsNALFragments = false; 3934 } 3935 3936 int32_t tmp; 3937 CHECK(mFormat->findInt32(kKeyMaxInputSize, &tmp)); 3938 size_t max_size = tmp; 3939 3940 // A somewhat arbitrary limit that should be sufficient for 8k video frames 3941 // If you see the message below for a valid input stream: increase the limit 3942 const size_t kMaxBufferSize = 64 * 1024 * 1024; 3943 if (max_size > kMaxBufferSize) { 3944 ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize); 3945 return ERROR_MALFORMED; 3946 } 3947 if (max_size == 0) { 3948 ALOGE("zero max input size"); 3949 return ERROR_MALFORMED; 3950 } 3951 3952 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize. 3953 const size_t kMaxBuffers = 8; 3954 const size_t buffers = min(kMaxBufferSize / max_size, kMaxBuffers); 3955 mGroup = new MediaBufferGroup(buffers, max_size); 3956 mSrcBuffer = new (std::nothrow) uint8_t[max_size]; 3957 if (mSrcBuffer == NULL) { 3958 // file probably specified a bad max size 3959 delete mGroup; 3960 mGroup = NULL; 3961 return ERROR_MALFORMED; 3962 } 3963 3964 mStarted = true; 3965 3966 return OK; 3967} 3968 3969status_t MPEG4Source::stop() { 3970 Mutex::Autolock autoLock(mLock); 3971 3972 CHECK(mStarted); 3973 3974 if (mBuffer != NULL) { 3975 mBuffer->release(); 3976 mBuffer = NULL; 3977 } 3978 3979 delete[] mSrcBuffer; 3980 mSrcBuffer = NULL; 3981 3982 delete mGroup; 3983 mGroup = NULL; 3984 3985 mStarted = false; 3986 mCurrentSampleIndex = 0; 3987 3988 return OK; 3989} 3990 3991status_t MPEG4Source::parseChunk(off64_t *offset) { 3992 uint32_t hdr[2]; 3993 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3994 return ERROR_IO; 3995 } 3996 uint64_t chunk_size = ntohl(hdr[0]); 3997 uint32_t chunk_type = ntohl(hdr[1]); 3998 off64_t data_offset = *offset + 8; 3999 4000 if (chunk_size == 1) { 4001 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 4002 return ERROR_IO; 4003 } 4004 chunk_size = ntoh64(chunk_size); 4005 data_offset += 8; 4006 4007 if (chunk_size < 16) { 4008 // The smallest valid chunk is 16 bytes long in this case. 4009 return ERROR_MALFORMED; 4010 } 4011 } else if (chunk_size < 8) { 4012 // The smallest valid chunk is 8 bytes long. 4013 return ERROR_MALFORMED; 4014 } 4015 4016 char chunk[5]; 4017 MakeFourCCString(chunk_type, chunk); 4018 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset); 4019 4020 off64_t chunk_data_size = *offset + chunk_size - data_offset; 4021 4022 switch(chunk_type) { 4023 4024 case FOURCC('t', 'r', 'a', 'f'): 4025 case FOURCC('m', 'o', 'o', 'f'): { 4026 off64_t stop_offset = *offset + chunk_size; 4027 *offset = data_offset; 4028 while (*offset < stop_offset) { 4029 status_t err = parseChunk(offset); 4030 if (err != OK) { 4031 return err; 4032 } 4033 } 4034 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 4035 // *offset points to the box following this moof. Find the next moof from there. 4036 4037 while (true) { 4038 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 4039 // no more box to the end of file. 4040 break; 4041 } 4042 chunk_size = ntohl(hdr[0]); 4043 chunk_type = ntohl(hdr[1]); 4044 if (chunk_size == 1) { 4045 // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box 4046 // which is defined in 4.2 Object Structure. 4047 // When chunk_size==1, 8 bytes follows as "largesize". 4048 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 4049 return ERROR_IO; 4050 } 4051 chunk_size = ntoh64(chunk_size); 4052 if (chunk_size < 16) { 4053 // The smallest valid chunk is 16 bytes long in this case. 4054 return ERROR_MALFORMED; 4055 } 4056 } else if (chunk_size == 0) { 4057 // next box extends to end of file. 4058 } else if (chunk_size < 8) { 4059 // The smallest valid chunk is 8 bytes long in this case. 4060 return ERROR_MALFORMED; 4061 } 4062 4063 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 4064 mNextMoofOffset = *offset; 4065 break; 4066 } else if (chunk_size == 0) { 4067 break; 4068 } 4069 *offset += chunk_size; 4070 } 4071 } 4072 break; 4073 } 4074 4075 case FOURCC('t', 'f', 'h', 'd'): { 4076 status_t err; 4077 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 4078 return err; 4079 } 4080 *offset += chunk_size; 4081 break; 4082 } 4083 4084 case FOURCC('t', 'r', 'u', 'n'): { 4085 status_t err; 4086 if (mLastParsedTrackId == mTrackId) { 4087 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 4088 return err; 4089 } 4090 } 4091 4092 *offset += chunk_size; 4093 break; 4094 } 4095 4096 case FOURCC('s', 'a', 'i', 'z'): { 4097 status_t err; 4098 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 4099 return err; 4100 } 4101 *offset += chunk_size; 4102 break; 4103 } 4104 case FOURCC('s', 'a', 'i', 'o'): { 4105 status_t err; 4106 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 4107 return err; 4108 } 4109 *offset += chunk_size; 4110 break; 4111 } 4112 4113 case FOURCC('m', 'd', 'a', 't'): { 4114 // parse DRM info if present 4115 ALOGV("MPEG4Source::parseChunk mdat"); 4116 // if saiz/saoi was previously observed, do something with the sampleinfos 4117 *offset += chunk_size; 4118 break; 4119 } 4120 4121 default: { 4122 *offset += chunk_size; 4123 break; 4124 } 4125 } 4126 return OK; 4127} 4128 4129status_t MPEG4Source::parseSampleAuxiliaryInformationSizes( 4130 off64_t offset, off64_t /* size */) { 4131 ALOGV("parseSampleAuxiliaryInformationSizes"); 4132 // 14496-12 8.7.12 4133 uint8_t version; 4134 if (mDataSource->readAt( 4135 offset, &version, sizeof(version)) 4136 < (ssize_t)sizeof(version)) { 4137 return ERROR_IO; 4138 } 4139 4140 if (version != 0) { 4141 return ERROR_UNSUPPORTED; 4142 } 4143 offset++; 4144 4145 uint32_t flags; 4146 if (!mDataSource->getUInt24(offset, &flags)) { 4147 return ERROR_IO; 4148 } 4149 offset += 3; 4150 4151 if (flags & 1) { 4152 uint32_t tmp; 4153 if (!mDataSource->getUInt32(offset, &tmp)) { 4154 return ERROR_MALFORMED; 4155 } 4156 mCurrentAuxInfoType = tmp; 4157 offset += 4; 4158 if (!mDataSource->getUInt32(offset, &tmp)) { 4159 return ERROR_MALFORMED; 4160 } 4161 mCurrentAuxInfoTypeParameter = tmp; 4162 offset += 4; 4163 } 4164 4165 uint8_t defsize; 4166 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 4167 return ERROR_MALFORMED; 4168 } 4169 mCurrentDefaultSampleInfoSize = defsize; 4170 offset++; 4171 4172 uint32_t smplcnt; 4173 if (!mDataSource->getUInt32(offset, &smplcnt)) { 4174 return ERROR_MALFORMED; 4175 } 4176 mCurrentSampleInfoCount = smplcnt; 4177 offset += 4; 4178 4179 if (mCurrentDefaultSampleInfoSize != 0) { 4180 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 4181 return OK; 4182 } 4183 if (smplcnt > mCurrentSampleInfoAllocSize) { 4184 uint8_t * newPtr = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 4185 if (newPtr == NULL) { 4186 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt); 4187 return NO_MEMORY; 4188 } 4189 mCurrentSampleInfoSizes = newPtr; 4190 mCurrentSampleInfoAllocSize = smplcnt; 4191 } 4192 4193 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 4194 return OK; 4195} 4196 4197status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets( 4198 off64_t offset, off64_t /* size */) { 4199 ALOGV("parseSampleAuxiliaryInformationOffsets"); 4200 // 14496-12 8.7.13 4201 uint8_t version; 4202 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 4203 return ERROR_IO; 4204 } 4205 offset++; 4206 4207 uint32_t flags; 4208 if (!mDataSource->getUInt24(offset, &flags)) { 4209 return ERROR_IO; 4210 } 4211 offset += 3; 4212 4213 uint32_t entrycount; 4214 if (!mDataSource->getUInt32(offset, &entrycount)) { 4215 return ERROR_IO; 4216 } 4217 offset += 4; 4218 if (entrycount == 0) { 4219 return OK; 4220 } 4221 if (entrycount > UINT32_MAX / 8) { 4222 return ERROR_MALFORMED; 4223 } 4224 4225 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 4226 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8); 4227 if (newPtr == NULL) { 4228 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoOffsetsAllocSize, entrycount * 8); 4229 return NO_MEMORY; 4230 } 4231 mCurrentSampleInfoOffsets = newPtr; 4232 mCurrentSampleInfoOffsetsAllocSize = entrycount; 4233 } 4234 mCurrentSampleInfoOffsetCount = entrycount; 4235 4236 if (mCurrentSampleInfoOffsets == NULL) { 4237 return OK; 4238 } 4239 4240 for (size_t i = 0; i < entrycount; i++) { 4241 if (version == 0) { 4242 uint32_t tmp; 4243 if (!mDataSource->getUInt32(offset, &tmp)) { 4244 return ERROR_IO; 4245 } 4246 mCurrentSampleInfoOffsets[i] = tmp; 4247 offset += 4; 4248 } else { 4249 uint64_t tmp; 4250 if (!mDataSource->getUInt64(offset, &tmp)) { 4251 return ERROR_IO; 4252 } 4253 mCurrentSampleInfoOffsets[i] = tmp; 4254 offset += 8; 4255 } 4256 } 4257 4258 // parse clear/encrypted data 4259 4260 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 4261 4262 drmoffset += mCurrentMoofOffset; 4263 int ivlength; 4264 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 4265 4266 // only 0, 8 and 16 byte initialization vectors are supported 4267 if (ivlength != 0 && ivlength != 8 && ivlength != 16) { 4268 ALOGW("unsupported IV length: %d", ivlength); 4269 return ERROR_MALFORMED; 4270 } 4271 // read CencSampleAuxiliaryDataFormats 4272 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) { 4273 if (i >= mCurrentSamples.size()) { 4274 ALOGW("too few samples"); 4275 break; 4276 } 4277 Sample *smpl = &mCurrentSamples.editItemAt(i); 4278 4279 memset(smpl->iv, 0, 16); 4280 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) { 4281 return ERROR_IO; 4282 } 4283 4284 drmoffset += ivlength; 4285 4286 int32_t smplinfosize = mCurrentDefaultSampleInfoSize; 4287 if (smplinfosize == 0) { 4288 smplinfosize = mCurrentSampleInfoSizes[i]; 4289 } 4290 if (smplinfosize > ivlength) { 4291 uint16_t numsubsamples; 4292 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) { 4293 return ERROR_IO; 4294 } 4295 drmoffset += 2; 4296 for (size_t j = 0; j < numsubsamples; j++) { 4297 uint16_t numclear; 4298 uint32_t numencrypted; 4299 if (!mDataSource->getUInt16(drmoffset, &numclear)) { 4300 return ERROR_IO; 4301 } 4302 drmoffset += 2; 4303 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) { 4304 return ERROR_IO; 4305 } 4306 drmoffset += 4; 4307 smpl->clearsizes.add(numclear); 4308 smpl->encryptedsizes.add(numencrypted); 4309 } 4310 } else { 4311 smpl->clearsizes.add(0); 4312 smpl->encryptedsizes.add(smpl->size); 4313 } 4314 } 4315 4316 4317 return OK; 4318} 4319 4320status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 4321 4322 if (size < 8) { 4323 return -EINVAL; 4324 } 4325 4326 uint32_t flags; 4327 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 4328 return ERROR_MALFORMED; 4329 } 4330 4331 if (flags & 0xff000000) { 4332 return -EINVAL; 4333 } 4334 4335 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 4336 return ERROR_MALFORMED; 4337 } 4338 4339 if (mLastParsedTrackId != mTrackId) { 4340 // this is not the right track, skip it 4341 return OK; 4342 } 4343 4344 mTrackFragmentHeaderInfo.mFlags = flags; 4345 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 4346 offset += 8; 4347 size -= 8; 4348 4349 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 4350 4351 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 4352 if (size < 8) { 4353 return -EINVAL; 4354 } 4355 4356 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) { 4357 return ERROR_MALFORMED; 4358 } 4359 offset += 8; 4360 size -= 8; 4361 } 4362 4363 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 4364 if (size < 4) { 4365 return -EINVAL; 4366 } 4367 4368 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 4369 return ERROR_MALFORMED; 4370 } 4371 offset += 4; 4372 size -= 4; 4373 } 4374 4375 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 4376 if (size < 4) { 4377 return -EINVAL; 4378 } 4379 4380 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 4381 return ERROR_MALFORMED; 4382 } 4383 offset += 4; 4384 size -= 4; 4385 } 4386 4387 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 4388 if (size < 4) { 4389 return -EINVAL; 4390 } 4391 4392 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 4393 return ERROR_MALFORMED; 4394 } 4395 offset += 4; 4396 size -= 4; 4397 } 4398 4399 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 4400 if (size < 4) { 4401 return -EINVAL; 4402 } 4403 4404 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 4405 return ERROR_MALFORMED; 4406 } 4407 offset += 4; 4408 size -= 4; 4409 } 4410 4411 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 4412 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 4413 } 4414 4415 mTrackFragmentHeaderInfo.mDataOffset = 0; 4416 return OK; 4417} 4418 4419status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 4420 4421 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 4422 if (size < 8) { 4423 return -EINVAL; 4424 } 4425 4426 enum { 4427 kDataOffsetPresent = 0x01, 4428 kFirstSampleFlagsPresent = 0x04, 4429 kSampleDurationPresent = 0x100, 4430 kSampleSizePresent = 0x200, 4431 kSampleFlagsPresent = 0x400, 4432 kSampleCompositionTimeOffsetPresent = 0x800, 4433 }; 4434 4435 uint32_t flags; 4436 if (!mDataSource->getUInt32(offset, &flags)) { 4437 return ERROR_MALFORMED; 4438 } 4439 // |version| only affects SampleCompositionTimeOffset field. 4440 // If version == 0, SampleCompositionTimeOffset is uint32_t; 4441 // Otherwise, SampleCompositionTimeOffset is int32_t. 4442 // Sample.compositionOffset is defined as int32_t. 4443 uint8_t version = flags >> 24; 4444 flags &= 0xffffff; 4445 ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags); 4446 4447 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 4448 // These two shall not be used together. 4449 return -EINVAL; 4450 } 4451 4452 uint32_t sampleCount; 4453 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 4454 return ERROR_MALFORMED; 4455 } 4456 offset += 8; 4457 size -= 8; 4458 4459 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 4460 4461 uint32_t firstSampleFlags = 0; 4462 4463 if (flags & kDataOffsetPresent) { 4464 if (size < 4) { 4465 return -EINVAL; 4466 } 4467 4468 int32_t dataOffsetDelta; 4469 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 4470 return ERROR_MALFORMED; 4471 } 4472 4473 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 4474 4475 offset += 4; 4476 size -= 4; 4477 } 4478 4479 if (flags & kFirstSampleFlagsPresent) { 4480 if (size < 4) { 4481 return -EINVAL; 4482 } 4483 4484 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 4485 return ERROR_MALFORMED; 4486 } 4487 offset += 4; 4488 size -= 4; 4489 } 4490 4491 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 4492 sampleCtsOffset = 0; 4493 4494 size_t bytesPerSample = 0; 4495 if (flags & kSampleDurationPresent) { 4496 bytesPerSample += 4; 4497 } else if (mTrackFragmentHeaderInfo.mFlags 4498 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 4499 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 4500 } else if (mTrex) { 4501 sampleDuration = mTrex->default_sample_duration; 4502 } 4503 4504 if (flags & kSampleSizePresent) { 4505 bytesPerSample += 4; 4506 } else if (mTrackFragmentHeaderInfo.mFlags 4507 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 4508 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 4509 } else { 4510 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 4511 } 4512 4513 if (flags & kSampleFlagsPresent) { 4514 bytesPerSample += 4; 4515 } else if (mTrackFragmentHeaderInfo.mFlags 4516 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 4517 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 4518 } else { 4519 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 4520 } 4521 4522 if (flags & kSampleCompositionTimeOffsetPresent) { 4523 bytesPerSample += 4; 4524 } else { 4525 sampleCtsOffset = 0; 4526 } 4527 4528 if (size < (off64_t)(sampleCount * bytesPerSample)) { 4529 return -EINVAL; 4530 } 4531 4532 Sample tmp; 4533 for (uint32_t i = 0; i < sampleCount; ++i) { 4534 if (flags & kSampleDurationPresent) { 4535 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 4536 return ERROR_MALFORMED; 4537 } 4538 offset += 4; 4539 } 4540 4541 if (flags & kSampleSizePresent) { 4542 if (!mDataSource->getUInt32(offset, &sampleSize)) { 4543 return ERROR_MALFORMED; 4544 } 4545 offset += 4; 4546 } 4547 4548 if (flags & kSampleFlagsPresent) { 4549 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 4550 return ERROR_MALFORMED; 4551 } 4552 offset += 4; 4553 } 4554 4555 if (flags & kSampleCompositionTimeOffsetPresent) { 4556 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 4557 return ERROR_MALFORMED; 4558 } 4559 offset += 4; 4560 } 4561 4562 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, " 4563 " flags 0x%08x", i + 1, 4564 dataOffset, sampleSize, sampleDuration, 4565 (flags & kFirstSampleFlagsPresent) && i == 0 4566 ? firstSampleFlags : sampleFlags); 4567 tmp.offset = dataOffset; 4568 tmp.size = sampleSize; 4569 tmp.duration = sampleDuration; 4570 tmp.compositionOffset = sampleCtsOffset; 4571 mCurrentSamples.add(tmp); 4572 4573 dataOffset += sampleSize; 4574 } 4575 4576 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 4577 4578 return OK; 4579} 4580 4581sp<MetaData> MPEG4Source::getFormat() { 4582 Mutex::Autolock autoLock(mLock); 4583 4584 return mFormat; 4585} 4586 4587size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 4588 switch (mNALLengthSize) { 4589 case 1: 4590 return *data; 4591 case 2: 4592 return U16_AT(data); 4593 case 3: 4594 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 4595 case 4: 4596 return U32_AT(data); 4597 } 4598 4599 // This cannot happen, mNALLengthSize springs to life by adding 1 to 4600 // a 2-bit integer. 4601 CHECK(!"Should not be here."); 4602 4603 return 0; 4604} 4605 4606status_t MPEG4Source::read( 4607 MediaBuffer **out, const ReadOptions *options) { 4608 Mutex::Autolock autoLock(mLock); 4609 4610 CHECK(mStarted); 4611 4612 if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) { 4613 *out = nullptr; 4614 return WOULD_BLOCK; 4615 } 4616 4617 if (mFirstMoofOffset > 0) { 4618 return fragmentedRead(out, options); 4619 } 4620 4621 *out = NULL; 4622 4623 int64_t targetSampleTimeUs = -1; 4624 4625 int64_t seekTimeUs; 4626 ReadOptions::SeekMode mode; 4627 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 4628 if (mIsHEIF) { 4629 CHECK(mSampleTable == NULL); 4630 CHECK(mItemTable != NULL); 4631 4632 status_t err; 4633 if (seekTimeUs >= 0) { 4634 err = mItemTable->findPrimaryImage(&mCurrentSampleIndex); 4635 } else { 4636 err = mItemTable->findThumbnail(&mCurrentSampleIndex); 4637 } 4638 if (err != OK) { 4639 return err; 4640 } 4641 } else { 4642 uint32_t findFlags = 0; 4643 switch (mode) { 4644 case ReadOptions::SEEK_PREVIOUS_SYNC: 4645 findFlags = SampleTable::kFlagBefore; 4646 break; 4647 case ReadOptions::SEEK_NEXT_SYNC: 4648 findFlags = SampleTable::kFlagAfter; 4649 break; 4650 case ReadOptions::SEEK_CLOSEST_SYNC: 4651 case ReadOptions::SEEK_CLOSEST: 4652 findFlags = SampleTable::kFlagClosest; 4653 break; 4654 default: 4655 CHECK(!"Should not be here."); 4656 break; 4657 } 4658 4659 uint32_t sampleIndex; 4660 status_t err = mSampleTable->findSampleAtTime( 4661 seekTimeUs, 1000000, mTimescale, 4662 &sampleIndex, findFlags); 4663 4664 if (mode == ReadOptions::SEEK_CLOSEST) { 4665 // We found the closest sample already, now we want the sync 4666 // sample preceding it (or the sample itself of course), even 4667 // if the subsequent sync sample is closer. 4668 findFlags = SampleTable::kFlagBefore; 4669 } 4670 4671 uint32_t syncSampleIndex; 4672 if (err == OK) { 4673 err = mSampleTable->findSyncSampleNear( 4674 sampleIndex, &syncSampleIndex, findFlags); 4675 } 4676 4677 uint32_t sampleTime; 4678 if (err == OK) { 4679 err = mSampleTable->getMetaDataForSample( 4680 sampleIndex, NULL, NULL, &sampleTime); 4681 } 4682 4683 if (err != OK) { 4684 if (err == ERROR_OUT_OF_RANGE) { 4685 // An attempt to seek past the end of the stream would 4686 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 4687 // this all the way to the MediaPlayer would cause abnormal 4688 // termination. Legacy behaviour appears to be to behave as if 4689 // we had seeked to the end of stream, ending normally. 4690 err = ERROR_END_OF_STREAM; 4691 } 4692 ALOGV("end of stream"); 4693 return err; 4694 } 4695 4696 if (mode == ReadOptions::SEEK_CLOSEST) { 4697 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 4698 } 4699 4700#if 0 4701 uint32_t syncSampleTime; 4702 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 4703 syncSampleIndex, NULL, NULL, &syncSampleTime)); 4704 4705 ALOGI("seek to time %lld us => sample at time %lld us, " 4706 "sync sample at time %lld us", 4707 seekTimeUs, 4708 sampleTime * 1000000ll / mTimescale, 4709 syncSampleTime * 1000000ll / mTimescale); 4710#endif 4711 4712 mCurrentSampleIndex = syncSampleIndex; 4713 } 4714 4715 if (mBuffer != NULL) { 4716 mBuffer->release(); 4717 mBuffer = NULL; 4718 } 4719 4720 // fall through 4721 } 4722 4723 off64_t offset = 0; 4724 size_t size = 0; 4725 uint32_t cts, stts; 4726 bool isSyncSample; 4727 bool newBuffer = false; 4728 if (mBuffer == NULL) { 4729 newBuffer = true; 4730 4731 status_t err; 4732 if (!mIsHEIF) { 4733 err = mSampleTable->getMetaDataForSample( 4734 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts); 4735 } else { 4736 err = mItemTable->getImageOffsetAndSize( 4737 options && options->getSeekTo(&seekTimeUs, &mode) ? 4738 &mCurrentSampleIndex : NULL, &offset, &size); 4739 4740 cts = stts = 0; 4741 isSyncSample = 0; 4742 ALOGV("image offset %lld, size %zu", (long long)offset, size); 4743 } 4744 4745 if (err != OK) { 4746 return err; 4747 } 4748 4749 err = mGroup->acquire_buffer(&mBuffer); 4750 4751 if (err != OK) { 4752 CHECK(mBuffer == NULL); 4753 return err; 4754 } 4755 if (size > mBuffer->size()) { 4756 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 4757 return ERROR_BUFFER_TOO_SMALL; 4758 } 4759 } 4760 4761 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) { 4762 if (newBuffer) { 4763 ssize_t num_bytes_read = 4764 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 4765 4766 if (num_bytes_read < (ssize_t)size) { 4767 mBuffer->release(); 4768 mBuffer = NULL; 4769 4770 return ERROR_IO; 4771 } 4772 4773 CHECK(mBuffer != NULL); 4774 mBuffer->set_range(0, size); 4775 mBuffer->meta_data()->clear(); 4776 mBuffer->meta_data()->setInt64( 4777 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4778 mBuffer->meta_data()->setInt64( 4779 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 4780 4781 if (targetSampleTimeUs >= 0) { 4782 mBuffer->meta_data()->setInt64( 4783 kKeyTargetTime, targetSampleTimeUs); 4784 } 4785 4786 if (isSyncSample) { 4787 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4788 } 4789 4790 ++mCurrentSampleIndex; 4791 } 4792 4793 if (!mIsAVC && !mIsHEVC) { 4794 *out = mBuffer; 4795 mBuffer = NULL; 4796 4797 return OK; 4798 } 4799 4800 // Each NAL unit is split up into its constituent fragments and 4801 // each one of them returned in its own buffer. 4802 4803 CHECK(mBuffer->range_length() >= mNALLengthSize); 4804 4805 const uint8_t *src = 4806 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 4807 4808 size_t nal_size = parseNALSize(src); 4809 if (mNALLengthSize > SIZE_MAX - nal_size) { 4810 ALOGE("b/24441553, b/24445122"); 4811 } 4812 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 4813 ALOGE("incomplete NAL unit."); 4814 4815 mBuffer->release(); 4816 mBuffer = NULL; 4817 4818 return ERROR_MALFORMED; 4819 } 4820 4821 MediaBuffer *clone = mBuffer->clone(); 4822 CHECK(clone != NULL); 4823 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 4824 4825 CHECK(mBuffer != NULL); 4826 mBuffer->set_range( 4827 mBuffer->range_offset() + mNALLengthSize + nal_size, 4828 mBuffer->range_length() - mNALLengthSize - nal_size); 4829 4830 if (mBuffer->range_length() == 0) { 4831 mBuffer->release(); 4832 mBuffer = NULL; 4833 } 4834 4835 *out = clone; 4836 4837 return OK; 4838 } else { 4839 // Whole NAL units are returned but each fragment is prefixed by 4840 // the start code (0x00 00 00 01). 4841 ssize_t num_bytes_read = 0; 4842 int32_t drm = 0; 4843 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 4844 if (usesDRM) { 4845 num_bytes_read = 4846 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 4847 } else { 4848 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 4849 } 4850 4851 if (num_bytes_read < (ssize_t)size) { 4852 mBuffer->release(); 4853 mBuffer = NULL; 4854 4855 return ERROR_IO; 4856 } 4857 4858 if (usesDRM) { 4859 CHECK(mBuffer != NULL); 4860 mBuffer->set_range(0, size); 4861 4862 } else { 4863 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4864 size_t srcOffset = 0; 4865 size_t dstOffset = 0; 4866 4867 while (srcOffset < size) { 4868 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 4869 size_t nalLength = 0; 4870 if (!isMalFormed) { 4871 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4872 srcOffset += mNALLengthSize; 4873 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength); 4874 } 4875 4876 if (isMalFormed) { 4877 ALOGE("Video is malformed"); 4878 mBuffer->release(); 4879 mBuffer = NULL; 4880 return ERROR_MALFORMED; 4881 } 4882 4883 if (nalLength == 0) { 4884 continue; 4885 } 4886 4887 if (dstOffset > SIZE_MAX - 4 || 4888 dstOffset + 4 > SIZE_MAX - nalLength || 4889 dstOffset + 4 + nalLength > mBuffer->size()) { 4890 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size()); 4891 android_errorWriteLog(0x534e4554, "27208621"); 4892 mBuffer->release(); 4893 mBuffer = NULL; 4894 return ERROR_MALFORMED; 4895 } 4896 4897 dstData[dstOffset++] = 0; 4898 dstData[dstOffset++] = 0; 4899 dstData[dstOffset++] = 0; 4900 dstData[dstOffset++] = 1; 4901 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4902 srcOffset += nalLength; 4903 dstOffset += nalLength; 4904 } 4905 CHECK_EQ(srcOffset, size); 4906 CHECK(mBuffer != NULL); 4907 mBuffer->set_range(0, dstOffset); 4908 } 4909 4910 mBuffer->meta_data()->clear(); 4911 mBuffer->meta_data()->setInt64( 4912 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4913 mBuffer->meta_data()->setInt64( 4914 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 4915 4916 if (targetSampleTimeUs >= 0) { 4917 mBuffer->meta_data()->setInt64( 4918 kKeyTargetTime, targetSampleTimeUs); 4919 } 4920 4921 if (mIsAVC) { 4922 uint32_t layerId = FindAVCLayerId( 4923 (const uint8_t *)mBuffer->data(), mBuffer->range_length()); 4924 mBuffer->meta_data()->setInt32(kKeyTemporalLayerId, layerId); 4925 } 4926 4927 if (isSyncSample) { 4928 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4929 } 4930 4931 ++mCurrentSampleIndex; 4932 4933 *out = mBuffer; 4934 mBuffer = NULL; 4935 4936 return OK; 4937 } 4938} 4939 4940status_t MPEG4Source::fragmentedRead( 4941 MediaBuffer **out, const ReadOptions *options) { 4942 4943 ALOGV("MPEG4Source::fragmentedRead"); 4944 4945 CHECK(mStarted); 4946 4947 *out = NULL; 4948 4949 int64_t targetSampleTimeUs = -1; 4950 4951 int64_t seekTimeUs; 4952 ReadOptions::SeekMode mode; 4953 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 4954 4955 int numSidxEntries = mSegments.size(); 4956 if (numSidxEntries != 0) { 4957 int64_t totalTime = 0; 4958 off64_t totalOffset = mFirstMoofOffset; 4959 for (int i = 0; i < numSidxEntries; i++) { 4960 const SidxEntry *se = &mSegments[i]; 4961 if (totalTime + se->mDurationUs > seekTimeUs) { 4962 // The requested time is somewhere in this segment 4963 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) || 4964 (mode == ReadOptions::SEEK_CLOSEST_SYNC && 4965 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 4966 // requested next sync, or closest sync and it was closer to the end of 4967 // this segment 4968 totalTime += se->mDurationUs; 4969 totalOffset += se->mSize; 4970 } 4971 break; 4972 } 4973 totalTime += se->mDurationUs; 4974 totalOffset += se->mSize; 4975 } 4976 mCurrentMoofOffset = totalOffset; 4977 mNextMoofOffset = -1; 4978 mCurrentSamples.clear(); 4979 mCurrentSampleIndex = 0; 4980 status_t err = parseChunk(&totalOffset); 4981 if (err != OK) { 4982 return err; 4983 } 4984 mCurrentTime = totalTime * mTimescale / 1000000ll; 4985 } else { 4986 // without sidx boxes, we can only seek to 0 4987 mCurrentMoofOffset = mFirstMoofOffset; 4988 mNextMoofOffset = -1; 4989 mCurrentSamples.clear(); 4990 mCurrentSampleIndex = 0; 4991 off64_t tmp = mCurrentMoofOffset; 4992 status_t err = parseChunk(&tmp); 4993 if (err != OK) { 4994 return err; 4995 } 4996 mCurrentTime = 0; 4997 } 4998 4999 if (mBuffer != NULL) { 5000 mBuffer->release(); 5001 mBuffer = NULL; 5002 } 5003 5004 // fall through 5005 } 5006 5007 off64_t offset = 0; 5008 size_t size = 0; 5009 uint32_t cts = 0; 5010 bool isSyncSample = false; 5011 bool newBuffer = false; 5012 if (mBuffer == NULL) { 5013 newBuffer = true; 5014 5015 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 5016 // move to next fragment if there is one 5017 if (mNextMoofOffset <= mCurrentMoofOffset) { 5018 return ERROR_END_OF_STREAM; 5019 } 5020 off64_t nextMoof = mNextMoofOffset; 5021 mCurrentMoofOffset = nextMoof; 5022 mCurrentSamples.clear(); 5023 mCurrentSampleIndex = 0; 5024 status_t err = parseChunk(&nextMoof); 5025 if (err != OK) { 5026 return err; 5027 } 5028 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 5029 return ERROR_END_OF_STREAM; 5030 } 5031 } 5032 5033 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 5034 offset = smpl->offset; 5035 size = smpl->size; 5036 cts = mCurrentTime + smpl->compositionOffset; 5037 mCurrentTime += smpl->duration; 5038 isSyncSample = (mCurrentSampleIndex == 0); // XXX 5039 5040 status_t err = mGroup->acquire_buffer(&mBuffer); 5041 5042 if (err != OK) { 5043 CHECK(mBuffer == NULL); 5044 ALOGV("acquire_buffer returned %d", err); 5045 return err; 5046 } 5047 if (size > mBuffer->size()) { 5048 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 5049 return ERROR_BUFFER_TOO_SMALL; 5050 } 5051 } 5052 5053 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 5054 const sp<MetaData> bufmeta = mBuffer->meta_data(); 5055 bufmeta->clear(); 5056 if (smpl->encryptedsizes.size()) { 5057 // store clear/encrypted lengths in metadata 5058 bufmeta->setData(kKeyPlainSizes, 0, 5059 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 5060 bufmeta->setData(kKeyEncryptedSizes, 0, 5061 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 5062 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size? 5063 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 5064 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode); 5065 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16); 5066 } 5067 5068 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) { 5069 if (newBuffer) { 5070 if (!isInRange((size_t)0u, mBuffer->size(), size)) { 5071 mBuffer->release(); 5072 mBuffer = NULL; 5073 5074 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size); 5075 return ERROR_MALFORMED; 5076 } 5077 5078 ssize_t num_bytes_read = 5079 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 5080 5081 if (num_bytes_read < (ssize_t)size) { 5082 mBuffer->release(); 5083 mBuffer = NULL; 5084 5085 ALOGE("i/o error"); 5086 return ERROR_IO; 5087 } 5088 5089 CHECK(mBuffer != NULL); 5090 mBuffer->set_range(0, size); 5091 mBuffer->meta_data()->setInt64( 5092 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 5093 mBuffer->meta_data()->setInt64( 5094 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 5095 5096 if (targetSampleTimeUs >= 0) { 5097 mBuffer->meta_data()->setInt64( 5098 kKeyTargetTime, targetSampleTimeUs); 5099 } 5100 5101 if (mIsAVC) { 5102 uint32_t layerId = FindAVCLayerId( 5103 (const uint8_t *)mBuffer->data(), mBuffer->range_length()); 5104 mBuffer->meta_data()->setInt32(kKeyTemporalLayerId, layerId); 5105 } 5106 5107 if (isSyncSample) { 5108 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 5109 } 5110 5111 ++mCurrentSampleIndex; 5112 } 5113 5114 if (!mIsAVC && !mIsHEVC) { 5115 *out = mBuffer; 5116 mBuffer = NULL; 5117 5118 return OK; 5119 } 5120 5121 // Each NAL unit is split up into its constituent fragments and 5122 // each one of them returned in its own buffer. 5123 5124 CHECK(mBuffer->range_length() >= mNALLengthSize); 5125 5126 const uint8_t *src = 5127 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 5128 5129 size_t nal_size = parseNALSize(src); 5130 if (mNALLengthSize > SIZE_MAX - nal_size) { 5131 ALOGE("b/24441553, b/24445122"); 5132 } 5133 5134 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 5135 ALOGE("incomplete NAL unit."); 5136 5137 mBuffer->release(); 5138 mBuffer = NULL; 5139 5140 return ERROR_MALFORMED; 5141 } 5142 5143 MediaBuffer *clone = mBuffer->clone(); 5144 CHECK(clone != NULL); 5145 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 5146 5147 CHECK(mBuffer != NULL); 5148 mBuffer->set_range( 5149 mBuffer->range_offset() + mNALLengthSize + nal_size, 5150 mBuffer->range_length() - mNALLengthSize - nal_size); 5151 5152 if (mBuffer->range_length() == 0) { 5153 mBuffer->release(); 5154 mBuffer = NULL; 5155 } 5156 5157 *out = clone; 5158 5159 return OK; 5160 } else { 5161 ALOGV("whole NAL"); 5162 // Whole NAL units are returned but each fragment is prefixed by 5163 // the start code (0x00 00 00 01). 5164 ssize_t num_bytes_read = 0; 5165 int32_t drm = 0; 5166 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 5167 void *data = NULL; 5168 bool isMalFormed = false; 5169 if (usesDRM) { 5170 if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) { 5171 isMalFormed = true; 5172 } else { 5173 data = mBuffer->data(); 5174 } 5175 } else { 5176 int32_t max_size; 5177 if (mFormat == NULL 5178 || !mFormat->findInt32(kKeyMaxInputSize, &max_size) 5179 || !isInRange((size_t)0u, (size_t)max_size, size)) { 5180 isMalFormed = true; 5181 } else { 5182 data = mSrcBuffer; 5183 } 5184 } 5185 5186 if (isMalFormed || data == NULL) { 5187 ALOGE("isMalFormed size %zu", size); 5188 if (mBuffer != NULL) { 5189 mBuffer->release(); 5190 mBuffer = NULL; 5191 } 5192 return ERROR_MALFORMED; 5193 } 5194 num_bytes_read = mDataSource->readAt(offset, data, size); 5195 5196 if (num_bytes_read < (ssize_t)size) { 5197 mBuffer->release(); 5198 mBuffer = NULL; 5199 5200 ALOGE("i/o error"); 5201 return ERROR_IO; 5202 } 5203 5204 if (usesDRM) { 5205 CHECK(mBuffer != NULL); 5206 mBuffer->set_range(0, size); 5207 5208 } else { 5209 uint8_t *dstData = (uint8_t *)mBuffer->data(); 5210 size_t srcOffset = 0; 5211 size_t dstOffset = 0; 5212 5213 while (srcOffset < size) { 5214 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 5215 size_t nalLength = 0; 5216 if (!isMalFormed) { 5217 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 5218 srcOffset += mNALLengthSize; 5219 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength) 5220 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u) 5221 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength); 5222 } 5223 5224 if (isMalFormed) { 5225 ALOGE("Video is malformed; nalLength %zu", nalLength); 5226 mBuffer->release(); 5227 mBuffer = NULL; 5228 return ERROR_MALFORMED; 5229 } 5230 5231 if (nalLength == 0) { 5232 continue; 5233 } 5234 5235 if (dstOffset > SIZE_MAX - 4 || 5236 dstOffset + 4 > SIZE_MAX - nalLength || 5237 dstOffset + 4 + nalLength > mBuffer->size()) { 5238 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size()); 5239 android_errorWriteLog(0x534e4554, "26365349"); 5240 mBuffer->release(); 5241 mBuffer = NULL; 5242 return ERROR_MALFORMED; 5243 } 5244 5245 dstData[dstOffset++] = 0; 5246 dstData[dstOffset++] = 0; 5247 dstData[dstOffset++] = 0; 5248 dstData[dstOffset++] = 1; 5249 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 5250 srcOffset += nalLength; 5251 dstOffset += nalLength; 5252 } 5253 CHECK_EQ(srcOffset, size); 5254 CHECK(mBuffer != NULL); 5255 mBuffer->set_range(0, dstOffset); 5256 } 5257 5258 mBuffer->meta_data()->setInt64( 5259 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 5260 mBuffer->meta_data()->setInt64( 5261 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 5262 5263 if (targetSampleTimeUs >= 0) { 5264 mBuffer->meta_data()->setInt64( 5265 kKeyTargetTime, targetSampleTimeUs); 5266 } 5267 5268 if (isSyncSample) { 5269 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 5270 } 5271 5272 ++mCurrentSampleIndex; 5273 5274 *out = mBuffer; 5275 mBuffer = NULL; 5276 5277 return OK; 5278 } 5279} 5280 5281MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 5282 const char *mimePrefix) { 5283 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 5284 const char *mime; 5285 if (track->meta != NULL 5286 && track->meta->findCString(kKeyMIMEType, &mime) 5287 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 5288 return track; 5289 } 5290 } 5291 5292 return NULL; 5293} 5294 5295void MPEG4Extractor::populateMetrics() { 5296 ALOGV("MPEG4Extractor::populateMetrics"); 5297 // write into mAnalyticsItem 5298} 5299 5300static bool LegacySniffMPEG4( 5301 const sp<DataSource> &source, String8 *mimeType, float *confidence) { 5302 uint8_t header[8]; 5303 5304 ssize_t n = source->readAt(4, header, sizeof(header)); 5305 if (n < (ssize_t)sizeof(header)) { 5306 return false; 5307 } 5308 5309 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 5310 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) 5311 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 5312 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 5313 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 5314 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8) 5315 || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)) { 5316 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 5317 *confidence = 0.4; 5318 5319 return true; 5320 } 5321 5322 return false; 5323} 5324 5325static bool isCompatibleBrand(uint32_t fourcc) { 5326 static const uint32_t kCompatibleBrands[] = { 5327 FOURCC('i', 's', 'o', 'm'), 5328 FOURCC('i', 's', 'o', '2'), 5329 FOURCC('a', 'v', 'c', '1'), 5330 FOURCC('h', 'v', 'c', '1'), 5331 FOURCC('h', 'e', 'v', '1'), 5332 FOURCC('3', 'g', 'p', '4'), 5333 FOURCC('m', 'p', '4', '1'), 5334 FOURCC('m', 'p', '4', '2'), 5335 FOURCC('d', 'a', 's', 'h'), 5336 5337 // Won't promise that the following file types can be played. 5338 // Just give these file types a chance. 5339 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 5340 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 5341 5342 FOURCC('3', 'g', '2', 'a'), // 3GPP2 5343 FOURCC('3', 'g', '2', 'b'), 5344 FOURCC('m', 'i', 'f', '1'), // HEIF image 5345 FOURCC('h', 'e', 'i', 'c'), // HEIF image 5346 }; 5347 5348 for (size_t i = 0; 5349 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 5350 ++i) { 5351 if (kCompatibleBrands[i] == fourcc) { 5352 return true; 5353 } 5354 } 5355 5356 return false; 5357} 5358 5359// Attempt to actually parse the 'ftyp' atom and determine if a suitable 5360// compatible brand is present. 5361// Also try to identify where this file's metadata ends 5362// (end of the 'moov' atom) and report it to the caller as part of 5363// the metadata. 5364static bool BetterSniffMPEG4( 5365 const sp<DataSource> &source, String8 *mimeType, float *confidence, 5366 sp<AMessage> *meta) { 5367 // We scan up to 128 bytes to identify this file as an MP4. 5368 static const off64_t kMaxScanOffset = 128ll; 5369 5370 off64_t offset = 0ll; 5371 bool foundGoodFileType = false; 5372 off64_t moovAtomEndOffset = -1ll; 5373 bool done = false; 5374 5375 while (!done && offset < kMaxScanOffset) { 5376 uint32_t hdr[2]; 5377 if (source->readAt(offset, hdr, 8) < 8) { 5378 return false; 5379 } 5380 5381 uint64_t chunkSize = ntohl(hdr[0]); 5382 uint32_t chunkType = ntohl(hdr[1]); 5383 off64_t chunkDataOffset = offset + 8; 5384 5385 if (chunkSize == 1) { 5386 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 5387 return false; 5388 } 5389 5390 chunkSize = ntoh64(chunkSize); 5391 chunkDataOffset += 8; 5392 5393 if (chunkSize < 16) { 5394 // The smallest valid chunk is 16 bytes long in this case. 5395 return false; 5396 } 5397 5398 } else if (chunkSize < 8) { 5399 // The smallest valid chunk is 8 bytes long. 5400 return false; 5401 } 5402 5403 // (data_offset - offset) is either 8 or 16 5404 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset); 5405 if (chunkDataSize < 0) { 5406 ALOGE("b/23540914"); 5407 return false; 5408 } 5409 5410 char chunkstring[5]; 5411 MakeFourCCString(chunkType, chunkstring); 5412 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset); 5413 switch (chunkType) { 5414 case FOURCC('f', 't', 'y', 'p'): 5415 { 5416 if (chunkDataSize < 8) { 5417 return false; 5418 } 5419 5420 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 5421 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 5422 if (i == 1) { 5423 // Skip this index, it refers to the minorVersion, 5424 // not a brand. 5425 continue; 5426 } 5427 5428 uint32_t brand; 5429 if (source->readAt( 5430 chunkDataOffset + 4 * i, &brand, 4) < 4) { 5431 return false; 5432 } 5433 5434 brand = ntohl(brand); 5435 5436 if (isCompatibleBrand(brand)) { 5437 foundGoodFileType = true; 5438 break; 5439 } 5440 } 5441 5442 if (!foundGoodFileType) { 5443 return false; 5444 } 5445 5446 break; 5447 } 5448 5449 case FOURCC('m', 'o', 'o', 'v'): 5450 { 5451 moovAtomEndOffset = offset + chunkSize; 5452 5453 done = true; 5454 break; 5455 } 5456 5457 default: 5458 break; 5459 } 5460 5461 offset += chunkSize; 5462 } 5463 5464 if (!foundGoodFileType) { 5465 return false; 5466 } 5467 5468 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 5469 *confidence = 0.4f; 5470 5471 if (moovAtomEndOffset >= 0) { 5472 *meta = new AMessage; 5473 (*meta)->setInt64("meta-data-size", moovAtomEndOffset); 5474 5475 ALOGV("found metadata size: %lld", (long long)moovAtomEndOffset); 5476 } 5477 5478 return true; 5479} 5480 5481static MediaExtractor* CreateExtractor( 5482 const sp<DataSource> &source, 5483 const sp<AMessage>& meta __unused) { 5484 return new MPEG4Extractor(source); 5485} 5486 5487static MediaExtractor::CreatorFunc Sniff( 5488 const sp<DataSource> &source, 5489 String8 *mimeType, 5490 float *confidence, 5491 sp<AMessage> *meta) { 5492 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) { 5493 return CreateExtractor; 5494 } 5495 5496 if (LegacySniffMPEG4(source, mimeType, confidence)) { 5497 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 5498 return CreateExtractor; 5499 } 5500 5501 return NULL; 5502} 5503 5504extern "C" { 5505// This is the only symbol that needs to be exported 5506__attribute__ ((visibility ("default"))) 5507MediaExtractor::ExtractorDef GETEXTRACTORDEF() { 5508 return { 5509 MediaExtractor::EXTRACTORDEF_VERSION, 5510 UUID("27575c67-4417-4c54-8d3d-8e626985a164"), 5511 1, // version 5512 "MP4 Extractor", 5513 Sniff 5514 }; 5515} 5516 5517} // extern "C" 5518 5519} // namespace android 5520