MPEG4Extractor.cpp revision 7b78bfd448028aeeb642cb35d46cc96b205f24f9
1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17//#define LOG_NDEBUG 0 18#define LOG_TAG "MPEG4Extractor" 19 20#include <ctype.h> 21#include <inttypes.h> 22#include <memory> 23#include <stdint.h> 24#include <stdlib.h> 25#include <string.h> 26 27#include <utils/Log.h> 28 29#include "MPEG4Extractor.h" 30#include "SampleTable.h" 31#include "ItemTable.h" 32#include "include/ESDS.h" 33 34#include <media/ExtractorUtils.h> 35#include <media/MediaTrack.h> 36#include <media/stagefright/foundation/ABitReader.h> 37#include <media/stagefright/foundation/ABuffer.h> 38#include <media/stagefright/foundation/ADebug.h> 39#include <media/stagefright/foundation/AMessage.h> 40#include <media/stagefright/foundation/AUtils.h> 41#include <media/stagefright/foundation/ByteUtils.h> 42#include <media/stagefright/foundation/ColorUtils.h> 43#include <media/stagefright/foundation/avc_utils.h> 44#include <media/stagefright/foundation/hexdump.h> 45#include <media/stagefright/MediaBufferBase.h> 46#include <media/stagefright/MediaBufferGroup.h> 47#include <media/stagefright/MediaDefs.h> 48#include <media/stagefright/MetaData.h> 49#include <utils/String8.h> 50 51#include <byteswap.h> 52#include "include/ID3.h" 53 54#ifndef UINT32_MAX 55#define UINT32_MAX (4294967295U) 56#endif 57 58namespace android { 59 60enum { 61 // max track header chunk to return 62 kMaxTrackHeaderSize = 32, 63 64 // maximum size of an atom. Some atoms can be bigger according to the spec, 65 // but we only allow up to this size. 66 kMaxAtomSize = 64 * 1024 * 1024, 67}; 68 69class MPEG4Source : public MediaTrack { 70public: 71 // Caller retains ownership of both "dataSource" and "sampleTable". 72 MPEG4Source(MetaDataBase &format, 73 DataSourceBase *dataSource, 74 int32_t timeScale, 75 const sp<SampleTable> &sampleTable, 76 Vector<SidxEntry> &sidx, 77 const Trex *trex, 78 off64_t firstMoofOffset, 79 const sp<ItemTable> &itemTable); 80 virtual status_t init(); 81 82 virtual status_t start(MetaDataBase *params = NULL); 83 virtual status_t stop(); 84 85 virtual status_t getFormat(MetaDataBase &); 86 87 virtual status_t read(MediaBufferBase **buffer, const ReadOptions *options = NULL); 88 virtual bool supportNonblockingRead() { return true; } 89 virtual status_t fragmentedRead(MediaBufferBase **buffer, const ReadOptions *options = NULL); 90 91 virtual ~MPEG4Source(); 92 93private: 94 Mutex mLock; 95 96 MetaDataBase &mFormat; 97 DataSourceBase *mDataSource; 98 int32_t mTimescale; 99 sp<SampleTable> mSampleTable; 100 uint32_t mCurrentSampleIndex; 101 uint32_t mCurrentFragmentIndex; 102 Vector<SidxEntry> &mSegments; 103 const Trex *mTrex; 104 off64_t mFirstMoofOffset; 105 off64_t mCurrentMoofOffset; 106 off64_t mNextMoofOffset; 107 uint32_t mCurrentTime; 108 int32_t mLastParsedTrackId; 109 int32_t mTrackId; 110 111 int32_t mCryptoMode; // passed in from extractor 112 int32_t mDefaultIVSize; // passed in from extractor 113 uint8_t mCryptoKey[16]; // passed in from extractor 114 int32_t mDefaultEncryptedByteBlock; 115 int32_t mDefaultSkipByteBlock; 116 uint32_t mCurrentAuxInfoType; 117 uint32_t mCurrentAuxInfoTypeParameter; 118 int32_t mCurrentDefaultSampleInfoSize; 119 uint32_t mCurrentSampleInfoCount; 120 uint32_t mCurrentSampleInfoAllocSize; 121 uint8_t* mCurrentSampleInfoSizes; 122 uint32_t mCurrentSampleInfoOffsetCount; 123 uint32_t mCurrentSampleInfoOffsetsAllocSize; 124 uint64_t* mCurrentSampleInfoOffsets; 125 126 bool mIsAVC; 127 bool mIsHEVC; 128 size_t mNALLengthSize; 129 130 bool mStarted; 131 132 MediaBufferGroup *mGroup; 133 134 MediaBufferBase *mBuffer; 135 136 bool mWantsNALFragments; 137 138 uint8_t *mSrcBuffer; 139 140 bool mIsHeif; 141 sp<ItemTable> mItemTable; 142 143 size_t parseNALSize(const uint8_t *data) const; 144 status_t parseChunk(off64_t *offset); 145 status_t parseTrackFragmentHeader(off64_t offset, off64_t size); 146 status_t parseTrackFragmentRun(off64_t offset, off64_t size); 147 status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size); 148 status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size); 149 status_t parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags); 150 status_t parseSampleEncryption(off64_t offset); 151 152 struct TrackFragmentHeaderInfo { 153 enum Flags { 154 kBaseDataOffsetPresent = 0x01, 155 kSampleDescriptionIndexPresent = 0x02, 156 kDefaultSampleDurationPresent = 0x08, 157 kDefaultSampleSizePresent = 0x10, 158 kDefaultSampleFlagsPresent = 0x20, 159 kDurationIsEmpty = 0x10000, 160 }; 161 162 uint32_t mTrackID; 163 uint32_t mFlags; 164 uint64_t mBaseDataOffset; 165 uint32_t mSampleDescriptionIndex; 166 uint32_t mDefaultSampleDuration; 167 uint32_t mDefaultSampleSize; 168 uint32_t mDefaultSampleFlags; 169 170 uint64_t mDataOffset; 171 }; 172 TrackFragmentHeaderInfo mTrackFragmentHeaderInfo; 173 174 struct Sample { 175 off64_t offset; 176 size_t size; 177 uint32_t duration; 178 int32_t compositionOffset; 179 uint8_t iv[16]; 180 Vector<size_t> clearsizes; 181 Vector<size_t> encryptedsizes; 182 }; 183 Vector<Sample> mCurrentSamples; 184 185 MPEG4Source(const MPEG4Source &); 186 MPEG4Source &operator=(const MPEG4Source &); 187}; 188 189// This custom data source wraps an existing one and satisfies requests 190// falling entirely within a cached range from the cache while forwarding 191// all remaining requests to the wrapped datasource. 192// This is used to cache the full sampletable metadata for a single track, 193// possibly wrapping multiple times to cover all tracks, i.e. 194// Each CachedRangedDataSource caches the sampletable metadata for a single track. 195 196struct CachedRangedDataSource : public DataSourceBase { 197 explicit CachedRangedDataSource(DataSourceBase *source); 198 virtual ~CachedRangedDataSource(); 199 200 virtual status_t initCheck() const; 201 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 202 virtual status_t getSize(off64_t *size); 203 virtual uint32_t flags(); 204 205 status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess); 206 207 208private: 209 Mutex mLock; 210 211 DataSourceBase *mSource; 212 bool mOwnsDataSource; 213 off64_t mCachedOffset; 214 size_t mCachedSize; 215 uint8_t *mCache; 216 217 void clearCache(); 218 219 CachedRangedDataSource(const CachedRangedDataSource &); 220 CachedRangedDataSource &operator=(const CachedRangedDataSource &); 221}; 222 223CachedRangedDataSource::CachedRangedDataSource(DataSourceBase *source) 224 : mSource(source), 225 mOwnsDataSource(false), 226 mCachedOffset(0), 227 mCachedSize(0), 228 mCache(NULL) { 229} 230 231CachedRangedDataSource::~CachedRangedDataSource() { 232 clearCache(); 233 if (mOwnsDataSource) { 234 delete (CachedRangedDataSource*)mSource; 235 } 236} 237 238void CachedRangedDataSource::clearCache() { 239 if (mCache) { 240 free(mCache); 241 mCache = NULL; 242 } 243 244 mCachedOffset = 0; 245 mCachedSize = 0; 246} 247 248status_t CachedRangedDataSource::initCheck() const { 249 return mSource->initCheck(); 250} 251 252ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) { 253 Mutex::Autolock autoLock(mLock); 254 255 if (isInRange(mCachedOffset, mCachedSize, offset, size)) { 256 memcpy(data, &mCache[offset - mCachedOffset], size); 257 return size; 258 } 259 260 return mSource->readAt(offset, data, size); 261} 262 263status_t CachedRangedDataSource::getSize(off64_t *size) { 264 return mSource->getSize(size); 265} 266 267uint32_t CachedRangedDataSource::flags() { 268 return mSource->flags(); 269} 270 271status_t CachedRangedDataSource::setCachedRange(off64_t offset, 272 size_t size, 273 bool assumeSourceOwnershipOnSuccess) { 274 Mutex::Autolock autoLock(mLock); 275 276 clearCache(); 277 278 mCache = (uint8_t *)malloc(size); 279 280 if (mCache == NULL) { 281 return -ENOMEM; 282 } 283 284 mCachedOffset = offset; 285 mCachedSize = size; 286 287 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize); 288 289 if (err < (ssize_t)size) { 290 clearCache(); 291 292 return ERROR_IO; 293 } 294 mOwnsDataSource = assumeSourceOwnershipOnSuccess; 295 return OK; 296} 297 298//////////////////////////////////////////////////////////////////////////////// 299 300static const bool kUseHexDump = false; 301 302static const char *FourCC2MIME(uint32_t fourcc) { 303 switch (fourcc) { 304 case FOURCC('m', 'p', '4', 'a'): 305 return MEDIA_MIMETYPE_AUDIO_AAC; 306 307 case FOURCC('s', 'a', 'm', 'r'): 308 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 309 310 case FOURCC('s', 'a', 'w', 'b'): 311 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 312 313 case FOURCC('m', 'p', '4', 'v'): 314 return MEDIA_MIMETYPE_VIDEO_MPEG4; 315 316 case FOURCC('s', '2', '6', '3'): 317 case FOURCC('h', '2', '6', '3'): 318 case FOURCC('H', '2', '6', '3'): 319 return MEDIA_MIMETYPE_VIDEO_H263; 320 321 case FOURCC('a', 'v', 'c', '1'): 322 return MEDIA_MIMETYPE_VIDEO_AVC; 323 324 case FOURCC('h', 'v', 'c', '1'): 325 case FOURCC('h', 'e', 'v', '1'): 326 return MEDIA_MIMETYPE_VIDEO_HEVC; 327 default: 328 ALOGW("Unknown fourcc: %c%c%c%c", 329 (fourcc >> 24) & 0xff, 330 (fourcc >> 16) & 0xff, 331 (fourcc >> 8) & 0xff, 332 fourcc & 0xff 333 ); 334 return "application/octet-stream"; 335 } 336} 337 338static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 339 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 340 // AMR NB audio is always mono, 8kHz 341 *channels = 1; 342 *rate = 8000; 343 return true; 344 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 345 // AMR WB audio is always mono, 16kHz 346 *channels = 1; 347 *rate = 16000; 348 return true; 349 } 350 return false; 351} 352 353MPEG4Extractor::MPEG4Extractor(DataSourceBase *source, const char *mime) 354 : mMoofOffset(0), 355 mMoofFound(false), 356 mMdatFound(false), 357 mDataSource(source), 358 mCachedSource(NULL), 359 mInitCheck(NO_INIT), 360 mHeaderTimescale(0), 361 mIsQT(false), 362 mIsHeif(false), 363 mHasMoovBox(false), 364 mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)), 365 mFirstTrack(NULL), 366 mLastTrack(NULL) { 367 ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif); 368} 369 370MPEG4Extractor::~MPEG4Extractor() { 371 Track *track = mFirstTrack; 372 while (track) { 373 Track *next = track->next; 374 375 delete track; 376 track = next; 377 } 378 mFirstTrack = mLastTrack = NULL; 379 380 for (size_t i = 0; i < mPssh.size(); i++) { 381 delete [] mPssh[i].data; 382 } 383 mPssh.clear(); 384 385 delete mCachedSource; 386} 387 388uint32_t MPEG4Extractor::flags() const { 389 return CAN_PAUSE | 390 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 391 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 392} 393 394status_t MPEG4Extractor::getMetaData(MetaDataBase &meta) { 395 status_t err; 396 if ((err = readMetaData()) != OK) { 397 return UNKNOWN_ERROR; 398 } 399 meta = mFileMetaData; 400 return OK; 401} 402 403size_t MPEG4Extractor::countTracks() { 404 status_t err; 405 if ((err = readMetaData()) != OK) { 406 ALOGV("MPEG4Extractor::countTracks: no tracks"); 407 return 0; 408 } 409 410 size_t n = 0; 411 Track *track = mFirstTrack; 412 while (track) { 413 ++n; 414 track = track->next; 415 } 416 417 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n); 418 return n; 419} 420 421status_t MPEG4Extractor::getTrackMetaData( 422 MetaDataBase &meta, 423 size_t index, uint32_t flags) { 424 status_t err; 425 if ((err = readMetaData()) != OK) { 426 return UNKNOWN_ERROR; 427 } 428 429 Track *track = mFirstTrack; 430 while (index > 0) { 431 if (track == NULL) { 432 return UNKNOWN_ERROR; 433 } 434 435 track = track->next; 436 --index; 437 } 438 439 if (track == NULL) { 440 return UNKNOWN_ERROR; 441 } 442 443 [=] { 444 int64_t duration; 445 int32_t samplerate; 446 if (track->has_elst && mHeaderTimescale != 0 && 447 track->meta.findInt64(kKeyDuration, &duration) && 448 track->meta.findInt32(kKeySampleRate, &samplerate)) { 449 450 track->has_elst = false; 451 452 if (track->elst_segment_duration > INT64_MAX) { 453 return; 454 } 455 int64_t segment_duration = track->elst_segment_duration; 456 int64_t media_time = track->elst_media_time; 457 int64_t halfscale = mHeaderTimescale / 2; 458 ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64 459 ", halfscale = %" PRId64 ", timescale = %d", 460 segment_duration, 461 media_time, 462 halfscale, 463 mHeaderTimescale); 464 465 int64_t delay; 466 // delay = ((media_time * samplerate) + halfscale) / mHeaderTimescale; 467 if (__builtin_mul_overflow(media_time, samplerate, &delay) || 468 __builtin_add_overflow(delay, halfscale, &delay) || 469 (delay /= mHeaderTimescale, false) || 470 delay > INT32_MAX || 471 delay < INT32_MIN) { 472 return; 473 } 474 ALOGV("delay = %" PRId64, delay); 475 track->meta.setInt32(kKeyEncoderDelay, delay); 476 477 int64_t scaled_duration; 478 // scaled_duration = duration * mHeaderTimescale; 479 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) { 480 return; 481 } 482 ALOGV("scaled_duration = %" PRId64, scaled_duration); 483 484 int64_t segment_end; 485 int64_t padding; 486 // padding = scaled_duration - ((segment_duration + media_time) * 1000000); 487 if (__builtin_add_overflow(segment_duration, media_time, &segment_end) || 488 __builtin_mul_overflow(segment_end, 1000000, &segment_end) || 489 __builtin_sub_overflow(scaled_duration, segment_end, &padding)) { 490 return; 491 } 492 ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding); 493 494 if (padding < 0) { 495 // track duration from media header (which is what kKeyDuration is) might 496 // be slightly shorter than the segment duration, which would make the 497 // padding negative. Clamp to zero. 498 padding = 0; 499 } 500 501 int64_t paddingsamples; 502 int64_t halfscale_e6; 503 int64_t timescale_e6; 504 // paddingsamples = ((padding * samplerate) + (halfscale * 1000000)) 505 // / (mHeaderTimescale * 1000000); 506 if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) || 507 __builtin_mul_overflow(halfscale, 1000000, &halfscale_e6) || 508 __builtin_mul_overflow(mHeaderTimescale, 1000000, ×cale_e6) || 509 __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) || 510 (paddingsamples /= timescale_e6, false) || 511 paddingsamples > INT32_MAX) { 512 return; 513 } 514 ALOGV("paddingsamples = %" PRId64, paddingsamples); 515 track->meta.setInt32(kKeyEncoderPadding, paddingsamples); 516 } 517 }(); 518 519 if ((flags & kIncludeExtensiveMetaData) 520 && !track->includes_expensive_metadata) { 521 track->includes_expensive_metadata = true; 522 523 const char *mime; 524 CHECK(track->meta.findCString(kKeyMIMEType, &mime)); 525 if (!strncasecmp("video/", mime, 6)) { 526 // MPEG2 tracks do not provide CSD, so read the stream header 527 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) { 528 off64_t offset; 529 size_t size; 530 if (track->sampleTable->getMetaDataForSample( 531 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) { 532 if (size > kMaxTrackHeaderSize) { 533 size = kMaxTrackHeaderSize; 534 } 535 uint8_t header[kMaxTrackHeaderSize]; 536 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) { 537 track->meta.setData(kKeyStreamHeader, 'mdat', header, size); 538 } 539 } 540 } 541 542 if (mMoofOffset > 0) { 543 int64_t duration; 544 if (track->meta.findInt64(kKeyDuration, &duration)) { 545 // nothing fancy, just pick a frame near 1/4th of the duration 546 track->meta.setInt64( 547 kKeyThumbnailTime, duration / 4); 548 } 549 } else { 550 uint32_t sampleIndex; 551 uint32_t sampleTime; 552 if (track->timescale != 0 && 553 track->sampleTable->findThumbnailSample(&sampleIndex) == OK 554 && track->sampleTable->getMetaDataForSample( 555 sampleIndex, NULL /* offset */, NULL /* size */, 556 &sampleTime) == OK) { 557 track->meta.setInt64( 558 kKeyThumbnailTime, 559 ((int64_t)sampleTime * 1000000) / track->timescale); 560 } 561 } 562 } 563 } 564 565 meta = track->meta; 566 return OK; 567} 568 569status_t MPEG4Extractor::readMetaData() { 570 if (mInitCheck != NO_INIT) { 571 return mInitCheck; 572 } 573 574 off64_t offset = 0; 575 status_t err; 576 bool sawMoovOrSidx = false; 577 578 while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) || 579 (mIsHeif && (mPreferHeif || !mHasMoovBox) && 580 (mItemTable != NULL) && mItemTable->isValid()))) { 581 off64_t orig_offset = offset; 582 err = parseChunk(&offset, 0); 583 584 if (err != OK && err != UNKNOWN_ERROR) { 585 break; 586 } else if (offset <= orig_offset) { 587 // only continue parsing if the offset was advanced, 588 // otherwise we might end up in an infinite loop 589 ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset); 590 err = ERROR_MALFORMED; 591 break; 592 } else if (err == UNKNOWN_ERROR) { 593 sawMoovOrSidx = true; 594 } 595 } 596 597 if (mIsHeif && (mItemTable != NULL) && (mItemTable->countImages() > 0)) { 598 off64_t exifOffset; 599 size_t exifSize; 600 if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) { 601 mFileMetaData.setInt64(kKeyExifOffset, (int64_t)exifOffset); 602 mFileMetaData.setInt64(kKeyExifSize, (int64_t)exifSize); 603 } 604 for (uint32_t imageIndex = 0; 605 imageIndex < mItemTable->countImages(); imageIndex++) { 606 sp<MetaData> meta = mItemTable->getImageMeta(imageIndex); 607 if (meta == NULL) { 608 ALOGE("heif image %u has no meta!", imageIndex); 609 continue; 610 } 611 // Some heif files advertise image sequence brands (eg. 'hevc') in 612 // ftyp box, but don't have any valid tracks in them. Instead of 613 // reporting the entire file as malformed, we override the error 614 // to allow still images to be extracted. 615 if (err != OK) { 616 ALOGW("Extracting still images only"); 617 err = OK; 618 } 619 mInitCheck = OK; 620 621 ALOGV("adding HEIF image track %u", imageIndex); 622 Track *track = new Track; 623 track->next = NULL; 624 if (mLastTrack != NULL) { 625 mLastTrack->next = track; 626 } else { 627 mFirstTrack = track; 628 } 629 mLastTrack = track; 630 631 track->meta = *(meta.get()); 632 track->meta.setInt32(kKeyTrackID, imageIndex); 633 track->includes_expensive_metadata = false; 634 track->skipTrack = false; 635 track->timescale = 1000000; 636 } 637 } 638 639 if (mInitCheck == OK) { 640 if (findTrackByMimePrefix("video/") != NULL) { 641 mFileMetaData.setCString( 642 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 643 } else if (findTrackByMimePrefix("audio/") != NULL) { 644 mFileMetaData.setCString(kKeyMIMEType, "audio/mp4"); 645 } else if (findTrackByMimePrefix( 646 MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) { 647 mFileMetaData.setCString( 648 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_HEIF); 649 } else { 650 mFileMetaData.setCString(kKeyMIMEType, "application/octet-stream"); 651 } 652 } else { 653 mInitCheck = err; 654 } 655 656 CHECK_NE(err, (status_t)NO_INIT); 657 658 // copy pssh data into file metadata 659 uint64_t psshsize = 0; 660 for (size_t i = 0; i < mPssh.size(); i++) { 661 psshsize += 20 + mPssh[i].datalen; 662 } 663 if (psshsize > 0 && psshsize <= UINT32_MAX) { 664 char *buf = (char*)malloc(psshsize); 665 if (!buf) { 666 ALOGE("b/28471206"); 667 return NO_MEMORY; 668 } 669 char *ptr = buf; 670 for (size_t i = 0; i < mPssh.size(); i++) { 671 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 672 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 673 ptr += (20 + mPssh[i].datalen); 674 } 675 mFileMetaData.setData(kKeyPssh, 'pssh', buf, psshsize); 676 free(buf); 677 } 678 679 return mInitCheck; 680} 681 682struct PathAdder { 683 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 684 : mPath(path) { 685 mPath->push(chunkType); 686 } 687 688 ~PathAdder() { 689 mPath->pop(); 690 } 691 692private: 693 Vector<uint32_t> *mPath; 694 695 PathAdder(const PathAdder &); 696 PathAdder &operator=(const PathAdder &); 697}; 698 699static bool underMetaDataPath(const Vector<uint32_t> &path) { 700 return path.size() >= 5 701 && path[0] == FOURCC('m', 'o', 'o', 'v') 702 && path[1] == FOURCC('u', 'd', 't', 'a') 703 && path[2] == FOURCC('m', 'e', 't', 'a') 704 && path[3] == FOURCC('i', 'l', 's', 't'); 705} 706 707static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) { 708 return path.size() >= 2 709 && path[0] == FOURCC('m', 'o', 'o', 'v') 710 && path[1] == FOURCC('m', 'e', 't', 'a') 711 && (depth == 2 712 || (depth == 3 713 && (path[2] == FOURCC('h', 'd', 'l', 'r') 714 || path[2] == FOURCC('i', 'l', 's', 't') 715 || path[2] == FOURCC('k', 'e', 'y', 's')))); 716} 717 718// Given a time in seconds since Jan 1 1904, produce a human-readable string. 719static bool convertTimeToDate(int64_t time_1904, String8 *s) { 720 // delta between mpeg4 time and unix epoch time 721 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600); 722 if (time_1904 < INT64_MIN + delta) { 723 return false; 724 } 725 time_t time_1970 = time_1904 - delta; 726 727 char tmp[32]; 728 struct tm* tm = gmtime(&time_1970); 729 if (tm != NULL && 730 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) { 731 s->setTo(tmp); 732 return true; 733 } 734 return false; 735} 736 737status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 738 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth); 739 740 if (*offset < 0) { 741 ALOGE("b/23540914"); 742 return ERROR_MALFORMED; 743 } 744 if (depth > 100) { 745 ALOGE("b/27456299"); 746 return ERROR_MALFORMED; 747 } 748 uint32_t hdr[2]; 749 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 750 return ERROR_IO; 751 } 752 uint64_t chunk_size = ntohl(hdr[0]); 753 int32_t chunk_type = ntohl(hdr[1]); 754 off64_t data_offset = *offset + 8; 755 756 if (chunk_size == 1) { 757 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 758 return ERROR_IO; 759 } 760 chunk_size = ntoh64(chunk_size); 761 data_offset += 8; 762 763 if (chunk_size < 16) { 764 // The smallest valid chunk is 16 bytes long in this case. 765 return ERROR_MALFORMED; 766 } 767 } else if (chunk_size == 0) { 768 if (depth == 0) { 769 // atom extends to end of file 770 off64_t sourceSize; 771 if (mDataSource->getSize(&sourceSize) == OK) { 772 chunk_size = (sourceSize - *offset); 773 } else { 774 // XXX could we just pick a "sufficiently large" value here? 775 ALOGE("atom size is 0, and data source has no size"); 776 return ERROR_MALFORMED; 777 } 778 } else { 779 // not allowed for non-toplevel atoms, skip it 780 *offset += 4; 781 return OK; 782 } 783 } else if (chunk_size < 8) { 784 // The smallest valid chunk is 8 bytes long. 785 ALOGE("invalid chunk size: %" PRIu64, chunk_size); 786 return ERROR_MALFORMED; 787 } 788 789 char chunk[5]; 790 MakeFourCCString(chunk_type, chunk); 791 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth); 792 793 if (kUseHexDump) { 794 static const char kWhitespace[] = " "; 795 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 796 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size); 797 798 char buffer[256]; 799 size_t n = chunk_size; 800 if (n > sizeof(buffer)) { 801 n = sizeof(buffer); 802 } 803 if (mDataSource->readAt(*offset, buffer, n) 804 < (ssize_t)n) { 805 return ERROR_IO; 806 } 807 808 hexdump(buffer, n); 809 } 810 811 PathAdder autoAdder(&mPath, chunk_type); 812 813 // (data_offset - *offset) is either 8 or 16 814 off64_t chunk_data_size = chunk_size - (data_offset - *offset); 815 if (chunk_data_size < 0) { 816 ALOGE("b/23540914"); 817 return ERROR_MALFORMED; 818 } 819 if (chunk_type != FOURCC('m', 'd', 'a', 't') && chunk_data_size > kMaxAtomSize) { 820 char errMsg[100]; 821 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size); 822 ALOGE("%s (b/28615448)", errMsg); 823 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg)); 824 return ERROR_MALFORMED; 825 } 826 827 if (chunk_type != FOURCC('c', 'p', 'r', 't') 828 && chunk_type != FOURCC('c', 'o', 'v', 'r') 829 && mPath.size() == 5 && underMetaDataPath(mPath)) { 830 off64_t stop_offset = *offset + chunk_size; 831 *offset = data_offset; 832 while (*offset < stop_offset) { 833 status_t err = parseChunk(offset, depth + 1); 834 if (err != OK) { 835 return err; 836 } 837 } 838 839 if (*offset != stop_offset) { 840 return ERROR_MALFORMED; 841 } 842 843 return OK; 844 } 845 846 switch(chunk_type) { 847 case FOURCC('m', 'o', 'o', 'v'): 848 case FOURCC('t', 'r', 'a', 'k'): 849 case FOURCC('m', 'd', 'i', 'a'): 850 case FOURCC('m', 'i', 'n', 'f'): 851 case FOURCC('d', 'i', 'n', 'f'): 852 case FOURCC('s', 't', 'b', 'l'): 853 case FOURCC('m', 'v', 'e', 'x'): 854 case FOURCC('m', 'o', 'o', 'f'): 855 case FOURCC('t', 'r', 'a', 'f'): 856 case FOURCC('m', 'f', 'r', 'a'): 857 case FOURCC('u', 'd', 't', 'a'): 858 case FOURCC('i', 'l', 's', 't'): 859 case FOURCC('s', 'i', 'n', 'f'): 860 case FOURCC('s', 'c', 'h', 'i'): 861 case FOURCC('e', 'd', 't', 's'): 862 case FOURCC('w', 'a', 'v', 'e'): 863 { 864 if (chunk_type == FOURCC('m', 'o', 'o', 'v') && depth != 0) { 865 ALOGE("moov: depth %d", depth); 866 return ERROR_MALFORMED; 867 } 868 869 if (chunk_type == FOURCC('m', 'o', 'o', 'v') && mInitCheck == OK) { 870 ALOGE("duplicate moov"); 871 return ERROR_MALFORMED; 872 } 873 874 if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) { 875 // store the offset of the first segment 876 mMoofFound = true; 877 mMoofOffset = *offset; 878 } 879 880 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 881 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size); 882 883 if (mDataSource->flags() 884 & (DataSourceBase::kWantsPrefetching 885 | DataSourceBase::kIsCachingDataSource)) { 886 CachedRangedDataSource *cachedSource = 887 new CachedRangedDataSource(mDataSource); 888 889 if (cachedSource->setCachedRange( 890 *offset, chunk_size, 891 mCachedSource != NULL /* assume ownership on success */) == OK) { 892 mDataSource = mCachedSource = cachedSource; 893 } else { 894 delete cachedSource; 895 } 896 } 897 898 if (mLastTrack == NULL) { 899 return ERROR_MALFORMED; 900 } 901 902 mLastTrack->sampleTable = new SampleTable(mDataSource); 903 } 904 905 bool isTrack = false; 906 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 907 if (depth != 1) { 908 ALOGE("trak: depth %d", depth); 909 return ERROR_MALFORMED; 910 } 911 isTrack = true; 912 913 ALOGV("adding new track"); 914 Track *track = new Track; 915 track->next = NULL; 916 if (mLastTrack) { 917 mLastTrack->next = track; 918 } else { 919 mFirstTrack = track; 920 } 921 mLastTrack = track; 922 923 track->includes_expensive_metadata = false; 924 track->skipTrack = false; 925 track->timescale = 0; 926 track->meta.setCString(kKeyMIMEType, "application/octet-stream"); 927 track->has_elst = false; 928 track->subsample_encryption = false; 929 } 930 931 off64_t stop_offset = *offset + chunk_size; 932 *offset = data_offset; 933 while (*offset < stop_offset) { 934 status_t err = parseChunk(offset, depth + 1); 935 if (err != OK) { 936 if (isTrack) { 937 mLastTrack->skipTrack = true; 938 break; 939 } 940 return err; 941 } 942 } 943 944 if (*offset != stop_offset) { 945 return ERROR_MALFORMED; 946 } 947 948 if (isTrack) { 949 int32_t trackId; 950 // There must be exact one track header per track. 951 if (!mLastTrack->meta.findInt32(kKeyTrackID, &trackId)) { 952 mLastTrack->skipTrack = true; 953 } 954 955 status_t err = verifyTrack(mLastTrack); 956 if (err != OK) { 957 mLastTrack->skipTrack = true; 958 } 959 960 if (mLastTrack->skipTrack) { 961 ALOGV("skipping this track..."); 962 Track *cur = mFirstTrack; 963 964 if (cur == mLastTrack) { 965 delete cur; 966 mFirstTrack = mLastTrack = NULL; 967 } else { 968 while (cur && cur->next != mLastTrack) { 969 cur = cur->next; 970 } 971 if (cur) { 972 cur->next = NULL; 973 } 974 delete mLastTrack; 975 mLastTrack = cur; 976 } 977 978 return OK; 979 } 980 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 981 mInitCheck = OK; 982 983 return UNKNOWN_ERROR; // Return a dummy error. 984 } 985 break; 986 } 987 988 case FOURCC('s', 'c', 'h', 'm'): 989 { 990 991 *offset += chunk_size; 992 if (!mLastTrack) { 993 return ERROR_MALFORMED; 994 } 995 996 uint32_t scheme_type; 997 if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) { 998 return ERROR_IO; 999 } 1000 scheme_type = ntohl(scheme_type); 1001 int32_t mode = kCryptoModeUnencrypted; 1002 switch(scheme_type) { 1003 case FOURCC('c', 'b', 'c', '1'): 1004 { 1005 mode = kCryptoModeAesCbc; 1006 break; 1007 } 1008 case FOURCC('c', 'b', 'c', 's'): 1009 { 1010 mode = kCryptoModeAesCbc; 1011 mLastTrack->subsample_encryption = true; 1012 break; 1013 } 1014 case FOURCC('c', 'e', 'n', 'c'): 1015 { 1016 mode = kCryptoModeAesCtr; 1017 break; 1018 } 1019 case FOURCC('c', 'e', 'n', 's'): 1020 { 1021 mode = kCryptoModeAesCtr; 1022 mLastTrack->subsample_encryption = true; 1023 break; 1024 } 1025 } 1026 mLastTrack->meta.setInt32(kKeyCryptoMode, mode); 1027 break; 1028 } 1029 1030 1031 case FOURCC('e', 'l', 's', 't'): 1032 { 1033 *offset += chunk_size; 1034 1035 if (!mLastTrack) { 1036 return ERROR_MALFORMED; 1037 } 1038 1039 // See 14496-12 8.6.6 1040 uint8_t version; 1041 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 1042 return ERROR_IO; 1043 } 1044 1045 uint32_t entry_count; 1046 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 1047 return ERROR_IO; 1048 } 1049 1050 if (entry_count != 1) { 1051 // we only support a single entry at the moment, for gapless playback 1052 ALOGW("ignoring edit list with %d entries", entry_count); 1053 } else { 1054 off64_t entriesoffset = data_offset + 8; 1055 uint64_t segment_duration; 1056 int64_t media_time; 1057 1058 if (version == 1) { 1059 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 1060 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 1061 return ERROR_IO; 1062 } 1063 } else if (version == 0) { 1064 uint32_t sd; 1065 int32_t mt; 1066 if (!mDataSource->getUInt32(entriesoffset, &sd) || 1067 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 1068 return ERROR_IO; 1069 } 1070 segment_duration = sd; 1071 media_time = mt; 1072 } else { 1073 return ERROR_IO; 1074 } 1075 1076 // save these for later, because the elst atom might precede 1077 // the atoms that actually gives us the duration and sample rate 1078 // needed to calculate the padding and delay values 1079 mLastTrack->has_elst = true; 1080 mLastTrack->elst_media_time = media_time; 1081 mLastTrack->elst_segment_duration = segment_duration; 1082 } 1083 break; 1084 } 1085 1086 case FOURCC('f', 'r', 'm', 'a'): 1087 { 1088 *offset += chunk_size; 1089 1090 uint32_t original_fourcc; 1091 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 1092 return ERROR_IO; 1093 } 1094 original_fourcc = ntohl(original_fourcc); 1095 ALOGV("read original format: %d", original_fourcc); 1096 1097 if (mLastTrack == NULL) { 1098 return ERROR_MALFORMED; 1099 } 1100 1101 mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 1102 uint32_t num_channels = 0; 1103 uint32_t sample_rate = 0; 1104 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 1105 mLastTrack->meta.setInt32(kKeyChannelCount, num_channels); 1106 mLastTrack->meta.setInt32(kKeySampleRate, sample_rate); 1107 } 1108 break; 1109 } 1110 1111 case FOURCC('t', 'e', 'n', 'c'): 1112 { 1113 *offset += chunk_size; 1114 1115 if (chunk_size < 32) { 1116 return ERROR_MALFORMED; 1117 } 1118 1119 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1120 // default IV size, 16 bytes default KeyID 1121 // (ISO 23001-7) 1122 1123 uint8_t version; 1124 if (mDataSource->readAt(data_offset, &version, sizeof(version)) 1125 < (ssize_t)sizeof(version)) { 1126 return ERROR_IO; 1127 } 1128 1129 uint8_t buf[4]; 1130 memset(buf, 0, 4); 1131 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1132 return ERROR_IO; 1133 } 1134 1135 if (mLastTrack == NULL) { 1136 return ERROR_MALFORMED; 1137 } 1138 1139 uint8_t defaultEncryptedByteBlock = 0; 1140 uint8_t defaultSkipByteBlock = 0; 1141 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1142 if (version == 1) { 1143 uint32_t pattern = buf[2]; 1144 defaultEncryptedByteBlock = pattern >> 4; 1145 defaultSkipByteBlock = pattern & 0xf; 1146 if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) { 1147 // use (1,0) to mean "encrypt everything" 1148 defaultEncryptedByteBlock = 1; 1149 } 1150 } else if (mLastTrack->subsample_encryption) { 1151 ALOGW("subsample_encryption should be version 1"); 1152 } else if (defaultAlgorithmId > 1) { 1153 // only 0 (clear) and 1 (AES-128) are valid 1154 ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId); 1155 defaultAlgorithmId = 1; 1156 } 1157 1158 memset(buf, 0, 4); 1159 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1160 return ERROR_IO; 1161 } 1162 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 1163 1164 if (defaultAlgorithmId == 0 && defaultIVSize != 0) { 1165 // only unencrypted data must have 0 IV size 1166 return ERROR_MALFORMED; 1167 } else if (defaultIVSize != 0 && 1168 defaultIVSize != 8 && 1169 defaultIVSize != 16) { 1170 return ERROR_MALFORMED; 1171 } 1172 1173 uint8_t defaultKeyId[16]; 1174 1175 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1176 return ERROR_IO; 1177 } 1178 1179 sp<ABuffer> defaultConstantIv; 1180 if (defaultAlgorithmId != 0 && defaultIVSize == 0) { 1181 1182 uint8_t ivlength; 1183 if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength)) 1184 < (ssize_t)sizeof(ivlength)) { 1185 return ERROR_IO; 1186 } 1187 1188 if (ivlength != 8 && ivlength != 16) { 1189 ALOGW("unsupported IV length: %u", ivlength); 1190 return ERROR_MALFORMED; 1191 } 1192 1193 defaultConstantIv = new ABuffer(ivlength); 1194 if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength) 1195 < (ssize_t)ivlength) { 1196 return ERROR_IO; 1197 } 1198 1199 defaultConstantIv->setRange(0, ivlength); 1200 } 1201 1202 int32_t tmpAlgorithmId; 1203 if (!mLastTrack->meta.findInt32(kKeyCryptoMode, &tmpAlgorithmId)) { 1204 mLastTrack->meta.setInt32(kKeyCryptoMode, defaultAlgorithmId); 1205 } 1206 1207 mLastTrack->meta.setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1208 mLastTrack->meta.setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1209 mLastTrack->meta.setInt32(kKeyEncryptedByteBlock, defaultEncryptedByteBlock); 1210 mLastTrack->meta.setInt32(kKeySkipByteBlock, defaultSkipByteBlock); 1211 if (defaultConstantIv != NULL) { 1212 mLastTrack->meta.setData(kKeyCryptoIV, 'dciv', defaultConstantIv->data(), defaultConstantIv->size()); 1213 } 1214 break; 1215 } 1216 1217 case FOURCC('t', 'k', 'h', 'd'): 1218 { 1219 *offset += chunk_size; 1220 1221 status_t err; 1222 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1223 return err; 1224 } 1225 1226 break; 1227 } 1228 1229 case FOURCC('t', 'r', 'e', 'f'): 1230 { 1231 off64_t stop_offset = *offset + chunk_size; 1232 *offset = data_offset; 1233 while (*offset < stop_offset) { 1234 status_t err = parseChunk(offset, depth + 1); 1235 if (err != OK) { 1236 return err; 1237 } 1238 } 1239 if (*offset != stop_offset) { 1240 return ERROR_MALFORMED; 1241 } 1242 break; 1243 } 1244 1245 case FOURCC('t', 'h', 'm', 'b'): 1246 { 1247 *offset += chunk_size; 1248 1249 if (mLastTrack != NULL) { 1250 // Skip thumbnail track for now since we don't have an 1251 // API to retrieve it yet. 1252 // The thumbnail track can't be accessed by negative index or time, 1253 // because each timed sample has its own corresponding thumbnail 1254 // in the thumbnail track. We'll need a dedicated API to retrieve 1255 // thumbnail at time instead. 1256 mLastTrack->skipTrack = true; 1257 } 1258 1259 break; 1260 } 1261 1262 case FOURCC('p', 's', 's', 'h'): 1263 { 1264 *offset += chunk_size; 1265 1266 PsshInfo pssh; 1267 1268 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1269 return ERROR_IO; 1270 } 1271 1272 uint32_t psshdatalen = 0; 1273 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1274 return ERROR_IO; 1275 } 1276 pssh.datalen = ntohl(psshdatalen); 1277 ALOGV("pssh data size: %d", pssh.datalen); 1278 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) { 1279 // pssh data length exceeds size of containing box 1280 return ERROR_MALFORMED; 1281 } 1282 1283 pssh.data = new (std::nothrow) uint8_t[pssh.datalen]; 1284 if (pssh.data == NULL) { 1285 return ERROR_MALFORMED; 1286 } 1287 ALOGV("allocated pssh @ %p", pssh.data); 1288 ssize_t requested = (ssize_t) pssh.datalen; 1289 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1290 delete[] pssh.data; 1291 return ERROR_IO; 1292 } 1293 mPssh.push_back(pssh); 1294 1295 break; 1296 } 1297 1298 case FOURCC('m', 'd', 'h', 'd'): 1299 { 1300 *offset += chunk_size; 1301 1302 if (chunk_data_size < 4 || mLastTrack == NULL) { 1303 return ERROR_MALFORMED; 1304 } 1305 1306 uint8_t version; 1307 if (mDataSource->readAt( 1308 data_offset, &version, sizeof(version)) 1309 < (ssize_t)sizeof(version)) { 1310 return ERROR_IO; 1311 } 1312 1313 off64_t timescale_offset; 1314 1315 if (version == 1) { 1316 timescale_offset = data_offset + 4 + 16; 1317 } else if (version == 0) { 1318 timescale_offset = data_offset + 4 + 8; 1319 } else { 1320 return ERROR_IO; 1321 } 1322 1323 uint32_t timescale; 1324 if (mDataSource->readAt( 1325 timescale_offset, ×cale, sizeof(timescale)) 1326 < (ssize_t)sizeof(timescale)) { 1327 return ERROR_IO; 1328 } 1329 1330 if (!timescale) { 1331 ALOGE("timescale should not be ZERO."); 1332 return ERROR_MALFORMED; 1333 } 1334 1335 mLastTrack->timescale = ntohl(timescale); 1336 1337 // 14496-12 says all ones means indeterminate, but some files seem to use 1338 // 0 instead. We treat both the same. 1339 int64_t duration = 0; 1340 if (version == 1) { 1341 if (mDataSource->readAt( 1342 timescale_offset + 4, &duration, sizeof(duration)) 1343 < (ssize_t)sizeof(duration)) { 1344 return ERROR_IO; 1345 } 1346 if (duration != -1) { 1347 duration = ntoh64(duration); 1348 } 1349 } else { 1350 uint32_t duration32; 1351 if (mDataSource->readAt( 1352 timescale_offset + 4, &duration32, sizeof(duration32)) 1353 < (ssize_t)sizeof(duration32)) { 1354 return ERROR_IO; 1355 } 1356 if (duration32 != 0xffffffff) { 1357 duration = ntohl(duration32); 1358 } 1359 } 1360 if (duration != 0 && mLastTrack->timescale != 0) { 1361 mLastTrack->meta.setInt64( 1362 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1363 } 1364 1365 uint8_t lang[2]; 1366 off64_t lang_offset; 1367 if (version == 1) { 1368 lang_offset = timescale_offset + 4 + 8; 1369 } else if (version == 0) { 1370 lang_offset = timescale_offset + 4 + 4; 1371 } else { 1372 return ERROR_IO; 1373 } 1374 1375 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1376 < (ssize_t)sizeof(lang)) { 1377 return ERROR_IO; 1378 } 1379 1380 // To get the ISO-639-2/T three character language code 1381 // 1 bit pad followed by 3 5-bits characters. Each character 1382 // is packed as the difference between its ASCII value and 0x60. 1383 char lang_code[4]; 1384 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1385 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1386 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1387 lang_code[3] = '\0'; 1388 1389 mLastTrack->meta.setCString( 1390 kKeyMediaLanguage, lang_code); 1391 1392 break; 1393 } 1394 1395 case FOURCC('s', 't', 's', 'd'): 1396 { 1397 uint8_t buffer[8]; 1398 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1399 return ERROR_MALFORMED; 1400 } 1401 1402 if (mDataSource->readAt( 1403 data_offset, buffer, 8) < 8) { 1404 return ERROR_IO; 1405 } 1406 1407 if (U32_AT(buffer) != 0) { 1408 // Should be version 0, flags 0. 1409 return ERROR_MALFORMED; 1410 } 1411 1412 uint32_t entry_count = U32_AT(&buffer[4]); 1413 1414 if (entry_count > 1) { 1415 // For 3GPP timed text, there could be multiple tx3g boxes contain 1416 // multiple text display formats. These formats will be used to 1417 // display the timed text. 1418 // For encrypted files, there may also be more than one entry. 1419 const char *mime; 1420 1421 if (mLastTrack == NULL) 1422 return ERROR_MALFORMED; 1423 1424 CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime)); 1425 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1426 strcasecmp(mime, "application/octet-stream")) { 1427 // For now we only support a single type of media per track. 1428 mLastTrack->skipTrack = true; 1429 *offset += chunk_size; 1430 break; 1431 } 1432 } 1433 off64_t stop_offset = *offset + chunk_size; 1434 *offset = data_offset + 8; 1435 for (uint32_t i = 0; i < entry_count; ++i) { 1436 status_t err = parseChunk(offset, depth + 1); 1437 if (err != OK) { 1438 return err; 1439 } 1440 } 1441 1442 if (*offset != stop_offset) { 1443 return ERROR_MALFORMED; 1444 } 1445 break; 1446 } 1447 case FOURCC('m', 'e', 't', 't'): 1448 { 1449 *offset += chunk_size; 1450 1451 if (mLastTrack == NULL) 1452 return ERROR_MALFORMED; 1453 1454 auto buffer = heapbuffer<uint8_t>(chunk_data_size); 1455 if (buffer.get() == NULL) { 1456 return NO_MEMORY; 1457 } 1458 1459 if (mDataSource->readAt( 1460 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) { 1461 return ERROR_IO; 1462 } 1463 1464 String8 mimeFormat((const char *)(buffer.get()), chunk_data_size); 1465 mLastTrack->meta.setCString(kKeyMIMEType, mimeFormat.string()); 1466 1467 break; 1468 } 1469 1470 case FOURCC('m', 'p', '4', 'a'): 1471 case FOURCC('e', 'n', 'c', 'a'): 1472 case FOURCC('s', 'a', 'm', 'r'): 1473 case FOURCC('s', 'a', 'w', 'b'): 1474 { 1475 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a') 1476 && depth >= 1 && mPath[depth - 1] == FOURCC('w', 'a', 'v', 'e')) { 1477 // Ignore mp4a embedded in QT wave atom 1478 *offset += chunk_size; 1479 break; 1480 } 1481 1482 uint8_t buffer[8 + 20]; 1483 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1484 // Basic AudioSampleEntry size. 1485 return ERROR_MALFORMED; 1486 } 1487 1488 if (mDataSource->readAt( 1489 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1490 return ERROR_IO; 1491 } 1492 1493 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1494 uint16_t version = U16_AT(&buffer[8]); 1495 uint32_t num_channels = U16_AT(&buffer[16]); 1496 1497 uint16_t sample_size = U16_AT(&buffer[18]); 1498 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1499 1500 if (mLastTrack == NULL) 1501 return ERROR_MALFORMED; 1502 1503 off64_t stop_offset = *offset + chunk_size; 1504 *offset = data_offset + sizeof(buffer); 1505 1506 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')) { 1507 if (version == 1) { 1508 if (mDataSource->readAt(*offset, buffer, 16) < 16) { 1509 return ERROR_IO; 1510 } 1511 1512#if 0 1513 U32_AT(buffer); // samples per packet 1514 U32_AT(&buffer[4]); // bytes per packet 1515 U32_AT(&buffer[8]); // bytes per frame 1516 U32_AT(&buffer[12]); // bytes per sample 1517#endif 1518 *offset += 16; 1519 } else if (version == 2) { 1520 uint8_t v2buffer[36]; 1521 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) { 1522 return ERROR_IO; 1523 } 1524 1525#if 0 1526 U32_AT(v2buffer); // size of struct only 1527 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate 1528 num_channels = U32_AT(&v2buffer[12]); // num audio channels 1529 U32_AT(&v2buffer[16]); // always 0x7f000000 1530 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel 1531 U32_AT(&v2buffer[24]); // format specifc flags 1532 U32_AT(&v2buffer[28]); // const bytes per audio packet 1533 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet 1534#endif 1535 *offset += 36; 1536 } 1537 } 1538 1539 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1540 // if the chunk type is enca, we'll get the type from the frma box later 1541 mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1542 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1543 } 1544 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1545 chunk, num_channels, sample_size, sample_rate); 1546 mLastTrack->meta.setInt32(kKeyChannelCount, num_channels); 1547 mLastTrack->meta.setInt32(kKeySampleRate, sample_rate); 1548 1549 while (*offset < stop_offset) { 1550 status_t err = parseChunk(offset, depth + 1); 1551 if (err != OK) { 1552 return err; 1553 } 1554 } 1555 1556 if (*offset != stop_offset) { 1557 return ERROR_MALFORMED; 1558 } 1559 break; 1560 } 1561 1562 case FOURCC('m', 'p', '4', 'v'): 1563 case FOURCC('e', 'n', 'c', 'v'): 1564 case FOURCC('s', '2', '6', '3'): 1565 case FOURCC('H', '2', '6', '3'): 1566 case FOURCC('h', '2', '6', '3'): 1567 case FOURCC('a', 'v', 'c', '1'): 1568 case FOURCC('h', 'v', 'c', '1'): 1569 case FOURCC('h', 'e', 'v', '1'): 1570 { 1571 uint8_t buffer[78]; 1572 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1573 // Basic VideoSampleEntry size. 1574 return ERROR_MALFORMED; 1575 } 1576 1577 if (mDataSource->readAt( 1578 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1579 return ERROR_IO; 1580 } 1581 1582 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1583 uint16_t width = U16_AT(&buffer[6 + 18]); 1584 uint16_t height = U16_AT(&buffer[6 + 20]); 1585 1586 // The video sample is not standard-compliant if it has invalid dimension. 1587 // Use some default width and height value, and 1588 // let the decoder figure out the actual width and height (and thus 1589 // be prepared for INFO_FOMRAT_CHANGED event). 1590 if (width == 0) width = 352; 1591 if (height == 0) height = 288; 1592 1593 // printf("*** coding='%s' width=%d height=%d\n", 1594 // chunk, width, height); 1595 1596 if (mLastTrack == NULL) 1597 return ERROR_MALFORMED; 1598 1599 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1600 // if the chunk type is encv, we'll get the type from the frma box later 1601 mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1602 } 1603 mLastTrack->meta.setInt32(kKeyWidth, width); 1604 mLastTrack->meta.setInt32(kKeyHeight, height); 1605 1606 off64_t stop_offset = *offset + chunk_size; 1607 *offset = data_offset + sizeof(buffer); 1608 while (*offset < stop_offset) { 1609 status_t err = parseChunk(offset, depth + 1); 1610 if (err != OK) { 1611 return err; 1612 } 1613 } 1614 1615 if (*offset != stop_offset) { 1616 return ERROR_MALFORMED; 1617 } 1618 break; 1619 } 1620 1621 case FOURCC('s', 't', 'c', 'o'): 1622 case FOURCC('c', 'o', '6', '4'): 1623 { 1624 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) { 1625 return ERROR_MALFORMED; 1626 } 1627 1628 status_t err = 1629 mLastTrack->sampleTable->setChunkOffsetParams( 1630 chunk_type, data_offset, chunk_data_size); 1631 1632 *offset += chunk_size; 1633 1634 if (err != OK) { 1635 return err; 1636 } 1637 1638 break; 1639 } 1640 1641 case FOURCC('s', 't', 's', 'c'): 1642 { 1643 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1644 return ERROR_MALFORMED; 1645 1646 status_t err = 1647 mLastTrack->sampleTable->setSampleToChunkParams( 1648 data_offset, chunk_data_size); 1649 1650 *offset += chunk_size; 1651 1652 if (err != OK) { 1653 return err; 1654 } 1655 1656 break; 1657 } 1658 1659 case FOURCC('s', 't', 's', 'z'): 1660 case FOURCC('s', 't', 'z', '2'): 1661 { 1662 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) { 1663 return ERROR_MALFORMED; 1664 } 1665 1666 status_t err = 1667 mLastTrack->sampleTable->setSampleSizeParams( 1668 chunk_type, data_offset, chunk_data_size); 1669 1670 *offset += chunk_size; 1671 1672 if (err != OK) { 1673 return err; 1674 } 1675 1676 size_t max_size; 1677 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1678 1679 if (err != OK) { 1680 return err; 1681 } 1682 1683 if (max_size != 0) { 1684 // Assume that a given buffer only contains at most 10 chunks, 1685 // each chunk originally prefixed with a 2 byte length will 1686 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1687 // and thus will grow by 2 bytes per chunk. 1688 if (max_size > SIZE_MAX - 10 * 2) { 1689 ALOGE("max sample size too big: %zu", max_size); 1690 return ERROR_MALFORMED; 1691 } 1692 mLastTrack->meta.setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1693 } else { 1694 // No size was specified. Pick a conservatively large size. 1695 uint32_t width, height; 1696 if (!mLastTrack->meta.findInt32(kKeyWidth, (int32_t*)&width) || 1697 !mLastTrack->meta.findInt32(kKeyHeight,(int32_t*) &height)) { 1698 ALOGE("No width or height, assuming worst case 1080p"); 1699 width = 1920; 1700 height = 1080; 1701 } else { 1702 // A resolution was specified, check that it's not too big. The values below 1703 // were chosen so that the calculations below don't cause overflows, they're 1704 // not indicating that resolutions up to 32kx32k are actually supported. 1705 if (width > 32768 || height > 32768) { 1706 ALOGE("can't support %u x %u video", width, height); 1707 return ERROR_MALFORMED; 1708 } 1709 } 1710 1711 const char *mime; 1712 CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime)); 1713 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC) 1714 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 1715 // AVC & HEVC requires compression ratio of at least 2, and uses 1716 // macroblocks 1717 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1718 } else { 1719 // For all other formats there is no minimum compression 1720 // ratio. Use compression ratio of 1. 1721 max_size = width * height * 3 / 2; 1722 } 1723 // HACK: allow 10% overhead 1724 // TODO: read sample size from traf atom for fragmented MPEG4. 1725 max_size += max_size / 10; 1726 mLastTrack->meta.setInt32(kKeyMaxInputSize, max_size); 1727 } 1728 1729 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1730 // mimetype) previously obtained, so don't cache them. 1731 const char *mime; 1732 CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime)); 1733 // Calculate average frame rate. 1734 if (!strncasecmp("video/", mime, 6)) { 1735 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1736 if (nSamples == 0) { 1737 int32_t trackId; 1738 if (mLastTrack->meta.findInt32(kKeyTrackID, &trackId)) { 1739 for (size_t i = 0; i < mTrex.size(); i++) { 1740 Trex *t = &mTrex.editItemAt(i); 1741 if (t->track_ID == (uint32_t) trackId) { 1742 if (t->default_sample_duration > 0) { 1743 int32_t frameRate = 1744 mLastTrack->timescale / t->default_sample_duration; 1745 mLastTrack->meta.setInt32(kKeyFrameRate, frameRate); 1746 } 1747 break; 1748 } 1749 } 1750 } 1751 } else { 1752 int64_t durationUs; 1753 if (mLastTrack->meta.findInt64(kKeyDuration, &durationUs)) { 1754 if (durationUs > 0) { 1755 int32_t frameRate = (nSamples * 1000000LL + 1756 (durationUs >> 1)) / durationUs; 1757 mLastTrack->meta.setInt32(kKeyFrameRate, frameRate); 1758 } 1759 } 1760 ALOGV("setting frame count %zu", nSamples); 1761 mLastTrack->meta.setInt32(kKeyFrameCount, nSamples); 1762 } 1763 } 1764 1765 break; 1766 } 1767 1768 case FOURCC('s', 't', 't', 's'): 1769 { 1770 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1771 return ERROR_MALFORMED; 1772 1773 *offset += chunk_size; 1774 1775 status_t err = 1776 mLastTrack->sampleTable->setTimeToSampleParams( 1777 data_offset, chunk_data_size); 1778 1779 if (err != OK) { 1780 return err; 1781 } 1782 1783 break; 1784 } 1785 1786 case FOURCC('c', 't', 't', 's'): 1787 { 1788 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1789 return ERROR_MALFORMED; 1790 1791 *offset += chunk_size; 1792 1793 status_t err = 1794 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1795 data_offset, chunk_data_size); 1796 1797 if (err != OK) { 1798 return err; 1799 } 1800 1801 break; 1802 } 1803 1804 case FOURCC('s', 't', 's', 's'): 1805 { 1806 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1807 return ERROR_MALFORMED; 1808 1809 *offset += chunk_size; 1810 1811 status_t err = 1812 mLastTrack->sampleTable->setSyncSampleParams( 1813 data_offset, chunk_data_size); 1814 1815 if (err != OK) { 1816 return err; 1817 } 1818 1819 break; 1820 } 1821 1822 // \xA9xyz 1823 case FOURCC(0xA9, 'x', 'y', 'z'): 1824 { 1825 *offset += chunk_size; 1826 1827 // Best case the total data length inside "\xA9xyz" box would 1828 // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/", 1829 // where "\x00\x05" is the text string length with value = 5, 1830 // "\0x15\xc7" is the language code = en, and "+0+0/" is a 1831 // location (string) value with longitude = 0 and latitude = 0. 1832 // Since some devices encountered in the wild omit the trailing 1833 // slash, we'll allow that. 1834 if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing / 1835 return ERROR_MALFORMED; 1836 } 1837 1838 uint16_t len; 1839 if (!mDataSource->getUInt16(data_offset, &len)) { 1840 return ERROR_IO; 1841 } 1842 1843 // allow "+0+0" without trailing slash 1844 if (len < 4 || len > chunk_data_size - 4) { 1845 return ERROR_MALFORMED; 1846 } 1847 // The location string following the language code is formatted 1848 // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709). 1849 // Allocate 2 extra bytes, in case we need to add a trailing slash, 1850 // and to add a terminating 0. 1851 std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]()); 1852 if (!buffer) { 1853 return NO_MEMORY; 1854 } 1855 1856 if (mDataSource->readAt( 1857 data_offset + 4, &buffer[0], len) < len) { 1858 return ERROR_IO; 1859 } 1860 1861 len = strlen(&buffer[0]); 1862 if (len < 4) { 1863 return ERROR_MALFORMED; 1864 } 1865 // Add a trailing slash if there wasn't one. 1866 if (buffer[len - 1] != '/') { 1867 buffer[len] = '/'; 1868 } 1869 mFileMetaData.setCString(kKeyLocation, &buffer[0]); 1870 break; 1871 } 1872 1873 case FOURCC('e', 's', 'd', 's'): 1874 { 1875 *offset += chunk_size; 1876 1877 if (chunk_data_size < 4) { 1878 return ERROR_MALFORMED; 1879 } 1880 1881 uint8_t buffer[256]; 1882 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1883 return ERROR_BUFFER_TOO_SMALL; 1884 } 1885 1886 if (mDataSource->readAt( 1887 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1888 return ERROR_IO; 1889 } 1890 1891 if (U32_AT(buffer) != 0) { 1892 // Should be version 0, flags 0. 1893 return ERROR_MALFORMED; 1894 } 1895 1896 if (mLastTrack == NULL) 1897 return ERROR_MALFORMED; 1898 1899 mLastTrack->meta.setData( 1900 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1901 1902 if (mPath.size() >= 2 1903 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1904 // Information from the ESDS must be relied on for proper 1905 // setup of sample rate and channel count for MPEG4 Audio. 1906 // The generic header appears to only contain generic 1907 // information... 1908 1909 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1910 &buffer[4], chunk_data_size - 4); 1911 1912 if (err != OK) { 1913 return err; 1914 } 1915 } 1916 if (mPath.size() >= 2 1917 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) { 1918 // Check if the video is MPEG2 1919 ESDS esds(&buffer[4], chunk_data_size - 4); 1920 1921 uint8_t objectTypeIndication; 1922 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) { 1923 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) { 1924 mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2); 1925 } 1926 } 1927 } 1928 break; 1929 } 1930 1931 case FOURCC('b', 't', 'r', 't'): 1932 { 1933 *offset += chunk_size; 1934 if (mLastTrack == NULL) { 1935 return ERROR_MALFORMED; 1936 } 1937 1938 uint8_t buffer[12]; 1939 if (chunk_data_size != sizeof(buffer)) { 1940 return ERROR_MALFORMED; 1941 } 1942 1943 if (mDataSource->readAt( 1944 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1945 return ERROR_IO; 1946 } 1947 1948 uint32_t maxBitrate = U32_AT(&buffer[4]); 1949 uint32_t avgBitrate = U32_AT(&buffer[8]); 1950 if (maxBitrate > 0 && maxBitrate < INT32_MAX) { 1951 mLastTrack->meta.setInt32(kKeyMaxBitRate, (int32_t)maxBitrate); 1952 } 1953 if (avgBitrate > 0 && avgBitrate < INT32_MAX) { 1954 mLastTrack->meta.setInt32(kKeyBitRate, (int32_t)avgBitrate); 1955 } 1956 break; 1957 } 1958 1959 case FOURCC('a', 'v', 'c', 'C'): 1960 { 1961 *offset += chunk_size; 1962 1963 auto buffer = heapbuffer<uint8_t>(chunk_data_size); 1964 1965 if (buffer.get() == NULL) { 1966 ALOGE("b/28471206"); 1967 return NO_MEMORY; 1968 } 1969 1970 if (mDataSource->readAt( 1971 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) { 1972 return ERROR_IO; 1973 } 1974 1975 if (mLastTrack == NULL) 1976 return ERROR_MALFORMED; 1977 1978 mLastTrack->meta.setData( 1979 kKeyAVCC, kTypeAVCC, buffer.get(), chunk_data_size); 1980 1981 break; 1982 } 1983 case FOURCC('h', 'v', 'c', 'C'): 1984 { 1985 auto buffer = heapbuffer<uint8_t>(chunk_data_size); 1986 1987 if (buffer.get() == NULL) { 1988 ALOGE("b/28471206"); 1989 return NO_MEMORY; 1990 } 1991 1992 if (mDataSource->readAt( 1993 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) { 1994 return ERROR_IO; 1995 } 1996 1997 if (mLastTrack == NULL) 1998 return ERROR_MALFORMED; 1999 2000 mLastTrack->meta.setData( 2001 kKeyHVCC, kTypeHVCC, buffer.get(), chunk_data_size); 2002 2003 *offset += chunk_size; 2004 break; 2005 } 2006 2007 case FOURCC('d', '2', '6', '3'): 2008 { 2009 *offset += chunk_size; 2010 /* 2011 * d263 contains a fixed 7 bytes part: 2012 * vendor - 4 bytes 2013 * version - 1 byte 2014 * level - 1 byte 2015 * profile - 1 byte 2016 * optionally, "d263" box itself may contain a 16-byte 2017 * bit rate box (bitr) 2018 * average bit rate - 4 bytes 2019 * max bit rate - 4 bytes 2020 */ 2021 char buffer[23]; 2022 if (chunk_data_size != 7 && 2023 chunk_data_size != 23) { 2024 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size); 2025 return ERROR_MALFORMED; 2026 } 2027 2028 if (mDataSource->readAt( 2029 data_offset, buffer, chunk_data_size) < chunk_data_size) { 2030 return ERROR_IO; 2031 } 2032 2033 if (mLastTrack == NULL) 2034 return ERROR_MALFORMED; 2035 2036 mLastTrack->meta.setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 2037 2038 break; 2039 } 2040 2041 case FOURCC('m', 'e', 't', 'a'): 2042 { 2043 off64_t stop_offset = *offset + chunk_size; 2044 *offset = data_offset; 2045 bool isParsingMetaKeys = underQTMetaPath(mPath, 2); 2046 if (!isParsingMetaKeys) { 2047 uint8_t buffer[4]; 2048 if (chunk_data_size < (off64_t)sizeof(buffer)) { 2049 *offset = stop_offset; 2050 return ERROR_MALFORMED; 2051 } 2052 2053 if (mDataSource->readAt( 2054 data_offset, buffer, 4) < 4) { 2055 *offset = stop_offset; 2056 return ERROR_IO; 2057 } 2058 2059 if (U32_AT(buffer) != 0) { 2060 // Should be version 0, flags 0. 2061 2062 // If it's not, let's assume this is one of those 2063 // apparently malformed chunks that don't have flags 2064 // and completely different semantics than what's 2065 // in the MPEG4 specs and skip it. 2066 *offset = stop_offset; 2067 return OK; 2068 } 2069 *offset += sizeof(buffer); 2070 } 2071 2072 while (*offset < stop_offset) { 2073 status_t err = parseChunk(offset, depth + 1); 2074 if (err != OK) { 2075 return err; 2076 } 2077 } 2078 2079 if (*offset != stop_offset) { 2080 return ERROR_MALFORMED; 2081 } 2082 break; 2083 } 2084 2085 case FOURCC('i', 'l', 'o', 'c'): 2086 case FOURCC('i', 'i', 'n', 'f'): 2087 case FOURCC('i', 'p', 'r', 'p'): 2088 case FOURCC('p', 'i', 't', 'm'): 2089 case FOURCC('i', 'd', 'a', 't'): 2090 case FOURCC('i', 'r', 'e', 'f'): 2091 case FOURCC('i', 'p', 'r', 'o'): 2092 { 2093 if (mIsHeif) { 2094 if (mItemTable == NULL) { 2095 mItemTable = new ItemTable(mDataSource); 2096 } 2097 status_t err = mItemTable->parse( 2098 chunk_type, data_offset, chunk_data_size); 2099 if (err != OK) { 2100 return err; 2101 } 2102 } 2103 *offset += chunk_size; 2104 break; 2105 } 2106 2107 case FOURCC('m', 'e', 'a', 'n'): 2108 case FOURCC('n', 'a', 'm', 'e'): 2109 case FOURCC('d', 'a', 't', 'a'): 2110 { 2111 *offset += chunk_size; 2112 2113 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 2114 status_t err = parseITunesMetaData(data_offset, chunk_data_size); 2115 2116 if (err != OK) { 2117 return err; 2118 } 2119 } 2120 2121 break; 2122 } 2123 2124 case FOURCC('m', 'v', 'h', 'd'): 2125 { 2126 *offset += chunk_size; 2127 2128 if (depth != 1) { 2129 ALOGE("mvhd: depth %d", depth); 2130 return ERROR_MALFORMED; 2131 } 2132 if (chunk_data_size < 32) { 2133 return ERROR_MALFORMED; 2134 } 2135 2136 uint8_t header[32]; 2137 if (mDataSource->readAt( 2138 data_offset, header, sizeof(header)) 2139 < (ssize_t)sizeof(header)) { 2140 return ERROR_IO; 2141 } 2142 2143 uint64_t creationTime; 2144 uint64_t duration = 0; 2145 if (header[0] == 1) { 2146 creationTime = U64_AT(&header[4]); 2147 mHeaderTimescale = U32_AT(&header[20]); 2148 duration = U64_AT(&header[24]); 2149 if (duration == 0xffffffffffffffff) { 2150 duration = 0; 2151 } 2152 } else if (header[0] != 0) { 2153 return ERROR_MALFORMED; 2154 } else { 2155 creationTime = U32_AT(&header[4]); 2156 mHeaderTimescale = U32_AT(&header[12]); 2157 uint32_t d32 = U32_AT(&header[16]); 2158 if (d32 == 0xffffffff) { 2159 d32 = 0; 2160 } 2161 duration = d32; 2162 } 2163 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) { 2164 mFileMetaData.setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 2165 } 2166 2167 String8 s; 2168 if (convertTimeToDate(creationTime, &s)) { 2169 mFileMetaData.setCString(kKeyDate, s.string()); 2170 } 2171 2172 2173 break; 2174 } 2175 2176 case FOURCC('m', 'e', 'h', 'd'): 2177 { 2178 *offset += chunk_size; 2179 2180 if (chunk_data_size < 8) { 2181 return ERROR_MALFORMED; 2182 } 2183 2184 uint8_t flags[4]; 2185 if (mDataSource->readAt( 2186 data_offset, flags, sizeof(flags)) 2187 < (ssize_t)sizeof(flags)) { 2188 return ERROR_IO; 2189 } 2190 2191 uint64_t duration = 0; 2192 if (flags[0] == 1) { 2193 // 64 bit 2194 if (chunk_data_size < 12) { 2195 return ERROR_MALFORMED; 2196 } 2197 mDataSource->getUInt64(data_offset + 4, &duration); 2198 if (duration == 0xffffffffffffffff) { 2199 duration = 0; 2200 } 2201 } else if (flags[0] == 0) { 2202 // 32 bit 2203 uint32_t d32; 2204 mDataSource->getUInt32(data_offset + 4, &d32); 2205 if (d32 == 0xffffffff) { 2206 d32 = 0; 2207 } 2208 duration = d32; 2209 } else { 2210 return ERROR_MALFORMED; 2211 } 2212 2213 if (duration != 0 && mHeaderTimescale != 0) { 2214 mFileMetaData.setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 2215 } 2216 2217 break; 2218 } 2219 2220 case FOURCC('m', 'd', 'a', 't'): 2221 { 2222 mMdatFound = true; 2223 2224 *offset += chunk_size; 2225 break; 2226 } 2227 2228 case FOURCC('h', 'd', 'l', 'r'): 2229 { 2230 *offset += chunk_size; 2231 2232 if (underQTMetaPath(mPath, 3)) { 2233 break; 2234 } 2235 2236 uint32_t buffer; 2237 if (mDataSource->readAt( 2238 data_offset + 8, &buffer, 4) < 4) { 2239 return ERROR_IO; 2240 } 2241 2242 uint32_t type = ntohl(buffer); 2243 // For the 3GPP file format, the handler-type within the 'hdlr' box 2244 // shall be 'text'. We also want to support 'sbtl' handler type 2245 // for a practical reason as various MPEG4 containers use it. 2246 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 2247 if (mLastTrack != NULL) { 2248 mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 2249 } 2250 } 2251 2252 break; 2253 } 2254 2255 case FOURCC('k', 'e', 'y', 's'): 2256 { 2257 *offset += chunk_size; 2258 2259 if (underQTMetaPath(mPath, 3)) { 2260 status_t err = parseQTMetaKey(data_offset, chunk_data_size); 2261 if (err != OK) { 2262 return err; 2263 } 2264 } 2265 break; 2266 } 2267 2268 case FOURCC('t', 'r', 'e', 'x'): 2269 { 2270 *offset += chunk_size; 2271 2272 if (chunk_data_size < 24) { 2273 return ERROR_IO; 2274 } 2275 Trex trex; 2276 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) || 2277 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) || 2278 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) || 2279 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) || 2280 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) { 2281 return ERROR_IO; 2282 } 2283 mTrex.add(trex); 2284 break; 2285 } 2286 2287 case FOURCC('t', 'x', '3', 'g'): 2288 { 2289 if (mLastTrack == NULL) 2290 return ERROR_MALFORMED; 2291 2292 uint32_t type; 2293 const void *data; 2294 size_t size = 0; 2295 if (!mLastTrack->meta.findData( 2296 kKeyTextFormatData, &type, &data, &size)) { 2297 size = 0; 2298 } 2299 2300 if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) { 2301 return ERROR_MALFORMED; 2302 } 2303 2304 uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size]; 2305 if (buffer == NULL) { 2306 return ERROR_MALFORMED; 2307 } 2308 2309 if (size > 0) { 2310 memcpy(buffer, data, size); 2311 } 2312 2313 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 2314 < chunk_size) { 2315 delete[] buffer; 2316 buffer = NULL; 2317 2318 // advance read pointer so we don't end up reading this again 2319 *offset += chunk_size; 2320 return ERROR_IO; 2321 } 2322 2323 mLastTrack->meta.setData( 2324 kKeyTextFormatData, 0, buffer, size + chunk_size); 2325 2326 delete[] buffer; 2327 2328 *offset += chunk_size; 2329 break; 2330 } 2331 2332 case FOURCC('c', 'o', 'v', 'r'): 2333 { 2334 *offset += chunk_size; 2335 2336 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64, 2337 chunk_data_size, data_offset); 2338 2339 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) { 2340 return ERROR_MALFORMED; 2341 } 2342 auto buffer = heapbuffer<uint8_t>(chunk_data_size); 2343 if (buffer.get() == NULL) { 2344 ALOGE("b/28471206"); 2345 return NO_MEMORY; 2346 } 2347 if (mDataSource->readAt( 2348 data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) { 2349 return ERROR_IO; 2350 } 2351 const int kSkipBytesOfDataBox = 16; 2352 if (chunk_data_size <= kSkipBytesOfDataBox) { 2353 return ERROR_MALFORMED; 2354 } 2355 2356 mFileMetaData.setData( 2357 kKeyAlbumArt, MetaData::TYPE_NONE, 2358 buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 2359 2360 break; 2361 } 2362 2363 case FOURCC('c', 'o', 'l', 'r'): 2364 { 2365 *offset += chunk_size; 2366 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd') 2367 // ignore otherwise 2368 if (depth >= 2 && mPath[depth - 2] == FOURCC('s', 't', 's', 'd')) { 2369 status_t err = parseColorInfo(data_offset, chunk_data_size); 2370 if (err != OK) { 2371 return err; 2372 } 2373 } 2374 2375 break; 2376 } 2377 2378 case FOURCC('t', 'i', 't', 'l'): 2379 case FOURCC('p', 'e', 'r', 'f'): 2380 case FOURCC('a', 'u', 't', 'h'): 2381 case FOURCC('g', 'n', 'r', 'e'): 2382 case FOURCC('a', 'l', 'b', 'm'): 2383 case FOURCC('y', 'r', 'r', 'c'): 2384 { 2385 *offset += chunk_size; 2386 2387 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth); 2388 2389 if (err != OK) { 2390 return err; 2391 } 2392 2393 break; 2394 } 2395 2396 case FOURCC('I', 'D', '3', '2'): 2397 { 2398 *offset += chunk_size; 2399 2400 if (chunk_data_size < 6) { 2401 return ERROR_MALFORMED; 2402 } 2403 2404 parseID3v2MetaData(data_offset + 6); 2405 2406 break; 2407 } 2408 2409 case FOURCC('-', '-', '-', '-'): 2410 { 2411 mLastCommentMean.clear(); 2412 mLastCommentName.clear(); 2413 mLastCommentData.clear(); 2414 *offset += chunk_size; 2415 break; 2416 } 2417 2418 case FOURCC('s', 'i', 'd', 'x'): 2419 { 2420 status_t err = parseSegmentIndex(data_offset, chunk_data_size); 2421 if (err != OK) { 2422 return err; 2423 } 2424 *offset += chunk_size; 2425 return UNKNOWN_ERROR; // stop parsing after sidx 2426 } 2427 2428 case FOURCC('a', 'c', '-', '3'): 2429 { 2430 *offset += chunk_size; 2431 return parseAC3SampleEntry(data_offset); 2432 } 2433 2434 case FOURCC('f', 't', 'y', 'p'): 2435 { 2436 if (chunk_data_size < 8 || depth != 0) { 2437 return ERROR_MALFORMED; 2438 } 2439 2440 off64_t stop_offset = *offset + chunk_size; 2441 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4; 2442 std::set<uint32_t> brandSet; 2443 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 2444 if (i == 1) { 2445 // Skip this index, it refers to the minorVersion, 2446 // not a brand. 2447 continue; 2448 } 2449 2450 uint32_t brand; 2451 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) { 2452 return ERROR_MALFORMED; 2453 } 2454 2455 brand = ntohl(brand); 2456 brandSet.insert(brand); 2457 } 2458 2459 if (brandSet.count(FOURCC('q', 't', ' ', ' ')) > 0) { 2460 mIsQT = true; 2461 } else { 2462 if (brandSet.count(FOURCC('m', 'i', 'f', '1')) > 0 2463 && brandSet.count(FOURCC('h', 'e', 'i', 'c')) > 0) { 2464 ALOGV("identified HEIF image"); 2465 2466 mIsHeif = true; 2467 brandSet.erase(FOURCC('m', 'i', 'f', '1')); 2468 brandSet.erase(FOURCC('h', 'e', 'i', 'c')); 2469 } 2470 2471 if (!brandSet.empty()) { 2472 // This means that the file should have moov box. 2473 // It could be any iso files (mp4, heifs, etc.) 2474 mHasMoovBox = true; 2475 if (mIsHeif) { 2476 ALOGV("identified HEIF image with other tracks"); 2477 } 2478 } 2479 } 2480 2481 *offset = stop_offset; 2482 2483 break; 2484 } 2485 2486 default: 2487 { 2488 // check if we're parsing 'ilst' for meta keys 2489 // if so, treat type as a number (key-id). 2490 if (underQTMetaPath(mPath, 3)) { 2491 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size); 2492 if (err != OK) { 2493 return err; 2494 } 2495 } 2496 2497 *offset += chunk_size; 2498 break; 2499 } 2500 } 2501 2502 return OK; 2503} 2504 2505status_t MPEG4Extractor::parseAC3SampleEntry(off64_t offset) { 2506 // skip 16 bytes: 2507 // + 6-byte reserved, 2508 // + 2-byte data reference index, 2509 // + 8-byte reserved 2510 offset += 16; 2511 uint16_t channelCount; 2512 if (!mDataSource->getUInt16(offset, &channelCount)) { 2513 return ERROR_MALFORMED; 2514 } 2515 // skip 8 bytes: 2516 // + 2-byte channelCount, 2517 // + 2-byte sample size, 2518 // + 4-byte reserved 2519 offset += 8; 2520 uint16_t sampleRate; 2521 if (!mDataSource->getUInt16(offset, &sampleRate)) { 2522 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read sample rate"); 2523 return ERROR_MALFORMED; 2524 } 2525 2526 // skip 4 bytes: 2527 // + 2-byte sampleRate, 2528 // + 2-byte reserved 2529 offset += 4; 2530 return parseAC3SpecificBox(offset, sampleRate); 2531} 2532 2533status_t MPEG4Extractor::parseAC3SpecificBox( 2534 off64_t offset, uint16_t sampleRate) { 2535 uint32_t size; 2536 // + 4-byte size 2537 // + 4-byte type 2538 // + 3-byte payload 2539 const uint32_t kAC3SpecificBoxSize = 11; 2540 if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) { 2541 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size"); 2542 return ERROR_MALFORMED; 2543 } 2544 2545 offset += 4; 2546 uint32_t type; 2547 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC('d', 'a', 'c', '3')) { 2548 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3"); 2549 return ERROR_MALFORMED; 2550 } 2551 2552 offset += 4; 2553 const uint32_t kAC3SpecificBoxPayloadSize = 3; 2554 uint8_t chunk[kAC3SpecificBoxPayloadSize]; 2555 if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) { 2556 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields"); 2557 return ERROR_MALFORMED; 2558 } 2559 2560 ABitReader br(chunk, sizeof(chunk)); 2561 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5}; 2562 static const unsigned sampleRateTable[] = {48000, 44100, 32000}; 2563 2564 unsigned fscod = br.getBits(2); 2565 if (fscod == 3) { 2566 ALOGE("Incorrect fscod (3) in AC3 header"); 2567 return ERROR_MALFORMED; 2568 } 2569 unsigned boxSampleRate = sampleRateTable[fscod]; 2570 if (boxSampleRate != sampleRate) { 2571 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d", 2572 boxSampleRate, sampleRate); 2573 return ERROR_MALFORMED; 2574 } 2575 2576 unsigned bsid = br.getBits(5); 2577 if (bsid > 8) { 2578 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?"); 2579 return ERROR_MALFORMED; 2580 } 2581 2582 // skip 2583 unsigned bsmod __unused = br.getBits(3); 2584 2585 unsigned acmod = br.getBits(3); 2586 unsigned lfeon = br.getBits(1); 2587 unsigned channelCount = channelCountTable[acmod] + lfeon; 2588 2589 if (mLastTrack == NULL) { 2590 return ERROR_MALFORMED; 2591 } 2592 mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3); 2593 mLastTrack->meta.setInt32(kKeyChannelCount, channelCount); 2594 mLastTrack->meta.setInt32(kKeySampleRate, sampleRate); 2595 return OK; 2596} 2597 2598status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 2599 ALOGV("MPEG4Extractor::parseSegmentIndex"); 2600 2601 if (size < 12) { 2602 return -EINVAL; 2603 } 2604 2605 uint32_t flags; 2606 if (!mDataSource->getUInt32(offset, &flags)) { 2607 return ERROR_MALFORMED; 2608 } 2609 2610 uint32_t version = flags >> 24; 2611 flags &= 0xffffff; 2612 2613 ALOGV("sidx version %d", version); 2614 2615 uint32_t referenceId; 2616 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 2617 return ERROR_MALFORMED; 2618 } 2619 2620 uint32_t timeScale; 2621 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 2622 return ERROR_MALFORMED; 2623 } 2624 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 2625 if (timeScale == 0) 2626 return ERROR_MALFORMED; 2627 2628 uint64_t earliestPresentationTime; 2629 uint64_t firstOffset; 2630 2631 offset += 12; 2632 size -= 12; 2633 2634 if (version == 0) { 2635 if (size < 8) { 2636 return -EINVAL; 2637 } 2638 uint32_t tmp; 2639 if (!mDataSource->getUInt32(offset, &tmp)) { 2640 return ERROR_MALFORMED; 2641 } 2642 earliestPresentationTime = tmp; 2643 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 2644 return ERROR_MALFORMED; 2645 } 2646 firstOffset = tmp; 2647 offset += 8; 2648 size -= 8; 2649 } else { 2650 if (size < 16) { 2651 return -EINVAL; 2652 } 2653 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 2654 return ERROR_MALFORMED; 2655 } 2656 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 2657 return ERROR_MALFORMED; 2658 } 2659 offset += 16; 2660 size -= 16; 2661 } 2662 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset); 2663 2664 if (size < 4) { 2665 return -EINVAL; 2666 } 2667 2668 uint16_t referenceCount; 2669 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 2670 return ERROR_MALFORMED; 2671 } 2672 offset += 4; 2673 size -= 4; 2674 ALOGV("refcount: %d", referenceCount); 2675 2676 if (size < referenceCount * 12) { 2677 return -EINVAL; 2678 } 2679 2680 uint64_t total_duration = 0; 2681 for (unsigned int i = 0; i < referenceCount; i++) { 2682 uint32_t d1, d2, d3; 2683 2684 if (!mDataSource->getUInt32(offset, &d1) || // size 2685 !mDataSource->getUInt32(offset + 4, &d2) || // duration 2686 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 2687 return ERROR_MALFORMED; 2688 } 2689 2690 if (d1 & 0x80000000) { 2691 ALOGW("sub-sidx boxes not supported yet"); 2692 } 2693 bool sap = d3 & 0x80000000; 2694 uint32_t saptype = (d3 >> 28) & 7; 2695 if (!sap || (saptype != 1 && saptype != 2)) { 2696 // type 1 and 2 are sync samples 2697 ALOGW("not a stream access point, or unsupported type: %08x", d3); 2698 } 2699 total_duration += d2; 2700 offset += 12; 2701 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 2702 SidxEntry se; 2703 se.mSize = d1 & 0x7fffffff; 2704 se.mDurationUs = 1000000LL * d2 / timeScale; 2705 mSidxEntries.add(se); 2706 } 2707 2708 uint64_t sidxDuration = total_duration * 1000000 / timeScale; 2709 2710 if (mLastTrack == NULL) 2711 return ERROR_MALFORMED; 2712 2713 int64_t metaDuration; 2714 if (!mLastTrack->meta.findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 2715 mLastTrack->meta.setInt64(kKeyDuration, sidxDuration); 2716 } 2717 return OK; 2718} 2719 2720status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) { 2721 if (size < 8) { 2722 return ERROR_MALFORMED; 2723 } 2724 2725 uint32_t count; 2726 if (!mDataSource->getUInt32(offset + 4, &count)) { 2727 return ERROR_MALFORMED; 2728 } 2729 2730 if (mMetaKeyMap.size() > 0) { 2731 ALOGW("'keys' atom seen again, discarding existing entries"); 2732 mMetaKeyMap.clear(); 2733 } 2734 2735 off64_t keyOffset = offset + 8; 2736 off64_t stopOffset = offset + size; 2737 for (size_t i = 1; i <= count; i++) { 2738 if (keyOffset + 8 > stopOffset) { 2739 return ERROR_MALFORMED; 2740 } 2741 2742 uint32_t keySize; 2743 if (!mDataSource->getUInt32(keyOffset, &keySize) 2744 || keySize < 8 2745 || keyOffset + keySize > stopOffset) { 2746 return ERROR_MALFORMED; 2747 } 2748 2749 uint32_t type; 2750 if (!mDataSource->getUInt32(keyOffset + 4, &type) 2751 || type != FOURCC('m', 'd', 't', 'a')) { 2752 return ERROR_MALFORMED; 2753 } 2754 2755 keySize -= 8; 2756 keyOffset += 8; 2757 2758 auto keyData = heapbuffer<uint8_t>(keySize); 2759 if (keyData.get() == NULL) { 2760 return ERROR_MALFORMED; 2761 } 2762 if (mDataSource->readAt( 2763 keyOffset, keyData.get(), keySize) < (ssize_t) keySize) { 2764 return ERROR_MALFORMED; 2765 } 2766 2767 AString key((const char *)keyData.get(), keySize); 2768 mMetaKeyMap.add(i, key); 2769 2770 keyOffset += keySize; 2771 } 2772 return OK; 2773} 2774 2775status_t MPEG4Extractor::parseQTMetaVal( 2776 int32_t keyId, off64_t offset, size_t size) { 2777 ssize_t index = mMetaKeyMap.indexOfKey(keyId); 2778 if (index < 0) { 2779 // corresponding key is not present, ignore 2780 return ERROR_MALFORMED; 2781 } 2782 2783 if (size <= 16) { 2784 return ERROR_MALFORMED; 2785 } 2786 uint32_t dataSize; 2787 if (!mDataSource->getUInt32(offset, &dataSize) 2788 || dataSize > size || dataSize <= 16) { 2789 return ERROR_MALFORMED; 2790 } 2791 uint32_t atomFourCC; 2792 if (!mDataSource->getUInt32(offset + 4, &atomFourCC) 2793 || atomFourCC != FOURCC('d', 'a', 't', 'a')) { 2794 return ERROR_MALFORMED; 2795 } 2796 uint32_t dataType; 2797 if (!mDataSource->getUInt32(offset + 8, &dataType) 2798 || ((dataType & 0xff000000) != 0)) { 2799 // not well-known type 2800 return ERROR_MALFORMED; 2801 } 2802 2803 dataSize -= 16; 2804 offset += 16; 2805 2806 if (dataType == 23 && dataSize >= 4) { 2807 // BE Float32 2808 uint32_t val; 2809 if (!mDataSource->getUInt32(offset, &val)) { 2810 return ERROR_MALFORMED; 2811 } 2812 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) { 2813 mFileMetaData.setFloat(kKeyCaptureFramerate, *(float *)&val); 2814 } 2815 } else if (dataType == 67 && dataSize >= 4) { 2816 // BE signed int32 2817 uint32_t val; 2818 if (!mDataSource->getUInt32(offset, &val)) { 2819 return ERROR_MALFORMED; 2820 } 2821 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) { 2822 mFileMetaData.setInt32(kKeyTemporalLayerCount, val); 2823 } 2824 } else { 2825 // add more keys if needed 2826 ALOGV("ignoring key: type %d, size %d", dataType, dataSize); 2827 } 2828 2829 return OK; 2830} 2831 2832status_t MPEG4Extractor::parseTrackHeader( 2833 off64_t data_offset, off64_t data_size) { 2834 if (data_size < 4) { 2835 return ERROR_MALFORMED; 2836 } 2837 2838 uint8_t version; 2839 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 2840 return ERROR_IO; 2841 } 2842 2843 size_t dynSize = (version == 1) ? 36 : 24; 2844 2845 uint8_t buffer[36 + 60]; 2846 2847 if (data_size != (off64_t)dynSize + 60) { 2848 return ERROR_MALFORMED; 2849 } 2850 2851 if (mDataSource->readAt( 2852 data_offset, buffer, data_size) < (ssize_t)data_size) { 2853 return ERROR_IO; 2854 } 2855 2856 uint64_t ctime __unused, mtime __unused, duration __unused; 2857 int32_t id; 2858 2859 if (version == 1) { 2860 ctime = U64_AT(&buffer[4]); 2861 mtime = U64_AT(&buffer[12]); 2862 id = U32_AT(&buffer[20]); 2863 duration = U64_AT(&buffer[28]); 2864 } else if (version == 0) { 2865 ctime = U32_AT(&buffer[4]); 2866 mtime = U32_AT(&buffer[8]); 2867 id = U32_AT(&buffer[12]); 2868 duration = U32_AT(&buffer[20]); 2869 } else { 2870 return ERROR_UNSUPPORTED; 2871 } 2872 2873 if (mLastTrack == NULL) 2874 return ERROR_MALFORMED; 2875 2876 mLastTrack->meta.setInt32(kKeyTrackID, id); 2877 2878 size_t matrixOffset = dynSize + 16; 2879 int32_t a00 = U32_AT(&buffer[matrixOffset]); 2880 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 2881 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 2882 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 2883 2884#if 0 2885 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 2886 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 2887 2888 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 2889 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 2890 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 2891 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 2892#endif 2893 2894 uint32_t rotationDegrees; 2895 2896 static const int32_t kFixedOne = 0x10000; 2897 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 2898 // Identity, no rotation 2899 rotationDegrees = 0; 2900 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 2901 rotationDegrees = 90; 2902 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 2903 rotationDegrees = 270; 2904 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { 2905 rotationDegrees = 180; 2906 } else { 2907 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 2908 rotationDegrees = 0; 2909 } 2910 2911 if (rotationDegrees != 0) { 2912 mLastTrack->meta.setInt32(kKeyRotation, rotationDegrees); 2913 } 2914 2915 // Handle presentation display size, which could be different 2916 // from the image size indicated by kKeyWidth and kKeyHeight. 2917 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2918 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2919 mLastTrack->meta.setInt32(kKeyDisplayWidth, width >> 16); 2920 mLastTrack->meta.setInt32(kKeyDisplayHeight, height >> 16); 2921 2922 return OK; 2923} 2924 2925status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) { 2926 if (size == 0) { 2927 return OK; 2928 } 2929 2930 if (size < 4 || size == SIZE_MAX) { 2931 return ERROR_MALFORMED; 2932 } 2933 2934 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2935 if (buffer == NULL) { 2936 return ERROR_MALFORMED; 2937 } 2938 if (mDataSource->readAt( 2939 offset, buffer, size) != (ssize_t)size) { 2940 delete[] buffer; 2941 buffer = NULL; 2942 2943 return ERROR_IO; 2944 } 2945 2946 uint32_t flags = U32_AT(buffer); 2947 2948 uint32_t metadataKey = 0; 2949 char chunk[5]; 2950 MakeFourCCString(mPath[4], chunk); 2951 ALOGV("meta: %s @ %lld", chunk, (long long)offset); 2952 switch ((int32_t)mPath[4]) { 2953 case FOURCC(0xa9, 'a', 'l', 'b'): 2954 { 2955 metadataKey = kKeyAlbum; 2956 break; 2957 } 2958 case FOURCC(0xa9, 'A', 'R', 'T'): 2959 { 2960 metadataKey = kKeyArtist; 2961 break; 2962 } 2963 case FOURCC('a', 'A', 'R', 'T'): 2964 { 2965 metadataKey = kKeyAlbumArtist; 2966 break; 2967 } 2968 case FOURCC(0xa9, 'd', 'a', 'y'): 2969 { 2970 metadataKey = kKeyYear; 2971 break; 2972 } 2973 case FOURCC(0xa9, 'n', 'a', 'm'): 2974 { 2975 metadataKey = kKeyTitle; 2976 break; 2977 } 2978 case FOURCC(0xa9, 'w', 'r', 't'): 2979 { 2980 metadataKey = kKeyWriter; 2981 break; 2982 } 2983 case FOURCC('c', 'o', 'v', 'r'): 2984 { 2985 metadataKey = kKeyAlbumArt; 2986 break; 2987 } 2988 case FOURCC('g', 'n', 'r', 'e'): 2989 { 2990 metadataKey = kKeyGenre; 2991 break; 2992 } 2993 case FOURCC(0xa9, 'g', 'e', 'n'): 2994 { 2995 metadataKey = kKeyGenre; 2996 break; 2997 } 2998 case FOURCC('c', 'p', 'i', 'l'): 2999 { 3000 if (size == 9 && flags == 21) { 3001 char tmp[16]; 3002 sprintf(tmp, "%d", 3003 (int)buffer[size - 1]); 3004 3005 mFileMetaData.setCString(kKeyCompilation, tmp); 3006 } 3007 break; 3008 } 3009 case FOURCC('t', 'r', 'k', 'n'): 3010 { 3011 if (size == 16 && flags == 0) { 3012 char tmp[16]; 3013 uint16_t* pTrack = (uint16_t*)&buffer[10]; 3014 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 3015 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 3016 3017 mFileMetaData.setCString(kKeyCDTrackNumber, tmp); 3018 } 3019 break; 3020 } 3021 case FOURCC('d', 'i', 's', 'k'): 3022 { 3023 if ((size == 14 || size == 16) && flags == 0) { 3024 char tmp[16]; 3025 uint16_t* pDisc = (uint16_t*)&buffer[10]; 3026 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 3027 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 3028 3029 mFileMetaData.setCString(kKeyDiscNumber, tmp); 3030 } 3031 break; 3032 } 3033 case FOURCC('-', '-', '-', '-'): 3034 { 3035 buffer[size] = '\0'; 3036 switch (mPath[5]) { 3037 case FOURCC('m', 'e', 'a', 'n'): 3038 mLastCommentMean.setTo((const char *)buffer + 4); 3039 break; 3040 case FOURCC('n', 'a', 'm', 'e'): 3041 mLastCommentName.setTo((const char *)buffer + 4); 3042 break; 3043 case FOURCC('d', 'a', 't', 'a'): 3044 if (size < 8) { 3045 delete[] buffer; 3046 buffer = NULL; 3047 ALOGE("b/24346430"); 3048 return ERROR_MALFORMED; 3049 } 3050 mLastCommentData.setTo((const char *)buffer + 8); 3051 break; 3052 } 3053 3054 // Once we have a set of mean/name/data info, go ahead and process 3055 // it to see if its something we are interested in. Whether or not 3056 // were are interested in the specific tag, make sure to clear out 3057 // the set so we can be ready to process another tuple should one 3058 // show up later in the file. 3059 if ((mLastCommentMean.length() != 0) && 3060 (mLastCommentName.length() != 0) && 3061 (mLastCommentData.length() != 0)) { 3062 3063 if (mLastCommentMean == "com.apple.iTunes" 3064 && mLastCommentName == "iTunSMPB") { 3065 int32_t delay, padding; 3066 if (sscanf(mLastCommentData, 3067 " %*x %x %x %*x", &delay, &padding) == 2) { 3068 if (mLastTrack == NULL) { 3069 delete[] buffer; 3070 return ERROR_MALFORMED; 3071 } 3072 3073 mLastTrack->meta.setInt32(kKeyEncoderDelay, delay); 3074 mLastTrack->meta.setInt32(kKeyEncoderPadding, padding); 3075 } 3076 } 3077 3078 mLastCommentMean.clear(); 3079 mLastCommentName.clear(); 3080 mLastCommentData.clear(); 3081 } 3082 break; 3083 } 3084 3085 default: 3086 break; 3087 } 3088 3089 if (size >= 8 && metadataKey && !mFileMetaData.hasData(metadataKey)) { 3090 if (metadataKey == kKeyAlbumArt) { 3091 mFileMetaData.setData( 3092 kKeyAlbumArt, MetaData::TYPE_NONE, 3093 buffer + 8, size - 8); 3094 } else if (metadataKey == kKeyGenre) { 3095 if (flags == 0) { 3096 // uint8_t genre code, iTunes genre codes are 3097 // the standard id3 codes, except they start 3098 // at 1 instead of 0 (e.g. Pop is 14, not 13) 3099 // We use standard id3 numbering, so subtract 1. 3100 int genrecode = (int)buffer[size - 1]; 3101 genrecode--; 3102 if (genrecode < 0) { 3103 genrecode = 255; // reserved for 'unknown genre' 3104 } 3105 char genre[10]; 3106 sprintf(genre, "%d", genrecode); 3107 3108 mFileMetaData.setCString(metadataKey, genre); 3109 } else if (flags == 1) { 3110 // custom genre string 3111 buffer[size] = '\0'; 3112 3113 mFileMetaData.setCString( 3114 metadataKey, (const char *)buffer + 8); 3115 } 3116 } else { 3117 buffer[size] = '\0'; 3118 3119 mFileMetaData.setCString( 3120 metadataKey, (const char *)buffer + 8); 3121 } 3122 } 3123 3124 delete[] buffer; 3125 buffer = NULL; 3126 3127 return OK; 3128} 3129 3130status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) { 3131 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) { 3132 return ERROR_MALFORMED; 3133 } 3134 3135 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 3136 if (buffer == NULL) { 3137 return ERROR_MALFORMED; 3138 } 3139 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) { 3140 delete[] buffer; 3141 buffer = NULL; 3142 3143 return ERROR_IO; 3144 } 3145 3146 int32_t type = U32_AT(&buffer[0]); 3147 if ((type == FOURCC('n', 'c', 'l', 'x') && size >= 11) 3148 || (type == FOURCC('n', 'c', 'l', 'c') && size >= 10)) { 3149 int32_t primaries = U16_AT(&buffer[4]); 3150 int32_t transfer = U16_AT(&buffer[6]); 3151 int32_t coeffs = U16_AT(&buffer[8]); 3152 bool fullRange = (type == FOURCC('n', 'c', 'l', 'x')) && (buffer[10] & 128); 3153 3154 ColorAspects aspects; 3155 ColorUtils::convertIsoColorAspectsToCodecAspects( 3156 primaries, transfer, coeffs, fullRange, aspects); 3157 3158 // only store the first color specification 3159 if (!mLastTrack->meta.hasData(kKeyColorPrimaries)) { 3160 mLastTrack->meta.setInt32(kKeyColorPrimaries, aspects.mPrimaries); 3161 mLastTrack->meta.setInt32(kKeyTransferFunction, aspects.mTransfer); 3162 mLastTrack->meta.setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs); 3163 mLastTrack->meta.setInt32(kKeyColorRange, aspects.mRange); 3164 } 3165 } 3166 3167 delete[] buffer; 3168 buffer = NULL; 3169 3170 return OK; 3171} 3172 3173status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) { 3174 if (size < 4 || size == SIZE_MAX) { 3175 return ERROR_MALFORMED; 3176 } 3177 3178 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 3179 if (buffer == NULL) { 3180 return ERROR_MALFORMED; 3181 } 3182 if (mDataSource->readAt( 3183 offset, buffer, size) != (ssize_t)size) { 3184 delete[] buffer; 3185 buffer = NULL; 3186 3187 return ERROR_IO; 3188 } 3189 3190 uint32_t metadataKey = 0; 3191 switch (mPath[depth]) { 3192 case FOURCC('t', 'i', 't', 'l'): 3193 { 3194 metadataKey = kKeyTitle; 3195 break; 3196 } 3197 case FOURCC('p', 'e', 'r', 'f'): 3198 { 3199 metadataKey = kKeyArtist; 3200 break; 3201 } 3202 case FOURCC('a', 'u', 't', 'h'): 3203 { 3204 metadataKey = kKeyWriter; 3205 break; 3206 } 3207 case FOURCC('g', 'n', 'r', 'e'): 3208 { 3209 metadataKey = kKeyGenre; 3210 break; 3211 } 3212 case FOURCC('a', 'l', 'b', 'm'): 3213 { 3214 if (buffer[size - 1] != '\0') { 3215 char tmp[4]; 3216 sprintf(tmp, "%u", buffer[size - 1]); 3217 3218 mFileMetaData.setCString(kKeyCDTrackNumber, tmp); 3219 } 3220 3221 metadataKey = kKeyAlbum; 3222 break; 3223 } 3224 case FOURCC('y', 'r', 'r', 'c'): 3225 { 3226 if (size < 6) { 3227 delete[] buffer; 3228 buffer = NULL; 3229 ALOGE("b/62133227"); 3230 android_errorWriteLog(0x534e4554, "62133227"); 3231 return ERROR_MALFORMED; 3232 } 3233 char tmp[5]; 3234 uint16_t year = U16_AT(&buffer[4]); 3235 3236 if (year < 10000) { 3237 sprintf(tmp, "%u", year); 3238 3239 mFileMetaData.setCString(kKeyYear, tmp); 3240 } 3241 break; 3242 } 3243 3244 default: 3245 break; 3246 } 3247 3248 if (metadataKey > 0) { 3249 bool isUTF8 = true; // Common case 3250 char16_t *framedata = NULL; 3251 int len16 = 0; // Number of UTF-16 characters 3252 3253 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00 3254 if (size < 6) { 3255 delete[] buffer; 3256 buffer = NULL; 3257 return ERROR_MALFORMED; 3258 } 3259 3260 if (size - 6 >= 4) { 3261 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator 3262 framedata = (char16_t *)(buffer + 6); 3263 if (0xfffe == *framedata) { 3264 // endianness marker (BOM) doesn't match host endianness 3265 for (int i = 0; i < len16; i++) { 3266 framedata[i] = bswap_16(framedata[i]); 3267 } 3268 // BOM is now swapped to 0xfeff, we will execute next block too 3269 } 3270 3271 if (0xfeff == *framedata) { 3272 // Remove the BOM 3273 framedata++; 3274 len16--; 3275 isUTF8 = false; 3276 } 3277 // else normal non-zero-length UTF-8 string 3278 // we can't handle UTF-16 without BOM as there is no other 3279 // indication of encoding. 3280 } 3281 3282 if (isUTF8) { 3283 buffer[size] = 0; 3284 mFileMetaData.setCString(metadataKey, (const char *)buffer + 6); 3285 } else { 3286 // Convert from UTF-16 string to UTF-8 string. 3287 String8 tmpUTF8str(framedata, len16); 3288 mFileMetaData.setCString(metadataKey, tmpUTF8str.string()); 3289 } 3290 } 3291 3292 delete[] buffer; 3293 buffer = NULL; 3294 3295 return OK; 3296} 3297 3298void MPEG4Extractor::parseID3v2MetaData(off64_t offset) { 3299 ID3 id3(mDataSource, true /* ignorev1 */, offset); 3300 3301 if (id3.isValid()) { 3302 struct Map { 3303 int key; 3304 const char *tag1; 3305 const char *tag2; 3306 }; 3307 static const Map kMap[] = { 3308 { kKeyAlbum, "TALB", "TAL" }, 3309 { kKeyArtist, "TPE1", "TP1" }, 3310 { kKeyAlbumArtist, "TPE2", "TP2" }, 3311 { kKeyComposer, "TCOM", "TCM" }, 3312 { kKeyGenre, "TCON", "TCO" }, 3313 { kKeyTitle, "TIT2", "TT2" }, 3314 { kKeyYear, "TYE", "TYER" }, 3315 { kKeyAuthor, "TXT", "TEXT" }, 3316 { kKeyCDTrackNumber, "TRK", "TRCK" }, 3317 { kKeyDiscNumber, "TPA", "TPOS" }, 3318 { kKeyCompilation, "TCP", "TCMP" }, 3319 }; 3320 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 3321 3322 for (size_t i = 0; i < kNumMapEntries; ++i) { 3323 if (!mFileMetaData.hasData(kMap[i].key)) { 3324 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 3325 if (it->done()) { 3326 delete it; 3327 it = new ID3::Iterator(id3, kMap[i].tag2); 3328 } 3329 3330 if (it->done()) { 3331 delete it; 3332 continue; 3333 } 3334 3335 String8 s; 3336 it->getString(&s); 3337 delete it; 3338 3339 mFileMetaData.setCString(kMap[i].key, s); 3340 } 3341 } 3342 3343 size_t dataSize; 3344 String8 mime; 3345 const void *data = id3.getAlbumArt(&dataSize, &mime); 3346 3347 if (data) { 3348 mFileMetaData.setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 3349 mFileMetaData.setCString(kKeyAlbumArtMIME, mime.string()); 3350 } 3351 } 3352} 3353 3354MediaTrack *MPEG4Extractor::getTrack(size_t index) { 3355 status_t err; 3356 if ((err = readMetaData()) != OK) { 3357 return NULL; 3358 } 3359 3360 Track *track = mFirstTrack; 3361 while (index > 0) { 3362 if (track == NULL) { 3363 return NULL; 3364 } 3365 3366 track = track->next; 3367 --index; 3368 } 3369 3370 if (track == NULL) { 3371 return NULL; 3372 } 3373 3374 3375 Trex *trex = NULL; 3376 int32_t trackId; 3377 if (track->meta.findInt32(kKeyTrackID, &trackId)) { 3378 for (size_t i = 0; i < mTrex.size(); i++) { 3379 Trex *t = &mTrex.editItemAt(i); 3380 if (t->track_ID == (uint32_t) trackId) { 3381 trex = t; 3382 break; 3383 } 3384 } 3385 } else { 3386 ALOGE("b/21657957"); 3387 return NULL; 3388 } 3389 3390 ALOGV("getTrack called, pssh: %zu", mPssh.size()); 3391 3392 const char *mime; 3393 if (!track->meta.findCString(kKeyMIMEType, &mime)) { 3394 return NULL; 3395 } 3396 3397 sp<ItemTable> itemTable; 3398 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 3399 uint32_t type; 3400 const void *data; 3401 size_t size; 3402 if (!track->meta.findData(kKeyAVCC, &type, &data, &size)) { 3403 return NULL; 3404 } 3405 3406 const uint8_t *ptr = (const uint8_t *)data; 3407 3408 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1 3409 return NULL; 3410 } 3411 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) 3412 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) { 3413 uint32_t type; 3414 const void *data; 3415 size_t size; 3416 if (!track->meta.findData(kKeyHVCC, &type, &data, &size)) { 3417 return NULL; 3418 } 3419 3420 const uint8_t *ptr = (const uint8_t *)data; 3421 3422 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1 3423 return NULL; 3424 } 3425 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) { 3426 itemTable = mItemTable; 3427 } 3428 } 3429 3430 MPEG4Source *source = new MPEG4Source( 3431 track->meta, mDataSource, track->timescale, track->sampleTable, 3432 mSidxEntries, trex, mMoofOffset, itemTable); 3433 if (source->init() != OK) { 3434 delete source; 3435 return NULL; 3436 } 3437 return source; 3438} 3439 3440// static 3441status_t MPEG4Extractor::verifyTrack(Track *track) { 3442 const char *mime; 3443 CHECK(track->meta.findCString(kKeyMIMEType, &mime)); 3444 3445 uint32_t type; 3446 const void *data; 3447 size_t size; 3448 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 3449 if (!track->meta.findData(kKeyAVCC, &type, &data, &size) 3450 || type != kTypeAVCC) { 3451 return ERROR_MALFORMED; 3452 } 3453 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 3454 if (!track->meta.findData(kKeyHVCC, &type, &data, &size) 3455 || type != kTypeHVCC) { 3456 return ERROR_MALFORMED; 3457 } 3458 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 3459 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2) 3460 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 3461 if (!track->meta.findData(kKeyESDS, &type, &data, &size) 3462 || type != kTypeESDS) { 3463 return ERROR_MALFORMED; 3464 } 3465 } 3466 3467 if (track->sampleTable == NULL || !track->sampleTable->isValid()) { 3468 // Make sure we have all the metadata we need. 3469 ALOGE("stbl atom missing/invalid."); 3470 return ERROR_MALFORMED; 3471 } 3472 3473 if (track->timescale == 0) { 3474 ALOGE("timescale invalid."); 3475 return ERROR_MALFORMED; 3476 } 3477 3478 return OK; 3479} 3480 3481typedef enum { 3482 //AOT_NONE = -1, 3483 //AOT_NULL_OBJECT = 0, 3484 //AOT_AAC_MAIN = 1, /**< Main profile */ 3485 AOT_AAC_LC = 2, /**< Low Complexity object */ 3486 //AOT_AAC_SSR = 3, 3487 //AOT_AAC_LTP = 4, 3488 AOT_SBR = 5, 3489 //AOT_AAC_SCAL = 6, 3490 //AOT_TWIN_VQ = 7, 3491 //AOT_CELP = 8, 3492 //AOT_HVXC = 9, 3493 //AOT_RSVD_10 = 10, /**< (reserved) */ 3494 //AOT_RSVD_11 = 11, /**< (reserved) */ 3495 //AOT_TTSI = 12, /**< TTSI Object */ 3496 //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */ 3497 //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */ 3498 //AOT_GEN_MIDI = 15, /**< General MIDI object */ 3499 //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */ 3500 AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */ 3501 //AOT_RSVD_18 = 18, /**< (reserved) */ 3502 //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */ 3503 AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */ 3504 //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */ 3505 AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */ 3506 AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */ 3507 //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */ 3508 //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */ 3509 //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */ 3510 //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */ 3511 //AOT_RSVD_28 = 28, /**< might become SSC */ 3512 AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */ 3513 //AOT_MPEGS = 30, /**< MPEG Surround */ 3514 3515 AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */ 3516 3517 //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */ 3518 //AOT_MP3ONMP4_L2 = 33, /**< MPEG-Layer2 in mp4 */ 3519 //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */ 3520 //AOT_RSVD_35 = 35, /**< might become DST */ 3521 //AOT_RSVD_36 = 36, /**< might become ALS */ 3522 //AOT_AAC_SLS = 37, /**< AAC + SLS */ 3523 //AOT_SLS = 38, /**< SLS */ 3524 //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */ 3525 3526 //AOT_USAC = 42, /**< USAC */ 3527 //AOT_SAOC = 43, /**< SAOC */ 3528 //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */ 3529 3530 //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */ 3531} AUDIO_OBJECT_TYPE; 3532 3533status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 3534 const void *esds_data, size_t esds_size) { 3535 ESDS esds(esds_data, esds_size); 3536 3537 uint8_t objectTypeIndication; 3538 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 3539 return ERROR_MALFORMED; 3540 } 3541 3542 if (objectTypeIndication == 0xe1) { 3543 // This isn't MPEG4 audio at all, it's QCELP 14k... 3544 if (mLastTrack == NULL) 3545 return ERROR_MALFORMED; 3546 3547 mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 3548 return OK; 3549 } 3550 3551 if (objectTypeIndication == 0x6b) { 3552 // The media subtype is MP3 audio 3553 // Our software MP3 audio decoder may not be able to handle 3554 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 3555 ALOGE("MP3 track in MP4/3GPP file is not supported"); 3556 return ERROR_UNSUPPORTED; 3557 } 3558 3559 if (mLastTrack != NULL) { 3560 uint32_t maxBitrate = 0; 3561 uint32_t avgBitrate = 0; 3562 esds.getBitRate(&maxBitrate, &avgBitrate); 3563 if (maxBitrate > 0 && maxBitrate < INT32_MAX) { 3564 mLastTrack->meta.setInt32(kKeyMaxBitRate, (int32_t)maxBitrate); 3565 } 3566 if (avgBitrate > 0 && avgBitrate < INT32_MAX) { 3567 mLastTrack->meta.setInt32(kKeyBitRate, (int32_t)avgBitrate); 3568 } 3569 } 3570 3571 const uint8_t *csd; 3572 size_t csd_size; 3573 if (esds.getCodecSpecificInfo( 3574 (const void **)&csd, &csd_size) != OK) { 3575 return ERROR_MALFORMED; 3576 } 3577 3578 if (kUseHexDump) { 3579 printf("ESD of size %zu\n", csd_size); 3580 hexdump(csd, csd_size); 3581 } 3582 3583 if (csd_size == 0) { 3584 // There's no further information, i.e. no codec specific data 3585 // Let's assume that the information provided in the mpeg4 headers 3586 // is accurate and hope for the best. 3587 3588 return OK; 3589 } 3590 3591 if (csd_size < 2) { 3592 return ERROR_MALFORMED; 3593 } 3594 3595 static uint32_t kSamplingRate[] = { 3596 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 3597 16000, 12000, 11025, 8000, 7350 3598 }; 3599 3600 ABitReader br(csd, csd_size); 3601 uint32_t objectType = br.getBits(5); 3602 3603 if (objectType == 31) { // AAC-ELD => additional 6 bits 3604 objectType = 32 + br.getBits(6); 3605 } 3606 3607 if (mLastTrack == NULL) 3608 return ERROR_MALFORMED; 3609 3610 //keep AOT type 3611 mLastTrack->meta.setInt32(kKeyAACAOT, objectType); 3612 3613 uint32_t freqIndex = br.getBits(4); 3614 3615 int32_t sampleRate = 0; 3616 int32_t numChannels = 0; 3617 if (freqIndex == 15) { 3618 if (br.numBitsLeft() < 28) return ERROR_MALFORMED; 3619 sampleRate = br.getBits(24); 3620 numChannels = br.getBits(4); 3621 } else { 3622 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3623 numChannels = br.getBits(4); 3624 3625 if (freqIndex == 13 || freqIndex == 14) { 3626 return ERROR_MALFORMED; 3627 } 3628 3629 sampleRate = kSamplingRate[freqIndex]; 3630 } 3631 3632 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 3633 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3634 uint32_t extFreqIndex = br.getBits(4); 3635 int32_t extSampleRate __unused; 3636 if (extFreqIndex == 15) { 3637 if (csd_size < 8) { 3638 return ERROR_MALFORMED; 3639 } 3640 if (br.numBitsLeft() < 24) return ERROR_MALFORMED; 3641 extSampleRate = br.getBits(24); 3642 } else { 3643 if (extFreqIndex == 13 || extFreqIndex == 14) { 3644 return ERROR_MALFORMED; 3645 } 3646 extSampleRate = kSamplingRate[extFreqIndex]; 3647 } 3648 //TODO: save the extension sampling rate value in meta data => 3649 // mLastTrack->meta.setInt32(kKeyExtSampleRate, extSampleRate); 3650 } 3651 3652 switch (numChannels) { 3653 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration 3654 case 0: 3655 case 1:// FC 3656 case 2:// FL FR 3657 case 3:// FC, FL FR 3658 case 4:// FC, FL FR, RC 3659 case 5:// FC, FL FR, SL SR 3660 case 6:// FC, FL FR, SL SR, LFE 3661 //numChannels already contains the right value 3662 break; 3663 case 11:// FC, FL FR, SL SR, RC, LFE 3664 numChannels = 7; 3665 break; 3666 case 7: // FC, FCL FCR, FL FR, SL SR, LFE 3667 case 12:// FC, FL FR, SL SR, RL RR, LFE 3668 case 14:// FC, FL FR, SL SR, LFE, FHL FHR 3669 numChannels = 8; 3670 break; 3671 default: 3672 return ERROR_UNSUPPORTED; 3673 } 3674 3675 { 3676 if (objectType == AOT_SBR || objectType == AOT_PS) { 3677 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3678 objectType = br.getBits(5); 3679 3680 if (objectType == AOT_ESCAPE) { 3681 if (br.numBitsLeft() < 6) return ERROR_MALFORMED; 3682 objectType = 32 + br.getBits(6); 3683 } 3684 } 3685 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || 3686 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || 3687 objectType == AOT_ER_BSAC) { 3688 if (br.numBitsLeft() < 2) return ERROR_MALFORMED; 3689 const int32_t frameLengthFlag __unused = br.getBits(1); 3690 3691 const int32_t dependsOnCoreCoder = br.getBits(1); 3692 3693 if (dependsOnCoreCoder ) { 3694 if (br.numBitsLeft() < 14) return ERROR_MALFORMED; 3695 const int32_t coreCoderDelay __unused = br.getBits(14); 3696 } 3697 3698 int32_t extensionFlag = -1; 3699 if (br.numBitsLeft() > 0) { 3700 extensionFlag = br.getBits(1); 3701 } else { 3702 switch (objectType) { 3703 // 14496-3 4.5.1.1 extensionFlag 3704 case AOT_AAC_LC: 3705 extensionFlag = 0; 3706 break; 3707 case AOT_ER_AAC_LC: 3708 case AOT_ER_AAC_SCAL: 3709 case AOT_ER_BSAC: 3710 case AOT_ER_AAC_LD: 3711 extensionFlag = 1; 3712 break; 3713 default: 3714 return ERROR_MALFORMED; 3715 break; 3716 } 3717 ALOGW("csd missing extension flag; assuming %d for object type %u.", 3718 extensionFlag, objectType); 3719 } 3720 3721 if (numChannels == 0) { 3722 int32_t channelsEffectiveNum = 0; 3723 int32_t channelsNum = 0; 3724 if (br.numBitsLeft() < 32) { 3725 return ERROR_MALFORMED; 3726 } 3727 const int32_t ElementInstanceTag __unused = br.getBits(4); 3728 const int32_t Profile __unused = br.getBits(2); 3729 const int32_t SamplingFrequencyIndex __unused = br.getBits(4); 3730 const int32_t NumFrontChannelElements = br.getBits(4); 3731 const int32_t NumSideChannelElements = br.getBits(4); 3732 const int32_t NumBackChannelElements = br.getBits(4); 3733 const int32_t NumLfeChannelElements = br.getBits(2); 3734 const int32_t NumAssocDataElements __unused = br.getBits(3); 3735 const int32_t NumValidCcElements __unused = br.getBits(4); 3736 3737 const int32_t MonoMixdownPresent = br.getBits(1); 3738 3739 if (MonoMixdownPresent != 0) { 3740 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3741 const int32_t MonoMixdownElementNumber __unused = br.getBits(4); 3742 } 3743 3744 if (br.numBitsLeft() < 1) return ERROR_MALFORMED; 3745 const int32_t StereoMixdownPresent = br.getBits(1); 3746 if (StereoMixdownPresent != 0) { 3747 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3748 const int32_t StereoMixdownElementNumber __unused = br.getBits(4); 3749 } 3750 3751 if (br.numBitsLeft() < 1) return ERROR_MALFORMED; 3752 const int32_t MatrixMixdownIndexPresent = br.getBits(1); 3753 if (MatrixMixdownIndexPresent != 0) { 3754 if (br.numBitsLeft() < 3) return ERROR_MALFORMED; 3755 const int32_t MatrixMixdownIndex __unused = br.getBits(2); 3756 const int32_t PseudoSurroundEnable __unused = br.getBits(1); 3757 } 3758 3759 int i; 3760 for (i=0; i < NumFrontChannelElements; i++) { 3761 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3762 const int32_t FrontElementIsCpe = br.getBits(1); 3763 const int32_t FrontElementTagSelect __unused = br.getBits(4); 3764 channelsNum += FrontElementIsCpe ? 2 : 1; 3765 } 3766 3767 for (i=0; i < NumSideChannelElements; i++) { 3768 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3769 const int32_t SideElementIsCpe = br.getBits(1); 3770 const int32_t SideElementTagSelect __unused = br.getBits(4); 3771 channelsNum += SideElementIsCpe ? 2 : 1; 3772 } 3773 3774 for (i=0; i < NumBackChannelElements; i++) { 3775 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3776 const int32_t BackElementIsCpe = br.getBits(1); 3777 const int32_t BackElementTagSelect __unused = br.getBits(4); 3778 channelsNum += BackElementIsCpe ? 2 : 1; 3779 } 3780 channelsEffectiveNum = channelsNum; 3781 3782 for (i=0; i < NumLfeChannelElements; i++) { 3783 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3784 const int32_t LfeElementTagSelect __unused = br.getBits(4); 3785 channelsNum += 1; 3786 } 3787 ALOGV("mpeg4 audio channelsNum = %d", channelsNum); 3788 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); 3789 numChannels = channelsNum; 3790 } 3791 } 3792 } 3793 3794 if (numChannels == 0) { 3795 return ERROR_UNSUPPORTED; 3796 } 3797 3798 if (mLastTrack == NULL) 3799 return ERROR_MALFORMED; 3800 3801 int32_t prevSampleRate; 3802 CHECK(mLastTrack->meta.findInt32(kKeySampleRate, &prevSampleRate)); 3803 3804 if (prevSampleRate != sampleRate) { 3805 ALOGV("mpeg4 audio sample rate different from previous setting. " 3806 "was: %d, now: %d", prevSampleRate, sampleRate); 3807 } 3808 3809 mLastTrack->meta.setInt32(kKeySampleRate, sampleRate); 3810 3811 int32_t prevChannelCount; 3812 CHECK(mLastTrack->meta.findInt32(kKeyChannelCount, &prevChannelCount)); 3813 3814 if (prevChannelCount != numChannels) { 3815 ALOGV("mpeg4 audio channel count different from previous setting. " 3816 "was: %d, now: %d", prevChannelCount, numChannels); 3817 } 3818 3819 mLastTrack->meta.setInt32(kKeyChannelCount, numChannels); 3820 3821 return OK; 3822} 3823 3824//////////////////////////////////////////////////////////////////////////////// 3825 3826MPEG4Source::MPEG4Source( 3827 MetaDataBase &format, 3828 DataSourceBase *dataSource, 3829 int32_t timeScale, 3830 const sp<SampleTable> &sampleTable, 3831 Vector<SidxEntry> &sidx, 3832 const Trex *trex, 3833 off64_t firstMoofOffset, 3834 const sp<ItemTable> &itemTable) 3835 : mFormat(format), 3836 mDataSource(dataSource), 3837 mTimescale(timeScale), 3838 mSampleTable(sampleTable), 3839 mCurrentSampleIndex(0), 3840 mCurrentFragmentIndex(0), 3841 mSegments(sidx), 3842 mTrex(trex), 3843 mFirstMoofOffset(firstMoofOffset), 3844 mCurrentMoofOffset(firstMoofOffset), 3845 mNextMoofOffset(-1), 3846 mCurrentTime(0), 3847 mDefaultEncryptedByteBlock(0), 3848 mDefaultSkipByteBlock(0), 3849 mCurrentSampleInfoAllocSize(0), 3850 mCurrentSampleInfoSizes(NULL), 3851 mCurrentSampleInfoOffsetsAllocSize(0), 3852 mCurrentSampleInfoOffsets(NULL), 3853 mIsAVC(false), 3854 mIsHEVC(false), 3855 mNALLengthSize(0), 3856 mStarted(false), 3857 mGroup(NULL), 3858 mBuffer(NULL), 3859 mWantsNALFragments(false), 3860 mSrcBuffer(NULL), 3861 mIsHeif(itemTable != NULL), 3862 mItemTable(itemTable) { 3863 3864 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo)); 3865 3866 mFormat.findInt32(kKeyCryptoMode, &mCryptoMode); 3867 mDefaultIVSize = 0; 3868 mFormat.findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 3869 uint32_t keytype; 3870 const void *key; 3871 size_t keysize; 3872 if (mFormat.findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 3873 CHECK(keysize <= 16); 3874 memset(mCryptoKey, 0, 16); 3875 memcpy(mCryptoKey, key, keysize); 3876 } 3877 3878 mFormat.findInt32(kKeyEncryptedByteBlock, &mDefaultEncryptedByteBlock); 3879 mFormat.findInt32(kKeySkipByteBlock, &mDefaultSkipByteBlock); 3880 3881 const char *mime; 3882 bool success = mFormat.findCString(kKeyMIMEType, &mime); 3883 CHECK(success); 3884 3885 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 3886 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) || 3887 !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC); 3888 3889 if (mIsAVC) { 3890 uint32_t type; 3891 const void *data; 3892 size_t size; 3893 CHECK(format.findData(kKeyAVCC, &type, &data, &size)); 3894 3895 const uint8_t *ptr = (const uint8_t *)data; 3896 3897 CHECK(size >= 7); 3898 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3899 3900 // The number of bytes used to encode the length of a NAL unit. 3901 mNALLengthSize = 1 + (ptr[4] & 3); 3902 } else if (mIsHEVC) { 3903 uint32_t type; 3904 const void *data; 3905 size_t size; 3906 CHECK(format.findData(kKeyHVCC, &type, &data, &size)); 3907 3908 const uint8_t *ptr = (const uint8_t *)data; 3909 3910 CHECK(size >= 22); 3911 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3912 3913 mNALLengthSize = 1 + (ptr[14 + 7] & 3); 3914 } 3915 3916 CHECK(format.findInt32(kKeyTrackID, &mTrackId)); 3917 3918} 3919 3920status_t MPEG4Source::init() { 3921 if (mFirstMoofOffset != 0) { 3922 off64_t offset = mFirstMoofOffset; 3923 return parseChunk(&offset); 3924 } 3925 return OK; 3926} 3927 3928MPEG4Source::~MPEG4Source() { 3929 if (mStarted) { 3930 stop(); 3931 } 3932 free(mCurrentSampleInfoSizes); 3933 free(mCurrentSampleInfoOffsets); 3934} 3935 3936status_t MPEG4Source::start(MetaDataBase *params) { 3937 Mutex::Autolock autoLock(mLock); 3938 3939 CHECK(!mStarted); 3940 3941 int32_t val; 3942 if (params && params->findInt32(kKeyWantsNALFragments, &val) 3943 && val != 0) { 3944 mWantsNALFragments = true; 3945 } else { 3946 mWantsNALFragments = false; 3947 } 3948 3949 int32_t tmp; 3950 CHECK(mFormat.findInt32(kKeyMaxInputSize, &tmp)); 3951 size_t max_size = tmp; 3952 3953 // A somewhat arbitrary limit that should be sufficient for 8k video frames 3954 // If you see the message below for a valid input stream: increase the limit 3955 const size_t kMaxBufferSize = 64 * 1024 * 1024; 3956 if (max_size > kMaxBufferSize) { 3957 ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize); 3958 return ERROR_MALFORMED; 3959 } 3960 if (max_size == 0) { 3961 ALOGE("zero max input size"); 3962 return ERROR_MALFORMED; 3963 } 3964 3965 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize. 3966 const size_t kMaxBuffers = 8; 3967 const size_t buffers = min(kMaxBufferSize / max_size, kMaxBuffers); 3968 mGroup = new MediaBufferGroup(buffers, max_size); 3969 mSrcBuffer = new (std::nothrow) uint8_t[max_size]; 3970 if (mSrcBuffer == NULL) { 3971 // file probably specified a bad max size 3972 delete mGroup; 3973 mGroup = NULL; 3974 return ERROR_MALFORMED; 3975 } 3976 3977 mStarted = true; 3978 3979 return OK; 3980} 3981 3982status_t MPEG4Source::stop() { 3983 Mutex::Autolock autoLock(mLock); 3984 3985 CHECK(mStarted); 3986 3987 if (mBuffer != NULL) { 3988 mBuffer->release(); 3989 mBuffer = NULL; 3990 } 3991 3992 delete[] mSrcBuffer; 3993 mSrcBuffer = NULL; 3994 3995 delete mGroup; 3996 mGroup = NULL; 3997 3998 mStarted = false; 3999 mCurrentSampleIndex = 0; 4000 4001 return OK; 4002} 4003 4004status_t MPEG4Source::parseChunk(off64_t *offset) { 4005 uint32_t hdr[2]; 4006 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 4007 return ERROR_IO; 4008 } 4009 uint64_t chunk_size = ntohl(hdr[0]); 4010 uint32_t chunk_type = ntohl(hdr[1]); 4011 off64_t data_offset = *offset + 8; 4012 4013 if (chunk_size == 1) { 4014 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 4015 return ERROR_IO; 4016 } 4017 chunk_size = ntoh64(chunk_size); 4018 data_offset += 8; 4019 4020 if (chunk_size < 16) { 4021 // The smallest valid chunk is 16 bytes long in this case. 4022 return ERROR_MALFORMED; 4023 } 4024 } else if (chunk_size < 8) { 4025 // The smallest valid chunk is 8 bytes long. 4026 return ERROR_MALFORMED; 4027 } 4028 4029 char chunk[5]; 4030 MakeFourCCString(chunk_type, chunk); 4031 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset); 4032 4033 off64_t chunk_data_size = *offset + chunk_size - data_offset; 4034 4035 switch(chunk_type) { 4036 4037 case FOURCC('t', 'r', 'a', 'f'): 4038 case FOURCC('m', 'o', 'o', 'f'): { 4039 off64_t stop_offset = *offset + chunk_size; 4040 *offset = data_offset; 4041 while (*offset < stop_offset) { 4042 status_t err = parseChunk(offset); 4043 if (err != OK) { 4044 return err; 4045 } 4046 } 4047 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 4048 // *offset points to the box following this moof. Find the next moof from there. 4049 4050 while (true) { 4051 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 4052 // no more box to the end of file. 4053 break; 4054 } 4055 chunk_size = ntohl(hdr[0]); 4056 chunk_type = ntohl(hdr[1]); 4057 if (chunk_size == 1) { 4058 // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box 4059 // which is defined in 4.2 Object Structure. 4060 // When chunk_size==1, 8 bytes follows as "largesize". 4061 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 4062 return ERROR_IO; 4063 } 4064 chunk_size = ntoh64(chunk_size); 4065 if (chunk_size < 16) { 4066 // The smallest valid chunk is 16 bytes long in this case. 4067 return ERROR_MALFORMED; 4068 } 4069 } else if (chunk_size == 0) { 4070 // next box extends to end of file. 4071 } else if (chunk_size < 8) { 4072 // The smallest valid chunk is 8 bytes long in this case. 4073 return ERROR_MALFORMED; 4074 } 4075 4076 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 4077 mNextMoofOffset = *offset; 4078 break; 4079 } else if (chunk_size == 0) { 4080 break; 4081 } 4082 *offset += chunk_size; 4083 } 4084 } 4085 break; 4086 } 4087 4088 case FOURCC('t', 'f', 'h', 'd'): { 4089 status_t err; 4090 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 4091 return err; 4092 } 4093 *offset += chunk_size; 4094 break; 4095 } 4096 4097 case FOURCC('t', 'r', 'u', 'n'): { 4098 status_t err; 4099 if (mLastParsedTrackId == mTrackId) { 4100 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 4101 return err; 4102 } 4103 } 4104 4105 *offset += chunk_size; 4106 break; 4107 } 4108 4109 case FOURCC('s', 'a', 'i', 'z'): { 4110 status_t err; 4111 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 4112 return err; 4113 } 4114 *offset += chunk_size; 4115 break; 4116 } 4117 case FOURCC('s', 'a', 'i', 'o'): { 4118 status_t err; 4119 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 4120 return err; 4121 } 4122 *offset += chunk_size; 4123 break; 4124 } 4125 4126 case FOURCC('s', 'e', 'n', 'c'): { 4127 status_t err; 4128 if ((err = parseSampleEncryption(data_offset)) != OK) { 4129 return err; 4130 } 4131 *offset += chunk_size; 4132 break; 4133 } 4134 4135 case FOURCC('m', 'd', 'a', 't'): { 4136 // parse DRM info if present 4137 ALOGV("MPEG4Source::parseChunk mdat"); 4138 // if saiz/saoi was previously observed, do something with the sampleinfos 4139 *offset += chunk_size; 4140 break; 4141 } 4142 4143 default: { 4144 *offset += chunk_size; 4145 break; 4146 } 4147 } 4148 return OK; 4149} 4150 4151status_t MPEG4Source::parseSampleAuxiliaryInformationSizes( 4152 off64_t offset, off64_t /* size */) { 4153 ALOGV("parseSampleAuxiliaryInformationSizes"); 4154 // 14496-12 8.7.12 4155 uint8_t version; 4156 if (mDataSource->readAt( 4157 offset, &version, sizeof(version)) 4158 < (ssize_t)sizeof(version)) { 4159 return ERROR_IO; 4160 } 4161 4162 if (version != 0) { 4163 return ERROR_UNSUPPORTED; 4164 } 4165 offset++; 4166 4167 uint32_t flags; 4168 if (!mDataSource->getUInt24(offset, &flags)) { 4169 return ERROR_IO; 4170 } 4171 offset += 3; 4172 4173 if (flags & 1) { 4174 uint32_t tmp; 4175 if (!mDataSource->getUInt32(offset, &tmp)) { 4176 return ERROR_MALFORMED; 4177 } 4178 mCurrentAuxInfoType = tmp; 4179 offset += 4; 4180 if (!mDataSource->getUInt32(offset, &tmp)) { 4181 return ERROR_MALFORMED; 4182 } 4183 mCurrentAuxInfoTypeParameter = tmp; 4184 offset += 4; 4185 } 4186 4187 uint8_t defsize; 4188 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 4189 return ERROR_MALFORMED; 4190 } 4191 mCurrentDefaultSampleInfoSize = defsize; 4192 offset++; 4193 4194 uint32_t smplcnt; 4195 if (!mDataSource->getUInt32(offset, &smplcnt)) { 4196 return ERROR_MALFORMED; 4197 } 4198 mCurrentSampleInfoCount = smplcnt; 4199 offset += 4; 4200 4201 if (mCurrentDefaultSampleInfoSize != 0) { 4202 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 4203 return OK; 4204 } 4205 if (smplcnt > mCurrentSampleInfoAllocSize) { 4206 uint8_t * newPtr = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 4207 if (newPtr == NULL) { 4208 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt); 4209 return NO_MEMORY; 4210 } 4211 mCurrentSampleInfoSizes = newPtr; 4212 mCurrentSampleInfoAllocSize = smplcnt; 4213 } 4214 4215 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 4216 return OK; 4217} 4218 4219status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets( 4220 off64_t offset, off64_t /* size */) { 4221 ALOGV("parseSampleAuxiliaryInformationOffsets"); 4222 // 14496-12 8.7.13 4223 uint8_t version; 4224 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 4225 return ERROR_IO; 4226 } 4227 offset++; 4228 4229 uint32_t flags; 4230 if (!mDataSource->getUInt24(offset, &flags)) { 4231 return ERROR_IO; 4232 } 4233 offset += 3; 4234 4235 uint32_t entrycount; 4236 if (!mDataSource->getUInt32(offset, &entrycount)) { 4237 return ERROR_IO; 4238 } 4239 offset += 4; 4240 if (entrycount == 0) { 4241 return OK; 4242 } 4243 if (entrycount > UINT32_MAX / 8) { 4244 return ERROR_MALFORMED; 4245 } 4246 4247 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 4248 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8); 4249 if (newPtr == NULL) { 4250 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoOffsetsAllocSize, entrycount * 8); 4251 return NO_MEMORY; 4252 } 4253 mCurrentSampleInfoOffsets = newPtr; 4254 mCurrentSampleInfoOffsetsAllocSize = entrycount; 4255 } 4256 mCurrentSampleInfoOffsetCount = entrycount; 4257 4258 if (mCurrentSampleInfoOffsets == NULL) { 4259 return OK; 4260 } 4261 4262 for (size_t i = 0; i < entrycount; i++) { 4263 if (version == 0) { 4264 uint32_t tmp; 4265 if (!mDataSource->getUInt32(offset, &tmp)) { 4266 return ERROR_IO; 4267 } 4268 mCurrentSampleInfoOffsets[i] = tmp; 4269 offset += 4; 4270 } else { 4271 uint64_t tmp; 4272 if (!mDataSource->getUInt64(offset, &tmp)) { 4273 return ERROR_IO; 4274 } 4275 mCurrentSampleInfoOffsets[i] = tmp; 4276 offset += 8; 4277 } 4278 } 4279 4280 // parse clear/encrypted data 4281 4282 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 4283 4284 drmoffset += mCurrentMoofOffset; 4285 4286 return parseClearEncryptedSizes(drmoffset, false, 0); 4287} 4288 4289status_t MPEG4Source::parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags) { 4290 4291 int ivlength; 4292 CHECK(mFormat.findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 4293 4294 // only 0, 8 and 16 byte initialization vectors are supported 4295 if (ivlength != 0 && ivlength != 8 && ivlength != 16) { 4296 ALOGW("unsupported IV length: %d", ivlength); 4297 return ERROR_MALFORMED; 4298 } 4299 4300 uint32_t sampleCount = mCurrentSampleInfoCount; 4301 if (isSubsampleEncryption) { 4302 if (!mDataSource->getUInt32(offset, &sampleCount)) { 4303 return ERROR_IO; 4304 } 4305 offset += 4; 4306 } 4307 4308 // read CencSampleAuxiliaryDataFormats 4309 for (size_t i = 0; i < sampleCount; i++) { 4310 if (i >= mCurrentSamples.size()) { 4311 ALOGW("too few samples"); 4312 break; 4313 } 4314 Sample *smpl = &mCurrentSamples.editItemAt(i); 4315 if (!smpl->clearsizes.isEmpty()) { 4316 continue; 4317 } 4318 4319 memset(smpl->iv, 0, 16); 4320 if (mDataSource->readAt(offset, smpl->iv, ivlength) != ivlength) { 4321 return ERROR_IO; 4322 } 4323 4324 offset += ivlength; 4325 4326 bool readSubsamples; 4327 if (isSubsampleEncryption) { 4328 readSubsamples = flags & 2; 4329 } else { 4330 int32_t smplinfosize = mCurrentDefaultSampleInfoSize; 4331 if (smplinfosize == 0) { 4332 smplinfosize = mCurrentSampleInfoSizes[i]; 4333 } 4334 readSubsamples = smplinfosize > ivlength; 4335 } 4336 4337 if (readSubsamples) { 4338 uint16_t numsubsamples; 4339 if (!mDataSource->getUInt16(offset, &numsubsamples)) { 4340 return ERROR_IO; 4341 } 4342 offset += 2; 4343 for (size_t j = 0; j < numsubsamples; j++) { 4344 uint16_t numclear; 4345 uint32_t numencrypted; 4346 if (!mDataSource->getUInt16(offset, &numclear)) { 4347 return ERROR_IO; 4348 } 4349 offset += 2; 4350 if (!mDataSource->getUInt32(offset, &numencrypted)) { 4351 return ERROR_IO; 4352 } 4353 offset += 4; 4354 smpl->clearsizes.add(numclear); 4355 smpl->encryptedsizes.add(numencrypted); 4356 } 4357 } else { 4358 smpl->clearsizes.add(0); 4359 smpl->encryptedsizes.add(smpl->size); 4360 } 4361 } 4362 4363 return OK; 4364} 4365 4366status_t MPEG4Source::parseSampleEncryption(off64_t offset) { 4367 uint32_t flags; 4368 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 4369 return ERROR_MALFORMED; 4370 } 4371 return parseClearEncryptedSizes(offset + 4, true, flags); 4372} 4373 4374status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 4375 4376 if (size < 8) { 4377 return -EINVAL; 4378 } 4379 4380 uint32_t flags; 4381 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 4382 return ERROR_MALFORMED; 4383 } 4384 4385 if (flags & 0xff000000) { 4386 return -EINVAL; 4387 } 4388 4389 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 4390 return ERROR_MALFORMED; 4391 } 4392 4393 if (mLastParsedTrackId != mTrackId) { 4394 // this is not the right track, skip it 4395 return OK; 4396 } 4397 4398 mTrackFragmentHeaderInfo.mFlags = flags; 4399 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 4400 offset += 8; 4401 size -= 8; 4402 4403 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 4404 4405 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 4406 if (size < 8) { 4407 return -EINVAL; 4408 } 4409 4410 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) { 4411 return ERROR_MALFORMED; 4412 } 4413 offset += 8; 4414 size -= 8; 4415 } 4416 4417 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 4418 if (size < 4) { 4419 return -EINVAL; 4420 } 4421 4422 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 4423 return ERROR_MALFORMED; 4424 } 4425 offset += 4; 4426 size -= 4; 4427 } 4428 4429 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 4430 if (size < 4) { 4431 return -EINVAL; 4432 } 4433 4434 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 4435 return ERROR_MALFORMED; 4436 } 4437 offset += 4; 4438 size -= 4; 4439 } 4440 4441 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 4442 if (size < 4) { 4443 return -EINVAL; 4444 } 4445 4446 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 4447 return ERROR_MALFORMED; 4448 } 4449 offset += 4; 4450 size -= 4; 4451 } 4452 4453 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 4454 if (size < 4) { 4455 return -EINVAL; 4456 } 4457 4458 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 4459 return ERROR_MALFORMED; 4460 } 4461 offset += 4; 4462 size -= 4; 4463 } 4464 4465 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 4466 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 4467 } 4468 4469 mTrackFragmentHeaderInfo.mDataOffset = 0; 4470 return OK; 4471} 4472 4473status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 4474 4475 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 4476 if (size < 8) { 4477 return -EINVAL; 4478 } 4479 4480 enum { 4481 kDataOffsetPresent = 0x01, 4482 kFirstSampleFlagsPresent = 0x04, 4483 kSampleDurationPresent = 0x100, 4484 kSampleSizePresent = 0x200, 4485 kSampleFlagsPresent = 0x400, 4486 kSampleCompositionTimeOffsetPresent = 0x800, 4487 }; 4488 4489 uint32_t flags; 4490 if (!mDataSource->getUInt32(offset, &flags)) { 4491 return ERROR_MALFORMED; 4492 } 4493 // |version| only affects SampleCompositionTimeOffset field. 4494 // If version == 0, SampleCompositionTimeOffset is uint32_t; 4495 // Otherwise, SampleCompositionTimeOffset is int32_t. 4496 // Sample.compositionOffset is defined as int32_t. 4497 uint8_t version = flags >> 24; 4498 flags &= 0xffffff; 4499 ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags); 4500 4501 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 4502 // These two shall not be used together. 4503 return -EINVAL; 4504 } 4505 4506 uint32_t sampleCount; 4507 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 4508 return ERROR_MALFORMED; 4509 } 4510 offset += 8; 4511 size -= 8; 4512 4513 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 4514 4515 uint32_t firstSampleFlags = 0; 4516 4517 if (flags & kDataOffsetPresent) { 4518 if (size < 4) { 4519 return -EINVAL; 4520 } 4521 4522 int32_t dataOffsetDelta; 4523 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 4524 return ERROR_MALFORMED; 4525 } 4526 4527 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 4528 4529 offset += 4; 4530 size -= 4; 4531 } 4532 4533 if (flags & kFirstSampleFlagsPresent) { 4534 if (size < 4) { 4535 return -EINVAL; 4536 } 4537 4538 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 4539 return ERROR_MALFORMED; 4540 } 4541 offset += 4; 4542 size -= 4; 4543 } 4544 4545 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 4546 sampleCtsOffset = 0; 4547 4548 size_t bytesPerSample = 0; 4549 if (flags & kSampleDurationPresent) { 4550 bytesPerSample += 4; 4551 } else if (mTrackFragmentHeaderInfo.mFlags 4552 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 4553 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 4554 } else if (mTrex) { 4555 sampleDuration = mTrex->default_sample_duration; 4556 } 4557 4558 if (flags & kSampleSizePresent) { 4559 bytesPerSample += 4; 4560 } else if (mTrackFragmentHeaderInfo.mFlags 4561 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 4562 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 4563 } else { 4564 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 4565 } 4566 4567 if (flags & kSampleFlagsPresent) { 4568 bytesPerSample += 4; 4569 } else if (mTrackFragmentHeaderInfo.mFlags 4570 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 4571 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 4572 } else { 4573 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 4574 } 4575 4576 if (flags & kSampleCompositionTimeOffsetPresent) { 4577 bytesPerSample += 4; 4578 } else { 4579 sampleCtsOffset = 0; 4580 } 4581 4582 if (size < (off64_t)(sampleCount * bytesPerSample)) { 4583 return -EINVAL; 4584 } 4585 4586 Sample tmp; 4587 for (uint32_t i = 0; i < sampleCount; ++i) { 4588 if (flags & kSampleDurationPresent) { 4589 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 4590 return ERROR_MALFORMED; 4591 } 4592 offset += 4; 4593 } 4594 4595 if (flags & kSampleSizePresent) { 4596 if (!mDataSource->getUInt32(offset, &sampleSize)) { 4597 return ERROR_MALFORMED; 4598 } 4599 offset += 4; 4600 } 4601 4602 if (flags & kSampleFlagsPresent) { 4603 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 4604 return ERROR_MALFORMED; 4605 } 4606 offset += 4; 4607 } 4608 4609 if (flags & kSampleCompositionTimeOffsetPresent) { 4610 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 4611 return ERROR_MALFORMED; 4612 } 4613 offset += 4; 4614 } 4615 4616 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, " 4617 " flags 0x%08x", i + 1, 4618 dataOffset, sampleSize, sampleDuration, 4619 (flags & kFirstSampleFlagsPresent) && i == 0 4620 ? firstSampleFlags : sampleFlags); 4621 tmp.offset = dataOffset; 4622 tmp.size = sampleSize; 4623 tmp.duration = sampleDuration; 4624 tmp.compositionOffset = sampleCtsOffset; 4625 memset(tmp.iv, 0, sizeof(tmp.iv)); 4626 mCurrentSamples.add(tmp); 4627 4628 dataOffset += sampleSize; 4629 } 4630 4631 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 4632 4633 return OK; 4634} 4635 4636status_t MPEG4Source::getFormat(MetaDataBase &meta) { 4637 Mutex::Autolock autoLock(mLock); 4638 meta = mFormat; 4639 return OK; 4640} 4641 4642size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 4643 switch (mNALLengthSize) { 4644 case 1: 4645 return *data; 4646 case 2: 4647 return U16_AT(data); 4648 case 3: 4649 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 4650 case 4: 4651 return U32_AT(data); 4652 } 4653 4654 // This cannot happen, mNALLengthSize springs to life by adding 1 to 4655 // a 2-bit integer. 4656 CHECK(!"Should not be here."); 4657 4658 return 0; 4659} 4660 4661status_t MPEG4Source::read( 4662 MediaBufferBase **out, const ReadOptions *options) { 4663 Mutex::Autolock autoLock(mLock); 4664 4665 CHECK(mStarted); 4666 4667 if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) { 4668 *out = nullptr; 4669 return WOULD_BLOCK; 4670 } 4671 4672 if (mFirstMoofOffset > 0) { 4673 return fragmentedRead(out, options); 4674 } 4675 4676 *out = NULL; 4677 4678 int64_t targetSampleTimeUs = -1; 4679 4680 int64_t seekTimeUs; 4681 ReadOptions::SeekMode mode; 4682 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 4683 if (mIsHeif) { 4684 CHECK(mSampleTable == NULL); 4685 CHECK(mItemTable != NULL); 4686 int32_t imageIndex; 4687 if (!mFormat.findInt32(kKeyTrackID, &imageIndex)) { 4688 return ERROR_MALFORMED; 4689 } 4690 4691 status_t err; 4692 if (seekTimeUs >= 0) { 4693 err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex); 4694 } else { 4695 err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex); 4696 } 4697 if (err != OK) { 4698 return err; 4699 } 4700 } else { 4701 uint32_t findFlags = 0; 4702 switch (mode) { 4703 case ReadOptions::SEEK_PREVIOUS_SYNC: 4704 findFlags = SampleTable::kFlagBefore; 4705 break; 4706 case ReadOptions::SEEK_NEXT_SYNC: 4707 findFlags = SampleTable::kFlagAfter; 4708 break; 4709 case ReadOptions::SEEK_CLOSEST_SYNC: 4710 case ReadOptions::SEEK_CLOSEST: 4711 findFlags = SampleTable::kFlagClosest; 4712 break; 4713 case ReadOptions::SEEK_FRAME_INDEX: 4714 findFlags = SampleTable::kFlagFrameIndex; 4715 break; 4716 default: 4717 CHECK(!"Should not be here."); 4718 break; 4719 } 4720 4721 uint32_t sampleIndex; 4722 status_t err = mSampleTable->findSampleAtTime( 4723 seekTimeUs, 1000000, mTimescale, 4724 &sampleIndex, findFlags); 4725 4726 if (mode == ReadOptions::SEEK_CLOSEST 4727 || mode == ReadOptions::SEEK_FRAME_INDEX) { 4728 // We found the closest sample already, now we want the sync 4729 // sample preceding it (or the sample itself of course), even 4730 // if the subsequent sync sample is closer. 4731 findFlags = SampleTable::kFlagBefore; 4732 } 4733 4734 uint32_t syncSampleIndex; 4735 if (err == OK) { 4736 err = mSampleTable->findSyncSampleNear( 4737 sampleIndex, &syncSampleIndex, findFlags); 4738 } 4739 4740 uint32_t sampleTime; 4741 if (err == OK) { 4742 err = mSampleTable->getMetaDataForSample( 4743 sampleIndex, NULL, NULL, &sampleTime); 4744 } 4745 4746 if (err != OK) { 4747 if (err == ERROR_OUT_OF_RANGE) { 4748 // An attempt to seek past the end of the stream would 4749 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 4750 // this all the way to the MediaPlayer would cause abnormal 4751 // termination. Legacy behaviour appears to be to behave as if 4752 // we had seeked to the end of stream, ending normally. 4753 err = ERROR_END_OF_STREAM; 4754 } 4755 ALOGV("end of stream"); 4756 return err; 4757 } 4758 4759 if (mode == ReadOptions::SEEK_CLOSEST 4760 || mode == ReadOptions::SEEK_FRAME_INDEX) { 4761 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 4762 } 4763 4764#if 0 4765 uint32_t syncSampleTime; 4766 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 4767 syncSampleIndex, NULL, NULL, &syncSampleTime)); 4768 4769 ALOGI("seek to time %lld us => sample at time %lld us, " 4770 "sync sample at time %lld us", 4771 seekTimeUs, 4772 sampleTime * 1000000ll / mTimescale, 4773 syncSampleTime * 1000000ll / mTimescale); 4774#endif 4775 4776 mCurrentSampleIndex = syncSampleIndex; 4777 } 4778 4779 if (mBuffer != NULL) { 4780 mBuffer->release(); 4781 mBuffer = NULL; 4782 } 4783 4784 // fall through 4785 } 4786 4787 off64_t offset = 0; 4788 size_t size = 0; 4789 uint32_t cts, stts; 4790 bool isSyncSample; 4791 bool newBuffer = false; 4792 if (mBuffer == NULL) { 4793 newBuffer = true; 4794 4795 status_t err; 4796 if (!mIsHeif) { 4797 err = mSampleTable->getMetaDataForSample( 4798 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts); 4799 } else { 4800 err = mItemTable->getImageOffsetAndSize( 4801 options && options->getSeekTo(&seekTimeUs, &mode) ? 4802 &mCurrentSampleIndex : NULL, &offset, &size); 4803 4804 cts = stts = 0; 4805 isSyncSample = 0; 4806 ALOGV("image offset %lld, size %zu", (long long)offset, size); 4807 } 4808 4809 if (err != OK) { 4810 return err; 4811 } 4812 4813 err = mGroup->acquire_buffer(&mBuffer); 4814 4815 if (err != OK) { 4816 CHECK(mBuffer == NULL); 4817 return err; 4818 } 4819 if (size > mBuffer->size()) { 4820 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 4821 mBuffer->release(); 4822 mBuffer = NULL; 4823 return ERROR_BUFFER_TOO_SMALL; 4824 } 4825 } 4826 4827 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) { 4828 if (newBuffer) { 4829 ssize_t num_bytes_read = 4830 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 4831 4832 if (num_bytes_read < (ssize_t)size) { 4833 mBuffer->release(); 4834 mBuffer = NULL; 4835 4836 return ERROR_IO; 4837 } 4838 4839 CHECK(mBuffer != NULL); 4840 mBuffer->set_range(0, size); 4841 mBuffer->meta_data().clear(); 4842 mBuffer->meta_data().setInt64( 4843 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4844 mBuffer->meta_data().setInt64( 4845 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 4846 4847 if (targetSampleTimeUs >= 0) { 4848 mBuffer->meta_data().setInt64( 4849 kKeyTargetTime, targetSampleTimeUs); 4850 } 4851 4852 if (isSyncSample) { 4853 mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1); 4854 } 4855 4856 ++mCurrentSampleIndex; 4857 } 4858 4859 if (!mIsAVC && !mIsHEVC) { 4860 *out = mBuffer; 4861 mBuffer = NULL; 4862 4863 return OK; 4864 } 4865 4866 // Each NAL unit is split up into its constituent fragments and 4867 // each one of them returned in its own buffer. 4868 4869 CHECK(mBuffer->range_length() >= mNALLengthSize); 4870 4871 const uint8_t *src = 4872 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 4873 4874 size_t nal_size = parseNALSize(src); 4875 if (mNALLengthSize > SIZE_MAX - nal_size) { 4876 ALOGE("b/24441553, b/24445122"); 4877 } 4878 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 4879 ALOGE("incomplete NAL unit."); 4880 4881 mBuffer->release(); 4882 mBuffer = NULL; 4883 4884 return ERROR_MALFORMED; 4885 } 4886 4887 MediaBufferBase *clone = mBuffer->clone(); 4888 CHECK(clone != NULL); 4889 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 4890 4891 CHECK(mBuffer != NULL); 4892 mBuffer->set_range( 4893 mBuffer->range_offset() + mNALLengthSize + nal_size, 4894 mBuffer->range_length() - mNALLengthSize - nal_size); 4895 4896 if (mBuffer->range_length() == 0) { 4897 mBuffer->release(); 4898 mBuffer = NULL; 4899 } 4900 4901 *out = clone; 4902 4903 return OK; 4904 } else { 4905 // Whole NAL units are returned but each fragment is prefixed by 4906 // the start code (0x00 00 00 01). 4907 ssize_t num_bytes_read = 0; 4908 int32_t drm = 0; 4909 bool usesDRM = (mFormat.findInt32(kKeyIsDRM, &drm) && drm != 0); 4910 if (usesDRM) { 4911 num_bytes_read = 4912 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 4913 } else { 4914 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 4915 } 4916 4917 if (num_bytes_read < (ssize_t)size) { 4918 mBuffer->release(); 4919 mBuffer = NULL; 4920 4921 return ERROR_IO; 4922 } 4923 4924 if (usesDRM) { 4925 CHECK(mBuffer != NULL); 4926 mBuffer->set_range(0, size); 4927 4928 } else { 4929 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4930 size_t srcOffset = 0; 4931 size_t dstOffset = 0; 4932 4933 while (srcOffset < size) { 4934 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 4935 size_t nalLength = 0; 4936 if (!isMalFormed) { 4937 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4938 srcOffset += mNALLengthSize; 4939 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength); 4940 } 4941 4942 if (isMalFormed) { 4943 ALOGE("Video is malformed"); 4944 mBuffer->release(); 4945 mBuffer = NULL; 4946 return ERROR_MALFORMED; 4947 } 4948 4949 if (nalLength == 0) { 4950 continue; 4951 } 4952 4953 if (dstOffset > SIZE_MAX - 4 || 4954 dstOffset + 4 > SIZE_MAX - nalLength || 4955 dstOffset + 4 + nalLength > mBuffer->size()) { 4956 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size()); 4957 android_errorWriteLog(0x534e4554, "27208621"); 4958 mBuffer->release(); 4959 mBuffer = NULL; 4960 return ERROR_MALFORMED; 4961 } 4962 4963 dstData[dstOffset++] = 0; 4964 dstData[dstOffset++] = 0; 4965 dstData[dstOffset++] = 0; 4966 dstData[dstOffset++] = 1; 4967 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4968 srcOffset += nalLength; 4969 dstOffset += nalLength; 4970 } 4971 CHECK_EQ(srcOffset, size); 4972 CHECK(mBuffer != NULL); 4973 mBuffer->set_range(0, dstOffset); 4974 } 4975 4976 mBuffer->meta_data().clear(); 4977 mBuffer->meta_data().setInt64( 4978 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4979 mBuffer->meta_data().setInt64( 4980 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 4981 4982 if (targetSampleTimeUs >= 0) { 4983 mBuffer->meta_data().setInt64( 4984 kKeyTargetTime, targetSampleTimeUs); 4985 } 4986 4987 if (mIsAVC) { 4988 uint32_t layerId = FindAVCLayerId( 4989 (const uint8_t *)mBuffer->data(), mBuffer->range_length()); 4990 mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId); 4991 } 4992 4993 if (isSyncSample) { 4994 mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1); 4995 } 4996 4997 ++mCurrentSampleIndex; 4998 4999 *out = mBuffer; 5000 mBuffer = NULL; 5001 5002 return OK; 5003 } 5004} 5005 5006status_t MPEG4Source::fragmentedRead( 5007 MediaBufferBase **out, const ReadOptions *options) { 5008 5009 ALOGV("MPEG4Source::fragmentedRead"); 5010 5011 CHECK(mStarted); 5012 5013 *out = NULL; 5014 5015 int64_t targetSampleTimeUs = -1; 5016 5017 int64_t seekTimeUs; 5018 ReadOptions::SeekMode mode; 5019 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 5020 5021 int numSidxEntries = mSegments.size(); 5022 if (numSidxEntries != 0) { 5023 int64_t totalTime = 0; 5024 off64_t totalOffset = mFirstMoofOffset; 5025 for (int i = 0; i < numSidxEntries; i++) { 5026 const SidxEntry *se = &mSegments[i]; 5027 if (totalTime + se->mDurationUs > seekTimeUs) { 5028 // The requested time is somewhere in this segment 5029 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) || 5030 (mode == ReadOptions::SEEK_CLOSEST_SYNC && 5031 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 5032 // requested next sync, or closest sync and it was closer to the end of 5033 // this segment 5034 totalTime += se->mDurationUs; 5035 totalOffset += se->mSize; 5036 } 5037 break; 5038 } 5039 totalTime += se->mDurationUs; 5040 totalOffset += se->mSize; 5041 } 5042 mCurrentMoofOffset = totalOffset; 5043 mNextMoofOffset = -1; 5044 mCurrentSamples.clear(); 5045 mCurrentSampleIndex = 0; 5046 status_t err = parseChunk(&totalOffset); 5047 if (err != OK) { 5048 return err; 5049 } 5050 mCurrentTime = totalTime * mTimescale / 1000000ll; 5051 } else { 5052 // without sidx boxes, we can only seek to 0 5053 mCurrentMoofOffset = mFirstMoofOffset; 5054 mNextMoofOffset = -1; 5055 mCurrentSamples.clear(); 5056 mCurrentSampleIndex = 0; 5057 off64_t tmp = mCurrentMoofOffset; 5058 status_t err = parseChunk(&tmp); 5059 if (err != OK) { 5060 return err; 5061 } 5062 mCurrentTime = 0; 5063 } 5064 5065 if (mBuffer != NULL) { 5066 mBuffer->release(); 5067 mBuffer = NULL; 5068 } 5069 5070 // fall through 5071 } 5072 5073 off64_t offset = 0; 5074 size_t size = 0; 5075 uint32_t cts = 0; 5076 bool isSyncSample = false; 5077 bool newBuffer = false; 5078 if (mBuffer == NULL) { 5079 newBuffer = true; 5080 5081 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 5082 // move to next fragment if there is one 5083 if (mNextMoofOffset <= mCurrentMoofOffset) { 5084 return ERROR_END_OF_STREAM; 5085 } 5086 off64_t nextMoof = mNextMoofOffset; 5087 mCurrentMoofOffset = nextMoof; 5088 mCurrentSamples.clear(); 5089 mCurrentSampleIndex = 0; 5090 status_t err = parseChunk(&nextMoof); 5091 if (err != OK) { 5092 return err; 5093 } 5094 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 5095 return ERROR_END_OF_STREAM; 5096 } 5097 } 5098 5099 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 5100 offset = smpl->offset; 5101 size = smpl->size; 5102 cts = mCurrentTime + smpl->compositionOffset; 5103 mCurrentTime += smpl->duration; 5104 isSyncSample = (mCurrentSampleIndex == 0); // XXX 5105 5106 status_t err = mGroup->acquire_buffer(&mBuffer); 5107 5108 if (err != OK) { 5109 CHECK(mBuffer == NULL); 5110 ALOGV("acquire_buffer returned %d", err); 5111 return err; 5112 } 5113 if (size > mBuffer->size()) { 5114 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 5115 mBuffer->release(); 5116 mBuffer = NULL; 5117 return ERROR_BUFFER_TOO_SMALL; 5118 } 5119 } 5120 5121 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 5122 MetaDataBase &bufmeta = mBuffer->meta_data(); 5123 bufmeta.clear(); 5124 if (smpl->encryptedsizes.size()) { 5125 // store clear/encrypted lengths in metadata 5126 bufmeta.setData(kKeyPlainSizes, 0, 5127 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 5128 bufmeta.setData(kKeyEncryptedSizes, 0, 5129 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 5130 bufmeta.setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 5131 bufmeta.setInt32(kKeyCryptoMode, mCryptoMode); 5132 bufmeta.setData(kKeyCryptoKey, 0, mCryptoKey, 16); 5133 bufmeta.setInt32(kKeyEncryptedByteBlock, mDefaultEncryptedByteBlock); 5134 bufmeta.setInt32(kKeySkipByteBlock, mDefaultSkipByteBlock); 5135 5136 uint32_t type = 0; 5137 const void *iv = NULL; 5138 size_t ivlength = 0; 5139 if (!mFormat.findData( 5140 kKeyCryptoIV, &type, &iv, &ivlength)) { 5141 iv = smpl->iv; 5142 ivlength = 16; // use 16 or the actual size? 5143 } 5144 bufmeta.setData(kKeyCryptoIV, 0, iv, ivlength); 5145 5146 } 5147 5148 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) { 5149 if (newBuffer) { 5150 if (!isInRange((size_t)0u, mBuffer->size(), size)) { 5151 mBuffer->release(); 5152 mBuffer = NULL; 5153 5154 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size); 5155 return ERROR_MALFORMED; 5156 } 5157 5158 ssize_t num_bytes_read = 5159 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 5160 5161 if (num_bytes_read < (ssize_t)size) { 5162 mBuffer->release(); 5163 mBuffer = NULL; 5164 5165 ALOGE("i/o error"); 5166 return ERROR_IO; 5167 } 5168 5169 CHECK(mBuffer != NULL); 5170 mBuffer->set_range(0, size); 5171 mBuffer->meta_data().setInt64( 5172 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 5173 mBuffer->meta_data().setInt64( 5174 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 5175 5176 if (targetSampleTimeUs >= 0) { 5177 mBuffer->meta_data().setInt64( 5178 kKeyTargetTime, targetSampleTimeUs); 5179 } 5180 5181 if (mIsAVC) { 5182 uint32_t layerId = FindAVCLayerId( 5183 (const uint8_t *)mBuffer->data(), mBuffer->range_length()); 5184 mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId); 5185 } 5186 5187 if (isSyncSample) { 5188 mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1); 5189 } 5190 5191 ++mCurrentSampleIndex; 5192 } 5193 5194 if (!mIsAVC && !mIsHEVC) { 5195 *out = mBuffer; 5196 mBuffer = NULL; 5197 5198 return OK; 5199 } 5200 5201 // Each NAL unit is split up into its constituent fragments and 5202 // each one of them returned in its own buffer. 5203 5204 CHECK(mBuffer->range_length() >= mNALLengthSize); 5205 5206 const uint8_t *src = 5207 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 5208 5209 size_t nal_size = parseNALSize(src); 5210 if (mNALLengthSize > SIZE_MAX - nal_size) { 5211 ALOGE("b/24441553, b/24445122"); 5212 } 5213 5214 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 5215 ALOGE("incomplete NAL unit."); 5216 5217 mBuffer->release(); 5218 mBuffer = NULL; 5219 5220 return ERROR_MALFORMED; 5221 } 5222 5223 MediaBufferBase *clone = mBuffer->clone(); 5224 CHECK(clone != NULL); 5225 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 5226 5227 CHECK(mBuffer != NULL); 5228 mBuffer->set_range( 5229 mBuffer->range_offset() + mNALLengthSize + nal_size, 5230 mBuffer->range_length() - mNALLengthSize - nal_size); 5231 5232 if (mBuffer->range_length() == 0) { 5233 mBuffer->release(); 5234 mBuffer = NULL; 5235 } 5236 5237 *out = clone; 5238 5239 return OK; 5240 } else { 5241 ALOGV("whole NAL"); 5242 // Whole NAL units are returned but each fragment is prefixed by 5243 // the start code (0x00 00 00 01). 5244 ssize_t num_bytes_read = 0; 5245 int32_t drm = 0; 5246 bool usesDRM = (mFormat.findInt32(kKeyIsDRM, &drm) && drm != 0); 5247 void *data = NULL; 5248 bool isMalFormed = false; 5249 if (usesDRM) { 5250 if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) { 5251 isMalFormed = true; 5252 } else { 5253 data = mBuffer->data(); 5254 } 5255 } else { 5256 int32_t max_size; 5257 if (!mFormat.findInt32(kKeyMaxInputSize, &max_size) 5258 || !isInRange((size_t)0u, (size_t)max_size, size)) { 5259 isMalFormed = true; 5260 } else { 5261 data = mSrcBuffer; 5262 } 5263 } 5264 5265 if (isMalFormed || data == NULL) { 5266 ALOGE("isMalFormed size %zu", size); 5267 if (mBuffer != NULL) { 5268 mBuffer->release(); 5269 mBuffer = NULL; 5270 } 5271 return ERROR_MALFORMED; 5272 } 5273 num_bytes_read = mDataSource->readAt(offset, data, size); 5274 5275 if (num_bytes_read < (ssize_t)size) { 5276 mBuffer->release(); 5277 mBuffer = NULL; 5278 5279 ALOGE("i/o error"); 5280 return ERROR_IO; 5281 } 5282 5283 if (usesDRM) { 5284 CHECK(mBuffer != NULL); 5285 mBuffer->set_range(0, size); 5286 5287 } else { 5288 uint8_t *dstData = (uint8_t *)mBuffer->data(); 5289 size_t srcOffset = 0; 5290 size_t dstOffset = 0; 5291 5292 while (srcOffset < size) { 5293 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 5294 size_t nalLength = 0; 5295 if (!isMalFormed) { 5296 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 5297 srcOffset += mNALLengthSize; 5298 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength) 5299 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u) 5300 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength); 5301 } 5302 5303 if (isMalFormed) { 5304 ALOGE("Video is malformed; nalLength %zu", nalLength); 5305 mBuffer->release(); 5306 mBuffer = NULL; 5307 return ERROR_MALFORMED; 5308 } 5309 5310 if (nalLength == 0) { 5311 continue; 5312 } 5313 5314 if (dstOffset > SIZE_MAX - 4 || 5315 dstOffset + 4 > SIZE_MAX - nalLength || 5316 dstOffset + 4 + nalLength > mBuffer->size()) { 5317 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size()); 5318 android_errorWriteLog(0x534e4554, "26365349"); 5319 mBuffer->release(); 5320 mBuffer = NULL; 5321 return ERROR_MALFORMED; 5322 } 5323 5324 dstData[dstOffset++] = 0; 5325 dstData[dstOffset++] = 0; 5326 dstData[dstOffset++] = 0; 5327 dstData[dstOffset++] = 1; 5328 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 5329 srcOffset += nalLength; 5330 dstOffset += nalLength; 5331 } 5332 CHECK_EQ(srcOffset, size); 5333 CHECK(mBuffer != NULL); 5334 mBuffer->set_range(0, dstOffset); 5335 } 5336 5337 mBuffer->meta_data().setInt64( 5338 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 5339 mBuffer->meta_data().setInt64( 5340 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 5341 5342 if (targetSampleTimeUs >= 0) { 5343 mBuffer->meta_data().setInt64( 5344 kKeyTargetTime, targetSampleTimeUs); 5345 } 5346 5347 if (isSyncSample) { 5348 mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1); 5349 } 5350 5351 ++mCurrentSampleIndex; 5352 5353 *out = mBuffer; 5354 mBuffer = NULL; 5355 5356 return OK; 5357 } 5358} 5359 5360MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 5361 const char *mimePrefix) { 5362 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 5363 const char *mime; 5364 if (track->meta.findCString(kKeyMIMEType, &mime) 5365 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 5366 return track; 5367 } 5368 } 5369 5370 return NULL; 5371} 5372 5373static bool LegacySniffMPEG4(DataSourceBase *source, float *confidence) { 5374 uint8_t header[8]; 5375 5376 ssize_t n = source->readAt(4, header, sizeof(header)); 5377 if (n < (ssize_t)sizeof(header)) { 5378 return false; 5379 } 5380 5381 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 5382 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) 5383 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 5384 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 5385 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 5386 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8) 5387 || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8) 5388 || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)) { 5389 *confidence = 0.4; 5390 5391 return true; 5392 } 5393 5394 return false; 5395} 5396 5397static bool isCompatibleBrand(uint32_t fourcc) { 5398 static const uint32_t kCompatibleBrands[] = { 5399 FOURCC('i', 's', 'o', 'm'), 5400 FOURCC('i', 's', 'o', '2'), 5401 FOURCC('a', 'v', 'c', '1'), 5402 FOURCC('h', 'v', 'c', '1'), 5403 FOURCC('h', 'e', 'v', '1'), 5404 FOURCC('3', 'g', 'p', '4'), 5405 FOURCC('m', 'p', '4', '1'), 5406 FOURCC('m', 'p', '4', '2'), 5407 FOURCC('d', 'a', 's', 'h'), 5408 5409 // Won't promise that the following file types can be played. 5410 // Just give these file types a chance. 5411 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 5412 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 5413 5414 FOURCC('3', 'g', '2', 'a'), // 3GPP2 5415 FOURCC('3', 'g', '2', 'b'), 5416 FOURCC('m', 'i', 'f', '1'), // HEIF image 5417 FOURCC('h', 'e', 'i', 'c'), // HEIF image 5418 FOURCC('m', 's', 'f', '1'), // HEIF image sequence 5419 FOURCC('h', 'e', 'v', 'c'), // HEIF image sequence 5420 }; 5421 5422 for (size_t i = 0; 5423 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 5424 ++i) { 5425 if (kCompatibleBrands[i] == fourcc) { 5426 return true; 5427 } 5428 } 5429 5430 return false; 5431} 5432 5433// Attempt to actually parse the 'ftyp' atom and determine if a suitable 5434// compatible brand is present. 5435// Also try to identify where this file's metadata ends 5436// (end of the 'moov' atom) and report it to the caller as part of 5437// the metadata. 5438static bool BetterSniffMPEG4(DataSourceBase *source, float *confidence) { 5439 // We scan up to 128 bytes to identify this file as an MP4. 5440 static const off64_t kMaxScanOffset = 128ll; 5441 5442 off64_t offset = 0ll; 5443 bool foundGoodFileType = false; 5444 off64_t moovAtomEndOffset = -1ll; 5445 bool done = false; 5446 5447 while (!done && offset < kMaxScanOffset) { 5448 uint32_t hdr[2]; 5449 if (source->readAt(offset, hdr, 8) < 8) { 5450 return false; 5451 } 5452 5453 uint64_t chunkSize = ntohl(hdr[0]); 5454 uint32_t chunkType = ntohl(hdr[1]); 5455 off64_t chunkDataOffset = offset + 8; 5456 5457 if (chunkSize == 1) { 5458 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 5459 return false; 5460 } 5461 5462 chunkSize = ntoh64(chunkSize); 5463 chunkDataOffset += 8; 5464 5465 if (chunkSize < 16) { 5466 // The smallest valid chunk is 16 bytes long in this case. 5467 return false; 5468 } 5469 5470 } else if (chunkSize < 8) { 5471 // The smallest valid chunk is 8 bytes long. 5472 return false; 5473 } 5474 5475 // (data_offset - offset) is either 8 or 16 5476 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset); 5477 if (chunkDataSize < 0) { 5478 ALOGE("b/23540914"); 5479 return false; 5480 } 5481 5482 char chunkstring[5]; 5483 MakeFourCCString(chunkType, chunkstring); 5484 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset); 5485 switch (chunkType) { 5486 case FOURCC('f', 't', 'y', 'p'): 5487 { 5488 if (chunkDataSize < 8) { 5489 return false; 5490 } 5491 5492 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 5493 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 5494 if (i == 1) { 5495 // Skip this index, it refers to the minorVersion, 5496 // not a brand. 5497 continue; 5498 } 5499 5500 uint32_t brand; 5501 if (source->readAt( 5502 chunkDataOffset + 4 * i, &brand, 4) < 4) { 5503 return false; 5504 } 5505 5506 brand = ntohl(brand); 5507 5508 if (isCompatibleBrand(brand)) { 5509 foundGoodFileType = true; 5510 break; 5511 } 5512 } 5513 5514 if (!foundGoodFileType) { 5515 return false; 5516 } 5517 5518 break; 5519 } 5520 5521 case FOURCC('m', 'o', 'o', 'v'): 5522 { 5523 moovAtomEndOffset = offset + chunkSize; 5524 5525 done = true; 5526 break; 5527 } 5528 5529 default: 5530 break; 5531 } 5532 5533 offset += chunkSize; 5534 } 5535 5536 if (!foundGoodFileType) { 5537 return false; 5538 } 5539 5540 *confidence = 0.4f; 5541 5542 return true; 5543} 5544 5545static MediaExtractor* CreateExtractor(DataSourceBase *source, void *) { 5546 return new MPEG4Extractor(source); 5547} 5548 5549static MediaExtractor::CreatorFunc Sniff( 5550 DataSourceBase *source, float *confidence, void **, 5551 MediaExtractor::FreeMetaFunc *) { 5552 if (BetterSniffMPEG4(source, confidence)) { 5553 return CreateExtractor; 5554 } 5555 5556 if (LegacySniffMPEG4(source, confidence)) { 5557 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 5558 return CreateExtractor; 5559 } 5560 5561 return NULL; 5562} 5563 5564extern "C" { 5565// This is the only symbol that needs to be exported 5566__attribute__ ((visibility ("default"))) 5567MediaExtractor::ExtractorDef GETEXTRACTORDEF() { 5568 return { 5569 MediaExtractor::EXTRACTORDEF_VERSION, 5570 UUID("27575c67-4417-4c54-8d3d-8e626985a164"), 5571 1, // version 5572 "MP4 Extractor", 5573 Sniff 5574 }; 5575} 5576 5577} // extern "C" 5578 5579} // namespace android 5580