MPEG4Extractor.cpp revision 8c66cf58054d705c3b169eaf830f793adb8d4019
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//#define LOG_NDEBUG 0
#define LOG_TAG "MPEG4Extractor"

#include <ctype.h>
#include <inttypes.h>
#include <memory>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <utils/Log.h>

#include "MPEG4Extractor.h"
#include "SampleTable.h"
#include "ItemTable.h"
#include "include/ESDS.h"

#include <media/ExtractorUtils.h>
#include <media/MediaTrack.h>
#include <media/stagefright/foundation/ABitReader.h>
#include <media/stagefright/foundation/ABuffer.h>
#include <media/stagefright/foundation/ADebug.h>
#include <media/stagefright/foundation/AMessage.h>
#include <media/stagefright/foundation/AUtils.h>
#include <media/stagefright/foundation/ByteUtils.h>
#include <media/stagefright/foundation/ColorUtils.h>
#include <media/stagefright/foundation/avc_utils.h>
#include <media/stagefright/foundation/hexdump.h>
#include <media/stagefright/MediaBufferBase.h>
#include <media/stagefright/MediaBufferGroup.h>
#include <media/stagefright/MediaDefs.h>
#include <media/stagefright/MetaData.h>
#include <utils/String8.h>

#include <byteswap.h>
#include "include/ID3.h"

#ifndef UINT32_MAX
#define UINT32_MAX       (4294967295U)
#endif

namespace android {

// Size limits used while parsing. kMaxTrackHeaderSize caps the stream header
// copied into track metadata (see getTrackMetaData()); kMaxAtomSize caps the
// payload of every atom except 'mdat' (see parseChunk()).
enum {
    // max track header chunk to return
    kMaxTrackHeaderSize = 32,

    // maximum size of an atom. Some atoms can be bigger according to the spec,
    // but we only allow up to this size.
    kMaxAtomSize = 64 * 1024 * 1024,
};

// MediaTrack subclass declared for this extractor. Only the declaration lives
// here; every member function except supportNonblockingRead() is defined
// elsewhere in the file.
class MPEG4Source : public MediaTrack {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(MetaDataBase &format,
                DataSourceBase *dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset,
                const sp<ItemTable> &itemTable);
    virtual status_t init();

    virtual status_t start(MetaDataBase *params = NULL);
    virtual status_t stop();

    virtual status_t getFormat(MetaDataBase &);

    virtual status_t read(MediaBufferBase **buffer, const ReadOptions *options = NULL);
    // Always reports that non-blocking reads are supported.
    virtual bool supportNonblockingRead() { return true; }
    virtual status_t fragmentedRead(MediaBufferBase **buffer, const ReadOptions *options = NULL);

    virtual ~MPEG4Source();

private:
    Mutex mLock;

    // NOTE(review): mFormat and mSegments are references, so the objects
    // passed to the constructor must outlive this source -- confirm at the
    // construction site, which is outside this section.
    MetaDataBase &mFormat;
    DataSourceBase *mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    // Encryption-related state. The three fields marked below are copied from
    // the extractor; the remaining ones are presumably filled in by the
    // parseSampleAuxiliaryInformation*() helpers declared further down.
    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    int32_t mDefaultEncryptedByteBlock;
    int32_t mDefaultSkipByteBlock;
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBufferBase *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    bool mIsHeif;
    sp<ItemTable> mItemTable;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
    status_t parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags);
    status_t parseSampleEncryption(off64_t offset);

    // Values taken from a track fragment header. The Flags names mirror the
    // optional-field bits of the ISO base media 'tfhd' box; mFlags records
    // which of the optional members below were present.
    struct TrackFragmentHeaderInfo {
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record kept in mCurrentSamples: location, size and timing,
    // plus the IV and clear/encrypted byte counts for protected samples.
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Copy operations are declared private with no definition in this
    // section -- presumably to forbid copying.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};

// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache while forwarding
// all remaining requests to the wrapped datasource.
192// This is used to cache the full sampletable metadata for a single track, 193// possibly wrapping multiple times to cover all tracks, i.e. 194// Each CachedRangedDataSource caches the sampletable metadata for a single track. 195 196struct CachedRangedDataSource : public DataSourceBase { 197 explicit CachedRangedDataSource(DataSourceBase *source); 198 virtual ~CachedRangedDataSource(); 199 200 virtual status_t initCheck() const; 201 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 202 virtual status_t getSize(off64_t *size); 203 virtual uint32_t flags(); 204 205 status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess); 206 207 208private: 209 Mutex mLock; 210 211 DataSourceBase *mSource; 212 bool mOwnsDataSource; 213 off64_t mCachedOffset; 214 size_t mCachedSize; 215 uint8_t *mCache; 216 217 void clearCache(); 218 219 CachedRangedDataSource(const CachedRangedDataSource &); 220 CachedRangedDataSource &operator=(const CachedRangedDataSource &); 221}; 222 223CachedRangedDataSource::CachedRangedDataSource(DataSourceBase *source) 224 : mSource(source), 225 mOwnsDataSource(false), 226 mCachedOffset(0), 227 mCachedSize(0), 228 mCache(NULL) { 229} 230 231CachedRangedDataSource::~CachedRangedDataSource() { 232 clearCache(); 233 if (mOwnsDataSource) { 234 delete (CachedRangedDataSource*)mSource; 235 } 236} 237 238void CachedRangedDataSource::clearCache() { 239 if (mCache) { 240 free(mCache); 241 mCache = NULL; 242 } 243 244 mCachedOffset = 0; 245 mCachedSize = 0; 246} 247 248status_t CachedRangedDataSource::initCheck() const { 249 return mSource->initCheck(); 250} 251 252ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) { 253 Mutex::Autolock autoLock(mLock); 254 255 if (isInRange(mCachedOffset, mCachedSize, offset, size)) { 256 memcpy(data, &mCache[offset - mCachedOffset], size); 257 return size; 258 } 259 260 return mSource->readAt(offset, data, size); 261} 262 263status_t 
CachedRangedDataSource::getSize(off64_t *size) { 264 return mSource->getSize(size); 265} 266 267uint32_t CachedRangedDataSource::flags() { 268 return mSource->flags(); 269} 270 271status_t CachedRangedDataSource::setCachedRange(off64_t offset, 272 size_t size, 273 bool assumeSourceOwnershipOnSuccess) { 274 Mutex::Autolock autoLock(mLock); 275 276 clearCache(); 277 278 mCache = (uint8_t *)malloc(size); 279 280 if (mCache == NULL) { 281 return -ENOMEM; 282 } 283 284 mCachedOffset = offset; 285 mCachedSize = size; 286 287 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize); 288 289 if (err < (ssize_t)size) { 290 clearCache(); 291 292 return ERROR_IO; 293 } 294 mOwnsDataSource = assumeSourceOwnershipOnSuccess; 295 return OK; 296} 297 298//////////////////////////////////////////////////////////////////////////////// 299 300static const bool kUseHexDump = false; 301 302static const char *FourCC2MIME(uint32_t fourcc) { 303 switch (fourcc) { 304 case FOURCC('m', 'p', '4', 'a'): 305 return MEDIA_MIMETYPE_AUDIO_AAC; 306 307 case FOURCC('s', 'a', 'm', 'r'): 308 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 309 310 case FOURCC('s', 'a', 'w', 'b'): 311 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 312 313 case FOURCC('m', 'p', '4', 'v'): 314 return MEDIA_MIMETYPE_VIDEO_MPEG4; 315 316 case FOURCC('s', '2', '6', '3'): 317 case FOURCC('h', '2', '6', '3'): 318 case FOURCC('H', '2', '6', '3'): 319 return MEDIA_MIMETYPE_VIDEO_H263; 320 321 case FOURCC('a', 'v', 'c', '1'): 322 return MEDIA_MIMETYPE_VIDEO_AVC; 323 324 case FOURCC('h', 'v', 'c', '1'): 325 case FOURCC('h', 'e', 'v', '1'): 326 return MEDIA_MIMETYPE_VIDEO_HEVC; 327 default: 328 ALOGW("Unknown fourcc: %c%c%c%c", 329 (fourcc >> 24) & 0xff, 330 (fourcc >> 16) & 0xff, 331 (fourcc >> 8) & 0xff, 332 fourcc & 0xff 333 ); 334 return "application/octet-stream"; 335 } 336} 337 338static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 339 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) 
{ 340 // AMR NB audio is always mono, 8kHz 341 *channels = 1; 342 *rate = 8000; 343 return true; 344 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 345 // AMR WB audio is always mono, 16kHz 346 *channels = 1; 347 *rate = 16000; 348 return true; 349 } 350 return false; 351} 352 353MPEG4Extractor::MPEG4Extractor(DataSourceBase *source, const char *mime) 354 : mMoofOffset(0), 355 mMoofFound(false), 356 mMdatFound(false), 357 mDataSource(source), 358 mCachedSource(NULL), 359 mInitCheck(NO_INIT), 360 mHeaderTimescale(0), 361 mIsQT(false), 362 mIsHeif(false), 363 mHasMoovBox(false), 364 mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)), 365 mFirstTrack(NULL), 366 mLastTrack(NULL) { 367 ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif); 368} 369 370MPEG4Extractor::~MPEG4Extractor() { 371 Track *track = mFirstTrack; 372 while (track) { 373 Track *next = track->next; 374 375 delete track; 376 track = next; 377 } 378 mFirstTrack = mLastTrack = NULL; 379 380 for (size_t i = 0; i < mPssh.size(); i++) { 381 delete [] mPssh[i].data; 382 } 383 mPssh.clear(); 384 385 delete mCachedSource; 386} 387 388uint32_t MPEG4Extractor::flags() const { 389 return CAN_PAUSE | 390 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 
391 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 392} 393 394status_t MPEG4Extractor::getMetaData(MetaDataBase &meta) { 395 status_t err; 396 if ((err = readMetaData()) != OK) { 397 return UNKNOWN_ERROR; 398 } 399 meta = mFileMetaData; 400 return OK; 401} 402 403size_t MPEG4Extractor::countTracks() { 404 status_t err; 405 if ((err = readMetaData()) != OK) { 406 ALOGV("MPEG4Extractor::countTracks: no tracks"); 407 return 0; 408 } 409 410 size_t n = 0; 411 Track *track = mFirstTrack; 412 while (track) { 413 ++n; 414 track = track->next; 415 } 416 417 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n); 418 return n; 419} 420 421status_t MPEG4Extractor::getTrackMetaData( 422 MetaDataBase &meta, 423 size_t index, uint32_t flags) { 424 status_t err; 425 if ((err = readMetaData()) != OK) { 426 return UNKNOWN_ERROR; 427 } 428 429 Track *track = mFirstTrack; 430 while (index > 0) { 431 if (track == NULL) { 432 return UNKNOWN_ERROR; 433 } 434 435 track = track->next; 436 --index; 437 } 438 439 if (track == NULL) { 440 return UNKNOWN_ERROR; 441 } 442 443 [=] { 444 int64_t duration; 445 int32_t samplerate; 446 if (track->has_elst && mHeaderTimescale != 0 && 447 track->meta.findInt64(kKeyDuration, &duration) && 448 track->meta.findInt32(kKeySampleRate, &samplerate)) { 449 450 track->has_elst = false; 451 452 if (track->elst_segment_duration > INT64_MAX) { 453 return; 454 } 455 int64_t segment_duration = track->elst_segment_duration; 456 int64_t media_time = track->elst_media_time; 457 int64_t halfscale = mHeaderTimescale / 2; 458 ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64 459 ", halfscale = %" PRId64 ", timescale = %d", 460 segment_duration, 461 media_time, 462 halfscale, 463 mHeaderTimescale); 464 465 int64_t delay; 466 // delay = ((media_time * samplerate) + halfscale) / mHeaderTimescale; 467 if (__builtin_mul_overflow(media_time, samplerate, &delay) || 468 __builtin_add_overflow(delay, halfscale, &delay) || 469 (delay /= mHeaderTimescale, 
false) || 470 delay > INT32_MAX || 471 delay < INT32_MIN) { 472 return; 473 } 474 ALOGV("delay = %" PRId64, delay); 475 track->meta.setInt32(kKeyEncoderDelay, delay); 476 477 int64_t scaled_duration; 478 // scaled_duration = duration * mHeaderTimescale; 479 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) { 480 return; 481 } 482 ALOGV("scaled_duration = %" PRId64, scaled_duration); 483 484 int64_t segment_end; 485 int64_t padding; 486 // padding = scaled_duration - ((segment_duration + media_time) * 1000000); 487 if (__builtin_add_overflow(segment_duration, media_time, &segment_end) || 488 __builtin_mul_overflow(segment_end, 1000000, &segment_end) || 489 __builtin_sub_overflow(scaled_duration, segment_end, &padding)) { 490 return; 491 } 492 ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding); 493 494 if (padding < 0) { 495 // track duration from media header (which is what kKeyDuration is) might 496 // be slightly shorter than the segment duration, which would make the 497 // padding negative. Clamp to zero. 
498 padding = 0; 499 } 500 501 int64_t paddingsamples; 502 int64_t halfscale_e6; 503 int64_t timescale_e6; 504 // paddingsamples = ((padding * samplerate) + (halfscale * 1000000)) 505 // / (mHeaderTimescale * 1000000); 506 if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) || 507 __builtin_mul_overflow(halfscale, 1000000, &halfscale_e6) || 508 __builtin_mul_overflow(mHeaderTimescale, 1000000, ×cale_e6) || 509 __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) || 510 (paddingsamples /= timescale_e6, false) || 511 paddingsamples > INT32_MAX) { 512 return; 513 } 514 ALOGV("paddingsamples = %" PRId64, paddingsamples); 515 track->meta.setInt32(kKeyEncoderPadding, paddingsamples); 516 } 517 }(); 518 519 if ((flags & kIncludeExtensiveMetaData) 520 && !track->includes_expensive_metadata) { 521 track->includes_expensive_metadata = true; 522 523 const char *mime; 524 CHECK(track->meta.findCString(kKeyMIMEType, &mime)); 525 if (!strncasecmp("video/", mime, 6)) { 526 // MPEG2 tracks do not provide CSD, so read the stream header 527 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) { 528 off64_t offset; 529 size_t size; 530 if (track->sampleTable->getMetaDataForSample( 531 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) { 532 if (size > kMaxTrackHeaderSize) { 533 size = kMaxTrackHeaderSize; 534 } 535 uint8_t header[kMaxTrackHeaderSize]; 536 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) { 537 track->meta.setData(kKeyStreamHeader, 'mdat', header, size); 538 } 539 } 540 } 541 542 if (mMoofOffset > 0) { 543 int64_t duration; 544 if (track->meta.findInt64(kKeyDuration, &duration)) { 545 // nothing fancy, just pick a frame near 1/4th of the duration 546 track->meta.setInt64( 547 kKeyThumbnailTime, duration / 4); 548 } 549 } else { 550 uint32_t sampleIndex; 551 uint32_t sampleTime; 552 if (track->timescale != 0 && 553 track->sampleTable->findThumbnailSample(&sampleIndex) == OK 554 && 
track->sampleTable->getMetaDataForSample( 555 sampleIndex, NULL /* offset */, NULL /* size */, 556 &sampleTime) == OK) { 557 track->meta.setInt64( 558 kKeyThumbnailTime, 559 ((int64_t)sampleTime * 1000000) / track->timescale); 560 } 561 } 562 } 563 } 564 565 meta = track->meta; 566 return OK; 567} 568 569status_t MPEG4Extractor::readMetaData() { 570 if (mInitCheck != NO_INIT) { 571 return mInitCheck; 572 } 573 574 off64_t offset = 0; 575 status_t err; 576 bool sawMoovOrSidx = false; 577 578 while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) || 579 (mIsHeif && (mPreferHeif || !mHasMoovBox) && 580 (mItemTable != NULL) && mItemTable->isValid()))) { 581 off64_t orig_offset = offset; 582 err = parseChunk(&offset, 0); 583 584 if (err != OK && err != UNKNOWN_ERROR) { 585 break; 586 } else if (offset <= orig_offset) { 587 // only continue parsing if the offset was advanced, 588 // otherwise we might end up in an infinite loop 589 ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset); 590 err = ERROR_MALFORMED; 591 break; 592 } else if (err == UNKNOWN_ERROR) { 593 sawMoovOrSidx = true; 594 } 595 } 596 597 if (mIsHeif && (mItemTable != NULL) && (mItemTable->countImages() > 0)) { 598 off64_t exifOffset; 599 size_t exifSize; 600 if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) { 601 mFileMetaData.setInt64(kKeyExifOffset, (int64_t)exifOffset); 602 mFileMetaData.setInt64(kKeyExifSize, (int64_t)exifSize); 603 } 604 for (uint32_t imageIndex = 0; 605 imageIndex < mItemTable->countImages(); imageIndex++) { 606 sp<MetaData> meta = mItemTable->getImageMeta(imageIndex); 607 if (meta == NULL) { 608 ALOGE("heif image %u has no meta!", imageIndex); 609 continue; 610 } 611 // Some heif files advertise image sequence brands (eg. 'hevc') in 612 // ftyp box, but don't have any valid tracks in them. Instead of 613 // reporting the entire file as malformed, we override the error 614 // to allow still images to be extracted. 
615 if (err != OK) { 616 ALOGW("Extracting still images only"); 617 err = OK; 618 } 619 mInitCheck = OK; 620 621 ALOGV("adding HEIF image track %u", imageIndex); 622 Track *track = new Track; 623 track->next = NULL; 624 if (mLastTrack != NULL) { 625 mLastTrack->next = track; 626 } else { 627 mFirstTrack = track; 628 } 629 mLastTrack = track; 630 631 track->meta = *(meta.get()); 632 track->meta.setInt32(kKeyTrackID, imageIndex); 633 track->includes_expensive_metadata = false; 634 track->skipTrack = false; 635 track->timescale = 1000000; 636 } 637 } 638 639 if (mInitCheck == OK) { 640 if (findTrackByMimePrefix("video/") != NULL) { 641 mFileMetaData.setCString( 642 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 643 } else if (findTrackByMimePrefix("audio/") != NULL) { 644 mFileMetaData.setCString(kKeyMIMEType, "audio/mp4"); 645 } else if (findTrackByMimePrefix( 646 MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) { 647 mFileMetaData.setCString( 648 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_HEIF); 649 } else { 650 mFileMetaData.setCString(kKeyMIMEType, "application/octet-stream"); 651 } 652 } else { 653 mInitCheck = err; 654 } 655 656 CHECK_NE(err, (status_t)NO_INIT); 657 658 // copy pssh data into file metadata 659 uint64_t psshsize = 0; 660 for (size_t i = 0; i < mPssh.size(); i++) { 661 psshsize += 20 + mPssh[i].datalen; 662 } 663 if (psshsize > 0 && psshsize <= UINT32_MAX) { 664 char *buf = (char*)malloc(psshsize); 665 if (!buf) { 666 ALOGE("b/28471206"); 667 return NO_MEMORY; 668 } 669 char *ptr = buf; 670 for (size_t i = 0; i < mPssh.size(); i++) { 671 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 672 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 673 ptr += (20 + mPssh[i].datalen); 674 } 675 mFileMetaData.setData(kKeyPssh, 'pssh', buf, psshsize); 676 free(buf); 677 } 678 679 return mInitCheck; 680} 681 682struct PathAdder { 683 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 684 : mPath(path) { 685 mPath->push(chunkType); 686 } 687 688 ~PathAdder() { 689 
mPath->pop(); 690 } 691 692private: 693 Vector<uint32_t> *mPath; 694 695 PathAdder(const PathAdder &); 696 PathAdder &operator=(const PathAdder &); 697}; 698 699static bool underMetaDataPath(const Vector<uint32_t> &path) { 700 return path.size() >= 5 701 && path[0] == FOURCC('m', 'o', 'o', 'v') 702 && path[1] == FOURCC('u', 'd', 't', 'a') 703 && path[2] == FOURCC('m', 'e', 't', 'a') 704 && path[3] == FOURCC('i', 'l', 's', 't'); 705} 706 707static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) { 708 return path.size() >= 2 709 && path[0] == FOURCC('m', 'o', 'o', 'v') 710 && path[1] == FOURCC('m', 'e', 't', 'a') 711 && (depth == 2 712 || (depth == 3 713 && (path[2] == FOURCC('h', 'd', 'l', 'r') 714 || path[2] == FOURCC('i', 'l', 's', 't') 715 || path[2] == FOURCC('k', 'e', 'y', 's')))); 716} 717 718// Given a time in seconds since Jan 1 1904, produce a human-readable string. 719static bool convertTimeToDate(int64_t time_1904, String8 *s) { 720 // delta between mpeg4 time and unix epoch time 721 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600); 722 if (time_1904 < INT64_MIN + delta) { 723 return false; 724 } 725 time_t time_1970 = time_1904 - delta; 726 727 char tmp[32]; 728 struct tm* tm = gmtime(&time_1970); 729 if (tm != NULL && 730 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) { 731 s->setTo(tmp); 732 return true; 733 } 734 return false; 735} 736 737status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 738 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth); 739 740 if (*offset < 0) { 741 ALOGE("b/23540914"); 742 return ERROR_MALFORMED; 743 } 744 if (depth > 100) { 745 ALOGE("b/27456299"); 746 return ERROR_MALFORMED; 747 } 748 uint32_t hdr[2]; 749 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 750 return ERROR_IO; 751 } 752 uint64_t chunk_size = ntohl(hdr[0]); 753 int32_t chunk_type = ntohl(hdr[1]); 754 off64_t data_offset = *offset + 8; 755 756 if (chunk_size == 1) { 757 if 
(mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 758 return ERROR_IO; 759 } 760 chunk_size = ntoh64(chunk_size); 761 data_offset += 8; 762 763 if (chunk_size < 16) { 764 // The smallest valid chunk is 16 bytes long in this case. 765 return ERROR_MALFORMED; 766 } 767 } else if (chunk_size == 0) { 768 if (depth == 0) { 769 // atom extends to end of file 770 off64_t sourceSize; 771 if (mDataSource->getSize(&sourceSize) == OK) { 772 chunk_size = (sourceSize - *offset); 773 } else { 774 // XXX could we just pick a "sufficiently large" value here? 775 ALOGE("atom size is 0, and data source has no size"); 776 return ERROR_MALFORMED; 777 } 778 } else { 779 // not allowed for non-toplevel atoms, skip it 780 *offset += 4; 781 return OK; 782 } 783 } else if (chunk_size < 8) { 784 // The smallest valid chunk is 8 bytes long. 785 ALOGE("invalid chunk size: %" PRIu64, chunk_size); 786 return ERROR_MALFORMED; 787 } 788 789 char chunk[5]; 790 MakeFourCCString(chunk_type, chunk); 791 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth); 792 793 if (kUseHexDump) { 794 static const char kWhitespace[] = " "; 795 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 796 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size); 797 798 char buffer[256]; 799 size_t n = chunk_size; 800 if (n > sizeof(buffer)) { 801 n = sizeof(buffer); 802 } 803 if (mDataSource->readAt(*offset, buffer, n) 804 < (ssize_t)n) { 805 return ERROR_IO; 806 } 807 808 hexdump(buffer, n); 809 } 810 811 PathAdder autoAdder(&mPath, chunk_type); 812 813 // (data_offset - *offset) is either 8 or 16 814 off64_t chunk_data_size = chunk_size - (data_offset - *offset); 815 if (chunk_data_size < 0) { 816 ALOGE("b/23540914"); 817 return ERROR_MALFORMED; 818 } 819 if (chunk_type != FOURCC('m', 'd', 'a', 't') && chunk_data_size > kMaxAtomSize) { 820 char errMsg[100]; 821 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size); 822 ALOGE("%s (b/28615448)", 
errMsg); 823 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg)); 824 return ERROR_MALFORMED; 825 } 826 827 if (chunk_type != FOURCC('c', 'p', 'r', 't') 828 && chunk_type != FOURCC('c', 'o', 'v', 'r') 829 && mPath.size() == 5 && underMetaDataPath(mPath)) { 830 off64_t stop_offset = *offset + chunk_size; 831 *offset = data_offset; 832 while (*offset < stop_offset) { 833 status_t err = parseChunk(offset, depth + 1); 834 if (err != OK) { 835 return err; 836 } 837 } 838 839 if (*offset != stop_offset) { 840 return ERROR_MALFORMED; 841 } 842 843 return OK; 844 } 845 846 switch(chunk_type) { 847 case FOURCC('m', 'o', 'o', 'v'): 848 case FOURCC('t', 'r', 'a', 'k'): 849 case FOURCC('m', 'd', 'i', 'a'): 850 case FOURCC('m', 'i', 'n', 'f'): 851 case FOURCC('d', 'i', 'n', 'f'): 852 case FOURCC('s', 't', 'b', 'l'): 853 case FOURCC('m', 'v', 'e', 'x'): 854 case FOURCC('m', 'o', 'o', 'f'): 855 case FOURCC('t', 'r', 'a', 'f'): 856 case FOURCC('m', 'f', 'r', 'a'): 857 case FOURCC('u', 'd', 't', 'a'): 858 case FOURCC('i', 'l', 's', 't'): 859 case FOURCC('s', 'i', 'n', 'f'): 860 case FOURCC('s', 'c', 'h', 'i'): 861 case FOURCC('e', 'd', 't', 's'): 862 case FOURCC('w', 'a', 'v', 'e'): 863 { 864 if (chunk_type == FOURCC('m', 'o', 'o', 'v') && depth != 0) { 865 ALOGE("moov: depth %d", depth); 866 return ERROR_MALFORMED; 867 } 868 869 if (chunk_type == FOURCC('m', 'o', 'o', 'v') && mInitCheck == OK) { 870 ALOGE("duplicate moov"); 871 return ERROR_MALFORMED; 872 } 873 874 if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) { 875 // store the offset of the first segment 876 mMoofFound = true; 877 mMoofOffset = *offset; 878 } 879 880 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 881 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size); 882 883 if (mDataSource->flags() 884 & (DataSourceBase::kWantsPrefetching 885 | DataSourceBase::kIsCachingDataSource)) { 886 CachedRangedDataSource *cachedSource = 887 new 
CachedRangedDataSource(mDataSource); 888 889 if (cachedSource->setCachedRange( 890 *offset, chunk_size, 891 mCachedSource != NULL /* assume ownership on success */) == OK) { 892 mDataSource = mCachedSource = cachedSource; 893 } else { 894 delete cachedSource; 895 } 896 } 897 898 if (mLastTrack == NULL) { 899 return ERROR_MALFORMED; 900 } 901 902 mLastTrack->sampleTable = new SampleTable(mDataSource); 903 } 904 905 bool isTrack = false; 906 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 907 if (depth != 1) { 908 ALOGE("trak: depth %d", depth); 909 return ERROR_MALFORMED; 910 } 911 isTrack = true; 912 913 ALOGV("adding new track"); 914 Track *track = new Track; 915 track->next = NULL; 916 if (mLastTrack) { 917 mLastTrack->next = track; 918 } else { 919 mFirstTrack = track; 920 } 921 mLastTrack = track; 922 923 track->includes_expensive_metadata = false; 924 track->skipTrack = false; 925 track->timescale = 0; 926 track->meta.setCString(kKeyMIMEType, "application/octet-stream"); 927 track->has_elst = false; 928 track->subsample_encryption = false; 929 } 930 931 off64_t stop_offset = *offset + chunk_size; 932 *offset = data_offset; 933 while (*offset < stop_offset) { 934 status_t err = parseChunk(offset, depth + 1); 935 if (err != OK) { 936 if (isTrack) { 937 mLastTrack->skipTrack = true; 938 break; 939 } 940 return err; 941 } 942 } 943 944 if (*offset != stop_offset) { 945 return ERROR_MALFORMED; 946 } 947 948 if (isTrack) { 949 int32_t trackId; 950 // There must be exact one track header per track. 
951 if (!mLastTrack->meta.findInt32(kKeyTrackID, &trackId)) { 952 mLastTrack->skipTrack = true; 953 } 954 955 status_t err = verifyTrack(mLastTrack); 956 if (err != OK) { 957 mLastTrack->skipTrack = true; 958 } 959 960 if (mLastTrack->skipTrack) { 961 ALOGV("skipping this track..."); 962 Track *cur = mFirstTrack; 963 964 if (cur == mLastTrack) { 965 delete cur; 966 mFirstTrack = mLastTrack = NULL; 967 } else { 968 while (cur && cur->next != mLastTrack) { 969 cur = cur->next; 970 } 971 if (cur) { 972 cur->next = NULL; 973 } 974 delete mLastTrack; 975 mLastTrack = cur; 976 } 977 978 return OK; 979 } 980 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 981 mInitCheck = OK; 982 983 return UNKNOWN_ERROR; // Return a dummy error. 984 } 985 break; 986 } 987 988 case FOURCC('s', 'c', 'h', 'm'): 989 { 990 991 *offset += chunk_size; 992 if (!mLastTrack) { 993 return ERROR_MALFORMED; 994 } 995 996 uint32_t scheme_type; 997 if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) { 998 return ERROR_IO; 999 } 1000 scheme_type = ntohl(scheme_type); 1001 int32_t mode = kCryptoModeUnencrypted; 1002 switch(scheme_type) { 1003 case FOURCC('c', 'b', 'c', '1'): 1004 { 1005 mode = kCryptoModeAesCbc; 1006 break; 1007 } 1008 case FOURCC('c', 'b', 'c', 's'): 1009 { 1010 mode = kCryptoModeAesCbc; 1011 mLastTrack->subsample_encryption = true; 1012 break; 1013 } 1014 case FOURCC('c', 'e', 'n', 'c'): 1015 { 1016 mode = kCryptoModeAesCtr; 1017 break; 1018 } 1019 case FOURCC('c', 'e', 'n', 's'): 1020 { 1021 mode = kCryptoModeAesCtr; 1022 mLastTrack->subsample_encryption = true; 1023 break; 1024 } 1025 } 1026 mLastTrack->meta.setInt32(kKeyCryptoMode, mode); 1027 break; 1028 } 1029 1030 1031 case FOURCC('e', 'l', 's', 't'): 1032 { 1033 *offset += chunk_size; 1034 1035 if (!mLastTrack) { 1036 return ERROR_MALFORMED; 1037 } 1038 1039 // See 14496-12 8.6.6 1040 uint8_t version; 1041 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 1042 return ERROR_IO; 1043 } 1044 1045 uint32_t 
entry_count; 1046 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 1047 return ERROR_IO; 1048 } 1049 1050 if (entry_count != 1) { 1051 // we only support a single entry at the moment, for gapless playback 1052 ALOGW("ignoring edit list with %d entries", entry_count); 1053 } else { 1054 off64_t entriesoffset = data_offset + 8; 1055 uint64_t segment_duration; 1056 int64_t media_time; 1057 1058 if (version == 1) { 1059 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 1060 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 1061 return ERROR_IO; 1062 } 1063 } else if (version == 0) { 1064 uint32_t sd; 1065 int32_t mt; 1066 if (!mDataSource->getUInt32(entriesoffset, &sd) || 1067 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 1068 return ERROR_IO; 1069 } 1070 segment_duration = sd; 1071 media_time = mt; 1072 } else { 1073 return ERROR_IO; 1074 } 1075 1076 // save these for later, because the elst atom might precede 1077 // the atoms that actually gives us the duration and sample rate 1078 // needed to calculate the padding and delay values 1079 mLastTrack->has_elst = true; 1080 mLastTrack->elst_media_time = media_time; 1081 mLastTrack->elst_segment_duration = segment_duration; 1082 } 1083 break; 1084 } 1085 1086 case FOURCC('f', 'r', 'm', 'a'): 1087 { 1088 *offset += chunk_size; 1089 1090 uint32_t original_fourcc; 1091 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 1092 return ERROR_IO; 1093 } 1094 original_fourcc = ntohl(original_fourcc); 1095 ALOGV("read original format: %d", original_fourcc); 1096 1097 if (mLastTrack == NULL) { 1098 return ERROR_MALFORMED; 1099 } 1100 1101 mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 1102 uint32_t num_channels = 0; 1103 uint32_t sample_rate = 0; 1104 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 1105 mLastTrack->meta.setInt32(kKeyChannelCount, num_channels); 1106 
mLastTrack->meta.setInt32(kKeySampleRate, sample_rate); 1107 } 1108 break; 1109 } 1110 1111 case FOURCC('t', 'e', 'n', 'c'): 1112 { 1113 *offset += chunk_size; 1114 1115 if (chunk_size < 32) { 1116 return ERROR_MALFORMED; 1117 } 1118 1119 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1120 // default IV size, 16 bytes default KeyID 1121 // (ISO 23001-7) 1122 1123 uint8_t version; 1124 if (mDataSource->readAt(data_offset, &version, sizeof(version)) 1125 < (ssize_t)sizeof(version)) { 1126 return ERROR_IO; 1127 } 1128 1129 uint8_t buf[4]; 1130 memset(buf, 0, 4); 1131 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1132 return ERROR_IO; 1133 } 1134 1135 if (mLastTrack == NULL) { 1136 return ERROR_MALFORMED; 1137 } 1138 1139 uint8_t defaultEncryptedByteBlock = 0; 1140 uint8_t defaultSkipByteBlock = 0; 1141 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1142 if (version == 1) { 1143 uint32_t pattern = buf[2]; 1144 defaultEncryptedByteBlock = pattern >> 4; 1145 defaultSkipByteBlock = pattern & 0xf; 1146 if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) { 1147 // use (1,0) to mean "encrypt everything" 1148 defaultEncryptedByteBlock = 1; 1149 } 1150 } else if (mLastTrack->subsample_encryption) { 1151 ALOGW("subsample_encryption should be version 1"); 1152 } else if (defaultAlgorithmId > 1) { 1153 // only 0 (clear) and 1 (AES-128) are valid 1154 ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId); 1155 defaultAlgorithmId = 1; 1156 } 1157 1158 memset(buf, 0, 4); 1159 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1160 return ERROR_IO; 1161 } 1162 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 1163 1164 if (defaultAlgorithmId == 0 && defaultIVSize != 0) { 1165 // only unencrypted data must have 0 IV size 1166 return ERROR_MALFORMED; 1167 } else if (defaultIVSize != 0 && 1168 defaultIVSize != 8 && 1169 defaultIVSize != 16) { 1170 return ERROR_MALFORMED; 1171 } 1172 
1173 uint8_t defaultKeyId[16]; 1174 1175 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1176 return ERROR_IO; 1177 } 1178 1179 sp<ABuffer> defaultConstantIv; 1180 if (defaultAlgorithmId != 0 && defaultIVSize == 0) { 1181 1182 uint8_t ivlength; 1183 if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength)) 1184 < (ssize_t)sizeof(ivlength)) { 1185 return ERROR_IO; 1186 } 1187 1188 if (ivlength != 8 && ivlength != 16) { 1189 ALOGW("unsupported IV length: %u", ivlength); 1190 return ERROR_MALFORMED; 1191 } 1192 1193 defaultConstantIv = new ABuffer(ivlength); 1194 if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength) 1195 < (ssize_t)ivlength) { 1196 return ERROR_IO; 1197 } 1198 1199 defaultConstantIv->setRange(0, ivlength); 1200 } 1201 1202 int32_t tmpAlgorithmId; 1203 if (!mLastTrack->meta.findInt32(kKeyCryptoMode, &tmpAlgorithmId)) { 1204 mLastTrack->meta.setInt32(kKeyCryptoMode, defaultAlgorithmId); 1205 } 1206 1207 mLastTrack->meta.setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1208 mLastTrack->meta.setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1209 mLastTrack->meta.setInt32(kKeyEncryptedByteBlock, defaultEncryptedByteBlock); 1210 mLastTrack->meta.setInt32(kKeySkipByteBlock, defaultSkipByteBlock); 1211 if (defaultConstantIv != NULL) { 1212 mLastTrack->meta.setData(kKeyCryptoIV, 'dciv', defaultConstantIv->data(), defaultConstantIv->size()); 1213 } 1214 break; 1215 } 1216 1217 case FOURCC('t', 'k', 'h', 'd'): 1218 { 1219 *offset += chunk_size; 1220 1221 status_t err; 1222 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1223 return err; 1224 } 1225 1226 break; 1227 } 1228 1229 case FOURCC('t', 'r', 'e', 'f'): 1230 { 1231 off64_t stop_offset = *offset + chunk_size; 1232 *offset = data_offset; 1233 while (*offset < stop_offset) { 1234 status_t err = parseChunk(offset, depth + 1); 1235 if (err != OK) { 1236 return err; 1237 } 1238 } 1239 if (*offset != stop_offset) { 1240 return 
ERROR_MALFORMED; 1241 } 1242 break; 1243 } 1244 1245 case FOURCC('t', 'h', 'm', 'b'): 1246 { 1247 *offset += chunk_size; 1248 1249 if (mLastTrack != NULL) { 1250 // Skip thumbnail track for now since we don't have an 1251 // API to retrieve it yet. 1252 // The thumbnail track can't be accessed by negative index or time, 1253 // because each timed sample has its own corresponding thumbnail 1254 // in the thumbnail track. We'll need a dedicated API to retrieve 1255 // thumbnail at time instead. 1256 mLastTrack->skipTrack = true; 1257 } 1258 1259 break; 1260 } 1261 1262 case FOURCC('p', 's', 's', 'h'): 1263 { 1264 *offset += chunk_size; 1265 1266 PsshInfo pssh; 1267 1268 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1269 return ERROR_IO; 1270 } 1271 1272 uint32_t psshdatalen = 0; 1273 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1274 return ERROR_IO; 1275 } 1276 pssh.datalen = ntohl(psshdatalen); 1277 ALOGV("pssh data size: %d", pssh.datalen); 1278 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) { 1279 // pssh data length exceeds size of containing box 1280 return ERROR_MALFORMED; 1281 } 1282 1283 pssh.data = new (std::nothrow) uint8_t[pssh.datalen]; 1284 if (pssh.data == NULL) { 1285 return ERROR_MALFORMED; 1286 } 1287 ALOGV("allocated pssh @ %p", pssh.data); 1288 ssize_t requested = (ssize_t) pssh.datalen; 1289 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1290 delete[] pssh.data; 1291 return ERROR_IO; 1292 } 1293 mPssh.push_back(pssh); 1294 1295 break; 1296 } 1297 1298 case FOURCC('m', 'd', 'h', 'd'): 1299 { 1300 *offset += chunk_size; 1301 1302 if (chunk_data_size < 4 || mLastTrack == NULL) { 1303 return ERROR_MALFORMED; 1304 } 1305 1306 uint8_t version; 1307 if (mDataSource->readAt( 1308 data_offset, &version, sizeof(version)) 1309 < (ssize_t)sizeof(version)) { 1310 return ERROR_IO; 1311 } 1312 1313 off64_t timescale_offset; 1314 1315 if (version == 1) { 1316 timescale_offset = 
data_offset + 4 + 16; 1317 } else if (version == 0) { 1318 timescale_offset = data_offset + 4 + 8; 1319 } else { 1320 return ERROR_IO; 1321 } 1322 1323 uint32_t timescale; 1324 if (mDataSource->readAt( 1325 timescale_offset, ×cale, sizeof(timescale)) 1326 < (ssize_t)sizeof(timescale)) { 1327 return ERROR_IO; 1328 } 1329 1330 if (!timescale) { 1331 ALOGE("timescale should not be ZERO."); 1332 return ERROR_MALFORMED; 1333 } 1334 1335 mLastTrack->timescale = ntohl(timescale); 1336 1337 // 14496-12 says all ones means indeterminate, but some files seem to use 1338 // 0 instead. We treat both the same. 1339 int64_t duration = 0; 1340 if (version == 1) { 1341 if (mDataSource->readAt( 1342 timescale_offset + 4, &duration, sizeof(duration)) 1343 < (ssize_t)sizeof(duration)) { 1344 return ERROR_IO; 1345 } 1346 if (duration != -1) { 1347 duration = ntoh64(duration); 1348 } 1349 } else { 1350 uint32_t duration32; 1351 if (mDataSource->readAt( 1352 timescale_offset + 4, &duration32, sizeof(duration32)) 1353 < (ssize_t)sizeof(duration32)) { 1354 return ERROR_IO; 1355 } 1356 if (duration32 != 0xffffffff) { 1357 duration = ntohl(duration32); 1358 } 1359 } 1360 if (duration != 0 && mLastTrack->timescale != 0) { 1361 mLastTrack->meta.setInt64( 1362 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1363 } 1364 1365 uint8_t lang[2]; 1366 off64_t lang_offset; 1367 if (version == 1) { 1368 lang_offset = timescale_offset + 4 + 8; 1369 } else if (version == 0) { 1370 lang_offset = timescale_offset + 4 + 4; 1371 } else { 1372 return ERROR_IO; 1373 } 1374 1375 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1376 < (ssize_t)sizeof(lang)) { 1377 return ERROR_IO; 1378 } 1379 1380 // To get the ISO-639-2/T three character language code 1381 // 1 bit pad followed by 3 5-bits characters. Each character 1382 // is packed as the difference between its ASCII value and 0x60. 
1383 char lang_code[4]; 1384 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1385 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1386 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1387 lang_code[3] = '\0'; 1388 1389 mLastTrack->meta.setCString( 1390 kKeyMediaLanguage, lang_code); 1391 1392 break; 1393 } 1394 1395 case FOURCC('s', 't', 's', 'd'): 1396 { 1397 uint8_t buffer[8]; 1398 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1399 return ERROR_MALFORMED; 1400 } 1401 1402 if (mDataSource->readAt( 1403 data_offset, buffer, 8) < 8) { 1404 return ERROR_IO; 1405 } 1406 1407 if (U32_AT(buffer) != 0) { 1408 // Should be version 0, flags 0. 1409 return ERROR_MALFORMED; 1410 } 1411 1412 uint32_t entry_count = U32_AT(&buffer[4]); 1413 1414 if (entry_count > 1) { 1415 // For 3GPP timed text, there could be multiple tx3g boxes contain 1416 // multiple text display formats. These formats will be used to 1417 // display the timed text. 1418 // For encrypted files, there may also be more than one entry. 1419 const char *mime; 1420 1421 if (mLastTrack == NULL) 1422 return ERROR_MALFORMED; 1423 1424 CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime)); 1425 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1426 strcasecmp(mime, "application/octet-stream")) { 1427 // For now we only support a single type of media per track. 
1428 mLastTrack->skipTrack = true; 1429 *offset += chunk_size; 1430 break; 1431 } 1432 } 1433 off64_t stop_offset = *offset + chunk_size; 1434 *offset = data_offset + 8; 1435 for (uint32_t i = 0; i < entry_count; ++i) { 1436 status_t err = parseChunk(offset, depth + 1); 1437 if (err != OK) { 1438 return err; 1439 } 1440 } 1441 1442 if (*offset != stop_offset) { 1443 return ERROR_MALFORMED; 1444 } 1445 break; 1446 } 1447 case FOURCC('m', 'e', 't', 't'): 1448 { 1449 *offset += chunk_size; 1450 1451 if (mLastTrack == NULL) 1452 return ERROR_MALFORMED; 1453 1454 auto buffer = heapbuffer<uint8_t>(chunk_data_size); 1455 if (buffer.get() == NULL) { 1456 return NO_MEMORY; 1457 } 1458 1459 if (mDataSource->readAt( 1460 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) { 1461 return ERROR_IO; 1462 } 1463 1464 String8 mimeFormat((const char *)(buffer.get()), chunk_data_size); 1465 mLastTrack->meta.setCString(kKeyMIMEType, mimeFormat.string()); 1466 1467 break; 1468 } 1469 1470 case FOURCC('m', 'p', '4', 'a'): 1471 case FOURCC('e', 'n', 'c', 'a'): 1472 case FOURCC('s', 'a', 'm', 'r'): 1473 case FOURCC('s', 'a', 'w', 'b'): 1474 { 1475 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a') 1476 && depth >= 1 && mPath[depth - 1] == FOURCC('w', 'a', 'v', 'e')) { 1477 // Ignore mp4a embedded in QT wave atom 1478 *offset += chunk_size; 1479 break; 1480 } 1481 1482 uint8_t buffer[8 + 20]; 1483 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1484 // Basic AudioSampleEntry size. 
1485 return ERROR_MALFORMED; 1486 } 1487 1488 if (mDataSource->readAt( 1489 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1490 return ERROR_IO; 1491 } 1492 1493 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1494 uint16_t version = U16_AT(&buffer[8]); 1495 uint32_t num_channels = U16_AT(&buffer[16]); 1496 1497 uint16_t sample_size = U16_AT(&buffer[18]); 1498 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1499 1500 if (mLastTrack == NULL) 1501 return ERROR_MALFORMED; 1502 1503 off64_t stop_offset = *offset + chunk_size; 1504 *offset = data_offset + sizeof(buffer); 1505 1506 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')) { 1507 if (version == 1) { 1508 if (mDataSource->readAt(*offset, buffer, 16) < 16) { 1509 return ERROR_IO; 1510 } 1511 1512#if 0 1513 U32_AT(buffer); // samples per packet 1514 U32_AT(&buffer[4]); // bytes per packet 1515 U32_AT(&buffer[8]); // bytes per frame 1516 U32_AT(&buffer[12]); // bytes per sample 1517#endif 1518 *offset += 16; 1519 } else if (version == 2) { 1520 uint8_t v2buffer[36]; 1521 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) { 1522 return ERROR_IO; 1523 } 1524 1525#if 0 1526 U32_AT(v2buffer); // size of struct only 1527 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate 1528 num_channels = U32_AT(&v2buffer[12]); // num audio channels 1529 U32_AT(&v2buffer[16]); // always 0x7f000000 1530 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel 1531 U32_AT(&v2buffer[24]); // format specifc flags 1532 U32_AT(&v2buffer[28]); // const bytes per audio packet 1533 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet 1534#endif 1535 *offset += 36; 1536 } 1537 } 1538 1539 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1540 // if the chunk type is enca, we'll get the type from the frma box later 1541 mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1542 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1543 } 1544 
ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1545 chunk, num_channels, sample_size, sample_rate); 1546 mLastTrack->meta.setInt32(kKeyChannelCount, num_channels); 1547 mLastTrack->meta.setInt32(kKeySampleRate, sample_rate); 1548 1549 while (*offset < stop_offset) { 1550 status_t err = parseChunk(offset, depth + 1); 1551 if (err != OK) { 1552 return err; 1553 } 1554 } 1555 1556 if (*offset != stop_offset) { 1557 return ERROR_MALFORMED; 1558 } 1559 break; 1560 } 1561 1562 case FOURCC('m', 'p', '4', 'v'): 1563 case FOURCC('e', 'n', 'c', 'v'): 1564 case FOURCC('s', '2', '6', '3'): 1565 case FOURCC('H', '2', '6', '3'): 1566 case FOURCC('h', '2', '6', '3'): 1567 case FOURCC('a', 'v', 'c', '1'): 1568 case FOURCC('h', 'v', 'c', '1'): 1569 case FOURCC('h', 'e', 'v', '1'): 1570 { 1571 uint8_t buffer[78]; 1572 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1573 // Basic VideoSampleEntry size. 1574 return ERROR_MALFORMED; 1575 } 1576 1577 if (mDataSource->readAt( 1578 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1579 return ERROR_IO; 1580 } 1581 1582 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1583 uint16_t width = U16_AT(&buffer[6 + 18]); 1584 uint16_t height = U16_AT(&buffer[6 + 20]); 1585 1586 // The video sample is not standard-compliant if it has invalid dimension. 1587 // Use some default width and height value, and 1588 // let the decoder figure out the actual width and height (and thus 1589 // be prepared for INFO_FOMRAT_CHANGED event). 
1590 if (width == 0) width = 352; 1591 if (height == 0) height = 288; 1592 1593 // printf("*** coding='%s' width=%d height=%d\n", 1594 // chunk, width, height); 1595 1596 if (mLastTrack == NULL) 1597 return ERROR_MALFORMED; 1598 1599 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1600 // if the chunk type is encv, we'll get the type from the frma box later 1601 mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1602 } 1603 mLastTrack->meta.setInt32(kKeyWidth, width); 1604 mLastTrack->meta.setInt32(kKeyHeight, height); 1605 1606 off64_t stop_offset = *offset + chunk_size; 1607 *offset = data_offset + sizeof(buffer); 1608 while (*offset < stop_offset) { 1609 status_t err = parseChunk(offset, depth + 1); 1610 if (err != OK) { 1611 return err; 1612 } 1613 } 1614 1615 if (*offset != stop_offset) { 1616 return ERROR_MALFORMED; 1617 } 1618 break; 1619 } 1620 1621 case FOURCC('s', 't', 'c', 'o'): 1622 case FOURCC('c', 'o', '6', '4'): 1623 { 1624 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) { 1625 return ERROR_MALFORMED; 1626 } 1627 1628 status_t err = 1629 mLastTrack->sampleTable->setChunkOffsetParams( 1630 chunk_type, data_offset, chunk_data_size); 1631 1632 *offset += chunk_size; 1633 1634 if (err != OK) { 1635 return err; 1636 } 1637 1638 break; 1639 } 1640 1641 case FOURCC('s', 't', 's', 'c'): 1642 { 1643 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1644 return ERROR_MALFORMED; 1645 1646 status_t err = 1647 mLastTrack->sampleTable->setSampleToChunkParams( 1648 data_offset, chunk_data_size); 1649 1650 *offset += chunk_size; 1651 1652 if (err != OK) { 1653 return err; 1654 } 1655 1656 break; 1657 } 1658 1659 case FOURCC('s', 't', 's', 'z'): 1660 case FOURCC('s', 't', 'z', '2'): 1661 { 1662 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) { 1663 return ERROR_MALFORMED; 1664 } 1665 1666 status_t err = 1667 mLastTrack->sampleTable->setSampleSizeParams( 1668 chunk_type, data_offset, chunk_data_size); 1669 
1670 *offset += chunk_size; 1671 1672 if (err != OK) { 1673 return err; 1674 } 1675 1676 size_t max_size; 1677 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1678 1679 if (err != OK) { 1680 return err; 1681 } 1682 1683 if (max_size != 0) { 1684 // Assume that a given buffer only contains at most 10 chunks, 1685 // each chunk originally prefixed with a 2 byte length will 1686 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1687 // and thus will grow by 2 bytes per chunk. 1688 if (max_size > SIZE_MAX - 10 * 2) { 1689 ALOGE("max sample size too big: %zu", max_size); 1690 return ERROR_MALFORMED; 1691 } 1692 mLastTrack->meta.setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1693 } else { 1694 // No size was specified. Pick a conservatively large size. 1695 uint32_t width, height; 1696 if (!mLastTrack->meta.findInt32(kKeyWidth, (int32_t*)&width) || 1697 !mLastTrack->meta.findInt32(kKeyHeight,(int32_t*) &height)) { 1698 ALOGE("No width or height, assuming worst case 1080p"); 1699 width = 1920; 1700 height = 1080; 1701 } else { 1702 // A resolution was specified, check that it's not too big. The values below 1703 // were chosen so that the calculations below don't cause overflows, they're 1704 // not indicating that resolutions up to 32kx32k are actually supported. 1705 if (width > 32768 || height > 32768) { 1706 ALOGE("can't support %u x %u video", width, height); 1707 return ERROR_MALFORMED; 1708 } 1709 } 1710 1711 const char *mime; 1712 CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime)); 1713 if (!strncmp(mime, "audio/", 6)) { 1714 // for audio, use 128KB 1715 max_size = 1024 * 128; 1716 } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC) 1717 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 1718 // AVC & HEVC requires compression ratio of at least 2, and uses 1719 // macroblocks 1720 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1721 } else { 1722 // For all other formats there is no minimum compression 1723 // ratio. 
Use compression ratio of 1. 1724 max_size = width * height * 3 / 2; 1725 } 1726 // HACK: allow 10% overhead 1727 // TODO: read sample size from traf atom for fragmented MPEG4. 1728 max_size += max_size / 10; 1729 mLastTrack->meta.setInt32(kKeyMaxInputSize, max_size); 1730 } 1731 1732 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1733 // mimetype) previously obtained, so don't cache them. 1734 const char *mime; 1735 CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime)); 1736 // Calculate average frame rate. 1737 if (!strncasecmp("video/", mime, 6)) { 1738 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1739 if (nSamples == 0) { 1740 int32_t trackId; 1741 if (mLastTrack->meta.findInt32(kKeyTrackID, &trackId)) { 1742 for (size_t i = 0; i < mTrex.size(); i++) { 1743 Trex *t = &mTrex.editItemAt(i); 1744 if (t->track_ID == (uint32_t) trackId) { 1745 if (t->default_sample_duration > 0) { 1746 int32_t frameRate = 1747 mLastTrack->timescale / t->default_sample_duration; 1748 mLastTrack->meta.setInt32(kKeyFrameRate, frameRate); 1749 } 1750 break; 1751 } 1752 } 1753 } 1754 } else { 1755 int64_t durationUs; 1756 if (mLastTrack->meta.findInt64(kKeyDuration, &durationUs)) { 1757 if (durationUs > 0) { 1758 int32_t frameRate = (nSamples * 1000000LL + 1759 (durationUs >> 1)) / durationUs; 1760 mLastTrack->meta.setInt32(kKeyFrameRate, frameRate); 1761 } 1762 } 1763 ALOGV("setting frame count %zu", nSamples); 1764 mLastTrack->meta.setInt32(kKeyFrameCount, nSamples); 1765 } 1766 } 1767 1768 break; 1769 } 1770 1771 case FOURCC('s', 't', 't', 's'): 1772 { 1773 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1774 return ERROR_MALFORMED; 1775 1776 *offset += chunk_size; 1777 1778 status_t err = 1779 mLastTrack->sampleTable->setTimeToSampleParams( 1780 data_offset, chunk_data_size); 1781 1782 if (err != OK) { 1783 return err; 1784 } 1785 1786 break; 1787 } 1788 1789 case FOURCC('c', 't', 't', 's'): 1790 { 1791 if ((mLastTrack 
== NULL) || (mLastTrack->sampleTable == NULL)) 1792 return ERROR_MALFORMED; 1793 1794 *offset += chunk_size; 1795 1796 status_t err = 1797 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1798 data_offset, chunk_data_size); 1799 1800 if (err != OK) { 1801 return err; 1802 } 1803 1804 break; 1805 } 1806 1807 case FOURCC('s', 't', 's', 's'): 1808 { 1809 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1810 return ERROR_MALFORMED; 1811 1812 *offset += chunk_size; 1813 1814 status_t err = 1815 mLastTrack->sampleTable->setSyncSampleParams( 1816 data_offset, chunk_data_size); 1817 1818 if (err != OK) { 1819 return err; 1820 } 1821 1822 break; 1823 } 1824 1825 // \xA9xyz 1826 case FOURCC(0xA9, 'x', 'y', 'z'): 1827 { 1828 *offset += chunk_size; 1829 1830 // Best case the total data length inside "\xA9xyz" box would 1831 // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/", 1832 // where "\x00\x05" is the text string length with value = 5, 1833 // "\0x15\xc7" is the language code = en, and "+0+0/" is a 1834 // location (string) value with longitude = 0 and latitude = 0. 1835 // Since some devices encountered in the wild omit the trailing 1836 // slash, we'll allow that. 1837 if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing / 1838 return ERROR_MALFORMED; 1839 } 1840 1841 uint16_t len; 1842 if (!mDataSource->getUInt16(data_offset, &len)) { 1843 return ERROR_IO; 1844 } 1845 1846 // allow "+0+0" without trailing slash 1847 if (len < 4 || len > chunk_data_size - 4) { 1848 return ERROR_MALFORMED; 1849 } 1850 // The location string following the language code is formatted 1851 // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709). 1852 // Allocate 2 extra bytes, in case we need to add a trailing slash, 1853 // and to add a terminating 0. 
1854 std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]()); 1855 if (!buffer) { 1856 return NO_MEMORY; 1857 } 1858 1859 if (mDataSource->readAt( 1860 data_offset + 4, &buffer[0], len) < len) { 1861 return ERROR_IO; 1862 } 1863 1864 len = strlen(&buffer[0]); 1865 if (len < 4) { 1866 return ERROR_MALFORMED; 1867 } 1868 // Add a trailing slash if there wasn't one. 1869 if (buffer[len - 1] != '/') { 1870 buffer[len] = '/'; 1871 } 1872 mFileMetaData.setCString(kKeyLocation, &buffer[0]); 1873 break; 1874 } 1875 1876 case FOURCC('e', 's', 'd', 's'): 1877 { 1878 *offset += chunk_size; 1879 1880 if (chunk_data_size < 4) { 1881 return ERROR_MALFORMED; 1882 } 1883 1884 uint8_t buffer[256]; 1885 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1886 return ERROR_BUFFER_TOO_SMALL; 1887 } 1888 1889 if (mDataSource->readAt( 1890 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1891 return ERROR_IO; 1892 } 1893 1894 if (U32_AT(buffer) != 0) { 1895 // Should be version 0, flags 0. 1896 return ERROR_MALFORMED; 1897 } 1898 1899 if (mLastTrack == NULL) 1900 return ERROR_MALFORMED; 1901 1902 mLastTrack->meta.setData( 1903 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1904 1905 if (mPath.size() >= 2 1906 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1907 // Information from the ESDS must be relied on for proper 1908 // setup of sample rate and channel count for MPEG4 Audio. 1909 // The generic header appears to only contain generic 1910 // information... 
1911 1912 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1913 &buffer[4], chunk_data_size - 4); 1914 1915 if (err != OK) { 1916 return err; 1917 } 1918 } 1919 if (mPath.size() >= 2 1920 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) { 1921 // Check if the video is MPEG2 1922 ESDS esds(&buffer[4], chunk_data_size - 4); 1923 1924 uint8_t objectTypeIndication; 1925 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) { 1926 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) { 1927 mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2); 1928 } 1929 } 1930 } 1931 break; 1932 } 1933 1934 case FOURCC('b', 't', 'r', 't'): 1935 { 1936 *offset += chunk_size; 1937 if (mLastTrack == NULL) { 1938 return ERROR_MALFORMED; 1939 } 1940 1941 uint8_t buffer[12]; 1942 if (chunk_data_size != sizeof(buffer)) { 1943 return ERROR_MALFORMED; 1944 } 1945 1946 if (mDataSource->readAt( 1947 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1948 return ERROR_IO; 1949 } 1950 1951 uint32_t maxBitrate = U32_AT(&buffer[4]); 1952 uint32_t avgBitrate = U32_AT(&buffer[8]); 1953 if (maxBitrate > 0 && maxBitrate < INT32_MAX) { 1954 mLastTrack->meta.setInt32(kKeyMaxBitRate, (int32_t)maxBitrate); 1955 } 1956 if (avgBitrate > 0 && avgBitrate < INT32_MAX) { 1957 mLastTrack->meta.setInt32(kKeyBitRate, (int32_t)avgBitrate); 1958 } 1959 break; 1960 } 1961 1962 case FOURCC('a', 'v', 'c', 'C'): 1963 { 1964 *offset += chunk_size; 1965 1966 auto buffer = heapbuffer<uint8_t>(chunk_data_size); 1967 1968 if (buffer.get() == NULL) { 1969 ALOGE("b/28471206"); 1970 return NO_MEMORY; 1971 } 1972 1973 if (mDataSource->readAt( 1974 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) { 1975 return ERROR_IO; 1976 } 1977 1978 if (mLastTrack == NULL) 1979 return ERROR_MALFORMED; 1980 1981 mLastTrack->meta.setData( 1982 kKeyAVCC, kTypeAVCC, buffer.get(), chunk_data_size); 1983 1984 break; 1985 } 1986 case FOURCC('h', 'v', 'c', 'C'): 1987 { 1988 
auto buffer = heapbuffer<uint8_t>(chunk_data_size); 1989 1990 if (buffer.get() == NULL) { 1991 ALOGE("b/28471206"); 1992 return NO_MEMORY; 1993 } 1994 1995 if (mDataSource->readAt( 1996 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) { 1997 return ERROR_IO; 1998 } 1999 2000 if (mLastTrack == NULL) 2001 return ERROR_MALFORMED; 2002 2003 mLastTrack->meta.setData( 2004 kKeyHVCC, kTypeHVCC, buffer.get(), chunk_data_size); 2005 2006 *offset += chunk_size; 2007 break; 2008 } 2009 2010 case FOURCC('d', '2', '6', '3'): 2011 { 2012 *offset += chunk_size; 2013 /* 2014 * d263 contains a fixed 7 bytes part: 2015 * vendor - 4 bytes 2016 * version - 1 byte 2017 * level - 1 byte 2018 * profile - 1 byte 2019 * optionally, "d263" box itself may contain a 16-byte 2020 * bit rate box (bitr) 2021 * average bit rate - 4 bytes 2022 * max bit rate - 4 bytes 2023 */ 2024 char buffer[23]; 2025 if (chunk_data_size != 7 && 2026 chunk_data_size != 23) { 2027 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size); 2028 return ERROR_MALFORMED; 2029 } 2030 2031 if (mDataSource->readAt( 2032 data_offset, buffer, chunk_data_size) < chunk_data_size) { 2033 return ERROR_IO; 2034 } 2035 2036 if (mLastTrack == NULL) 2037 return ERROR_MALFORMED; 2038 2039 mLastTrack->meta.setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 2040 2041 break; 2042 } 2043 2044 case FOURCC('m', 'e', 't', 'a'): 2045 { 2046 off64_t stop_offset = *offset + chunk_size; 2047 *offset = data_offset; 2048 bool isParsingMetaKeys = underQTMetaPath(mPath, 2); 2049 if (!isParsingMetaKeys) { 2050 uint8_t buffer[4]; 2051 if (chunk_data_size < (off64_t)sizeof(buffer)) { 2052 *offset = stop_offset; 2053 return ERROR_MALFORMED; 2054 } 2055 2056 if (mDataSource->readAt( 2057 data_offset, buffer, 4) < 4) { 2058 *offset = stop_offset; 2059 return ERROR_IO; 2060 } 2061 2062 if (U32_AT(buffer) != 0) { 2063 // Should be version 0, flags 0. 
2064 2065 // If it's not, let's assume this is one of those 2066 // apparently malformed chunks that don't have flags 2067 // and completely different semantics than what's 2068 // in the MPEG4 specs and skip it. 2069 *offset = stop_offset; 2070 return OK; 2071 } 2072 *offset += sizeof(buffer); 2073 } 2074 2075 while (*offset < stop_offset) { 2076 status_t err = parseChunk(offset, depth + 1); 2077 if (err != OK) { 2078 return err; 2079 } 2080 } 2081 2082 if (*offset != stop_offset) { 2083 return ERROR_MALFORMED; 2084 } 2085 break; 2086 } 2087 2088 case FOURCC('i', 'l', 'o', 'c'): 2089 case FOURCC('i', 'i', 'n', 'f'): 2090 case FOURCC('i', 'p', 'r', 'p'): 2091 case FOURCC('p', 'i', 't', 'm'): 2092 case FOURCC('i', 'd', 'a', 't'): 2093 case FOURCC('i', 'r', 'e', 'f'): 2094 case FOURCC('i', 'p', 'r', 'o'): 2095 { 2096 if (mIsHeif) { 2097 if (mItemTable == NULL) { 2098 mItemTable = new ItemTable(mDataSource); 2099 } 2100 status_t err = mItemTable->parse( 2101 chunk_type, data_offset, chunk_data_size); 2102 if (err != OK) { 2103 return err; 2104 } 2105 } 2106 *offset += chunk_size; 2107 break; 2108 } 2109 2110 case FOURCC('m', 'e', 'a', 'n'): 2111 case FOURCC('n', 'a', 'm', 'e'): 2112 case FOURCC('d', 'a', 't', 'a'): 2113 { 2114 *offset += chunk_size; 2115 2116 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 2117 status_t err = parseITunesMetaData(data_offset, chunk_data_size); 2118 2119 if (err != OK) { 2120 return err; 2121 } 2122 } 2123 2124 break; 2125 } 2126 2127 case FOURCC('m', 'v', 'h', 'd'): 2128 { 2129 *offset += chunk_size; 2130 2131 if (depth != 1) { 2132 ALOGE("mvhd: depth %d", depth); 2133 return ERROR_MALFORMED; 2134 } 2135 if (chunk_data_size < 32) { 2136 return ERROR_MALFORMED; 2137 } 2138 2139 uint8_t header[32]; 2140 if (mDataSource->readAt( 2141 data_offset, header, sizeof(header)) 2142 < (ssize_t)sizeof(header)) { 2143 return ERROR_IO; 2144 } 2145 2146 uint64_t creationTime; 2147 uint64_t duration = 0; 2148 if (header[0] == 1) { 2149 
creationTime = U64_AT(&header[4]); 2150 mHeaderTimescale = U32_AT(&header[20]); 2151 duration = U64_AT(&header[24]); 2152 if (duration == 0xffffffffffffffff) { 2153 duration = 0; 2154 } 2155 } else if (header[0] != 0) { 2156 return ERROR_MALFORMED; 2157 } else { 2158 creationTime = U32_AT(&header[4]); 2159 mHeaderTimescale = U32_AT(&header[12]); 2160 uint32_t d32 = U32_AT(&header[16]); 2161 if (d32 == 0xffffffff) { 2162 d32 = 0; 2163 } 2164 duration = d32; 2165 } 2166 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) { 2167 mFileMetaData.setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 2168 } 2169 2170 String8 s; 2171 if (convertTimeToDate(creationTime, &s)) { 2172 mFileMetaData.setCString(kKeyDate, s.string()); 2173 } 2174 2175 2176 break; 2177 } 2178 2179 case FOURCC('m', 'e', 'h', 'd'): 2180 { 2181 *offset += chunk_size; 2182 2183 if (chunk_data_size < 8) { 2184 return ERROR_MALFORMED; 2185 } 2186 2187 uint8_t flags[4]; 2188 if (mDataSource->readAt( 2189 data_offset, flags, sizeof(flags)) 2190 < (ssize_t)sizeof(flags)) { 2191 return ERROR_IO; 2192 } 2193 2194 uint64_t duration = 0; 2195 if (flags[0] == 1) { 2196 // 64 bit 2197 if (chunk_data_size < 12) { 2198 return ERROR_MALFORMED; 2199 } 2200 mDataSource->getUInt64(data_offset + 4, &duration); 2201 if (duration == 0xffffffffffffffff) { 2202 duration = 0; 2203 } 2204 } else if (flags[0] == 0) { 2205 // 32 bit 2206 uint32_t d32; 2207 mDataSource->getUInt32(data_offset + 4, &d32); 2208 if (d32 == 0xffffffff) { 2209 d32 = 0; 2210 } 2211 duration = d32; 2212 } else { 2213 return ERROR_MALFORMED; 2214 } 2215 2216 if (duration != 0 && mHeaderTimescale != 0) { 2217 mFileMetaData.setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 2218 } 2219 2220 break; 2221 } 2222 2223 case FOURCC('m', 'd', 'a', 't'): 2224 { 2225 mMdatFound = true; 2226 2227 *offset += chunk_size; 2228 break; 2229 } 2230 2231 case FOURCC('h', 'd', 'l', 'r'): 2232 { 2233 *offset += chunk_size; 2234 
2235 if (underQTMetaPath(mPath, 3)) { 2236 break; 2237 } 2238 2239 uint32_t buffer; 2240 if (mDataSource->readAt( 2241 data_offset + 8, &buffer, 4) < 4) { 2242 return ERROR_IO; 2243 } 2244 2245 uint32_t type = ntohl(buffer); 2246 // For the 3GPP file format, the handler-type within the 'hdlr' box 2247 // shall be 'text'. We also want to support 'sbtl' handler type 2248 // for a practical reason as various MPEG4 containers use it. 2249 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 2250 if (mLastTrack != NULL) { 2251 mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 2252 } 2253 } 2254 2255 break; 2256 } 2257 2258 case FOURCC('k', 'e', 'y', 's'): 2259 { 2260 *offset += chunk_size; 2261 2262 if (underQTMetaPath(mPath, 3)) { 2263 status_t err = parseQTMetaKey(data_offset, chunk_data_size); 2264 if (err != OK) { 2265 return err; 2266 } 2267 } 2268 break; 2269 } 2270 2271 case FOURCC('t', 'r', 'e', 'x'): 2272 { 2273 *offset += chunk_size; 2274 2275 if (chunk_data_size < 24) { 2276 return ERROR_IO; 2277 } 2278 Trex trex; 2279 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) || 2280 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) || 2281 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) || 2282 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) || 2283 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) { 2284 return ERROR_IO; 2285 } 2286 mTrex.add(trex); 2287 break; 2288 } 2289 2290 case FOURCC('t', 'x', '3', 'g'): 2291 { 2292 if (mLastTrack == NULL) 2293 return ERROR_MALFORMED; 2294 2295 uint32_t type; 2296 const void *data; 2297 size_t size = 0; 2298 if (!mLastTrack->meta.findData( 2299 kKeyTextFormatData, &type, &data, &size)) { 2300 size = 0; 2301 } 2302 2303 if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) { 2304 return ERROR_MALFORMED; 2305 } 2306 2307 uint8_t *buffer = new (std::nothrow) 
uint8_t[size + chunk_size]; 2308 if (buffer == NULL) { 2309 return ERROR_MALFORMED; 2310 } 2311 2312 if (size > 0) { 2313 memcpy(buffer, data, size); 2314 } 2315 2316 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 2317 < chunk_size) { 2318 delete[] buffer; 2319 buffer = NULL; 2320 2321 // advance read pointer so we don't end up reading this again 2322 *offset += chunk_size; 2323 return ERROR_IO; 2324 } 2325 2326 mLastTrack->meta.setData( 2327 kKeyTextFormatData, 0, buffer, size + chunk_size); 2328 2329 delete[] buffer; 2330 2331 *offset += chunk_size; 2332 break; 2333 } 2334 2335 case FOURCC('c', 'o', 'v', 'r'): 2336 { 2337 *offset += chunk_size; 2338 2339 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64, 2340 chunk_data_size, data_offset); 2341 2342 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) { 2343 return ERROR_MALFORMED; 2344 } 2345 auto buffer = heapbuffer<uint8_t>(chunk_data_size); 2346 if (buffer.get() == NULL) { 2347 ALOGE("b/28471206"); 2348 return NO_MEMORY; 2349 } 2350 if (mDataSource->readAt( 2351 data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) { 2352 return ERROR_IO; 2353 } 2354 const int kSkipBytesOfDataBox = 16; 2355 if (chunk_data_size <= kSkipBytesOfDataBox) { 2356 return ERROR_MALFORMED; 2357 } 2358 2359 mFileMetaData.setData( 2360 kKeyAlbumArt, MetaData::TYPE_NONE, 2361 buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 2362 2363 break; 2364 } 2365 2366 case FOURCC('c', 'o', 'l', 'r'): 2367 { 2368 *offset += chunk_size; 2369 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd') 2370 // ignore otherwise 2371 if (depth >= 2 && mPath[depth - 2] == FOURCC('s', 't', 's', 'd')) { 2372 status_t err = parseColorInfo(data_offset, chunk_data_size); 2373 if (err != OK) { 2374 return err; 2375 } 2376 } 2377 2378 break; 2379 } 2380 2381 case FOURCC('t', 'i', 't', 'l'): 2382 case FOURCC('p', 'e', 'r', 
'f'): 2383 case FOURCC('a', 'u', 't', 'h'): 2384 case FOURCC('g', 'n', 'r', 'e'): 2385 case FOURCC('a', 'l', 'b', 'm'): 2386 case FOURCC('y', 'r', 'r', 'c'): 2387 { 2388 *offset += chunk_size; 2389 2390 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth); 2391 2392 if (err != OK) { 2393 return err; 2394 } 2395 2396 break; 2397 } 2398 2399 case FOURCC('I', 'D', '3', '2'): 2400 { 2401 *offset += chunk_size; 2402 2403 if (chunk_data_size < 6) { 2404 return ERROR_MALFORMED; 2405 } 2406 2407 parseID3v2MetaData(data_offset + 6); 2408 2409 break; 2410 } 2411 2412 case FOURCC('-', '-', '-', '-'): 2413 { 2414 mLastCommentMean.clear(); 2415 mLastCommentName.clear(); 2416 mLastCommentData.clear(); 2417 *offset += chunk_size; 2418 break; 2419 } 2420 2421 case FOURCC('s', 'i', 'd', 'x'): 2422 { 2423 status_t err = parseSegmentIndex(data_offset, chunk_data_size); 2424 if (err != OK) { 2425 return err; 2426 } 2427 *offset += chunk_size; 2428 return UNKNOWN_ERROR; // stop parsing after sidx 2429 } 2430 2431 case FOURCC('a', 'c', '-', '3'): 2432 { 2433 *offset += chunk_size; 2434 return parseAC3SampleEntry(data_offset); 2435 } 2436 2437 case FOURCC('f', 't', 'y', 'p'): 2438 { 2439 if (chunk_data_size < 8 || depth != 0) { 2440 return ERROR_MALFORMED; 2441 } 2442 2443 off64_t stop_offset = *offset + chunk_size; 2444 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4; 2445 std::set<uint32_t> brandSet; 2446 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 2447 if (i == 1) { 2448 // Skip this index, it refers to the minorVersion, 2449 // not a brand. 
2450 continue; 2451 } 2452 2453 uint32_t brand; 2454 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) { 2455 return ERROR_MALFORMED; 2456 } 2457 2458 brand = ntohl(brand); 2459 brandSet.insert(brand); 2460 } 2461 2462 if (brandSet.count(FOURCC('q', 't', ' ', ' ')) > 0) { 2463 mIsQT = true; 2464 } else { 2465 if (brandSet.count(FOURCC('m', 'i', 'f', '1')) > 0 2466 && brandSet.count(FOURCC('h', 'e', 'i', 'c')) > 0) { 2467 ALOGV("identified HEIF image"); 2468 2469 mIsHeif = true; 2470 brandSet.erase(FOURCC('m', 'i', 'f', '1')); 2471 brandSet.erase(FOURCC('h', 'e', 'i', 'c')); 2472 } 2473 2474 if (!brandSet.empty()) { 2475 // This means that the file should have moov box. 2476 // It could be any iso files (mp4, heifs, etc.) 2477 mHasMoovBox = true; 2478 if (mIsHeif) { 2479 ALOGV("identified HEIF image with other tracks"); 2480 } 2481 } 2482 } 2483 2484 *offset = stop_offset; 2485 2486 break; 2487 } 2488 2489 default: 2490 { 2491 // check if we're parsing 'ilst' for meta keys 2492 // if so, treat type as a number (key-id). 
2493 if (underQTMetaPath(mPath, 3)) { 2494 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size); 2495 if (err != OK) { 2496 return err; 2497 } 2498 } 2499 2500 *offset += chunk_size; 2501 break; 2502 } 2503 } 2504 2505 return OK; 2506} 2507 2508status_t MPEG4Extractor::parseAC3SampleEntry(off64_t offset) { 2509 // skip 16 bytes: 2510 // + 6-byte reserved, 2511 // + 2-byte data reference index, 2512 // + 8-byte reserved 2513 offset += 16; 2514 uint16_t channelCount; 2515 if (!mDataSource->getUInt16(offset, &channelCount)) { 2516 return ERROR_MALFORMED; 2517 } 2518 // skip 8 bytes: 2519 // + 2-byte channelCount, 2520 // + 2-byte sample size, 2521 // + 4-byte reserved 2522 offset += 8; 2523 uint16_t sampleRate; 2524 if (!mDataSource->getUInt16(offset, &sampleRate)) { 2525 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read sample rate"); 2526 return ERROR_MALFORMED; 2527 } 2528 2529 // skip 4 bytes: 2530 // + 2-byte sampleRate, 2531 // + 2-byte reserved 2532 offset += 4; 2533 return parseAC3SpecificBox(offset, sampleRate); 2534} 2535 2536status_t MPEG4Extractor::parseAC3SpecificBox( 2537 off64_t offset, uint16_t sampleRate) { 2538 uint32_t size; 2539 // + 4-byte size 2540 // + 4-byte type 2541 // + 3-byte payload 2542 const uint32_t kAC3SpecificBoxSize = 11; 2543 if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) { 2544 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size"); 2545 return ERROR_MALFORMED; 2546 } 2547 2548 offset += 4; 2549 uint32_t type; 2550 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC('d', 'a', 'c', '3')) { 2551 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3"); 2552 return ERROR_MALFORMED; 2553 } 2554 2555 offset += 4; 2556 const uint32_t kAC3SpecificBoxPayloadSize = 3; 2557 uint8_t chunk[kAC3SpecificBoxPayloadSize]; 2558 if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) { 2559 
ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields"); 2560 return ERROR_MALFORMED; 2561 } 2562 2563 ABitReader br(chunk, sizeof(chunk)); 2564 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5}; 2565 static const unsigned sampleRateTable[] = {48000, 44100, 32000}; 2566 2567 unsigned fscod = br.getBits(2); 2568 if (fscod == 3) { 2569 ALOGE("Incorrect fscod (3) in AC3 header"); 2570 return ERROR_MALFORMED; 2571 } 2572 unsigned boxSampleRate = sampleRateTable[fscod]; 2573 if (boxSampleRate != sampleRate) { 2574 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d", 2575 boxSampleRate, sampleRate); 2576 return ERROR_MALFORMED; 2577 } 2578 2579 unsigned bsid = br.getBits(5); 2580 if (bsid > 8) { 2581 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?"); 2582 return ERROR_MALFORMED; 2583 } 2584 2585 // skip 2586 unsigned bsmod __unused = br.getBits(3); 2587 2588 unsigned acmod = br.getBits(3); 2589 unsigned lfeon = br.getBits(1); 2590 unsigned channelCount = channelCountTable[acmod] + lfeon; 2591 2592 if (mLastTrack == NULL) { 2593 return ERROR_MALFORMED; 2594 } 2595 mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3); 2596 mLastTrack->meta.setInt32(kKeyChannelCount, channelCount); 2597 mLastTrack->meta.setInt32(kKeySampleRate, sampleRate); 2598 return OK; 2599} 2600 2601status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 2602 ALOGV("MPEG4Extractor::parseSegmentIndex"); 2603 2604 if (size < 12) { 2605 return -EINVAL; 2606 } 2607 2608 uint32_t flags; 2609 if (!mDataSource->getUInt32(offset, &flags)) { 2610 return ERROR_MALFORMED; 2611 } 2612 2613 uint32_t version = flags >> 24; 2614 flags &= 0xffffff; 2615 2616 ALOGV("sidx version %d", version); 2617 2618 uint32_t referenceId; 2619 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 2620 return ERROR_MALFORMED; 2621 } 2622 2623 uint32_t timeScale; 2624 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 2625 return 
ERROR_MALFORMED; 2626 } 2627 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 2628 if (timeScale == 0) 2629 return ERROR_MALFORMED; 2630 2631 uint64_t earliestPresentationTime; 2632 uint64_t firstOffset; 2633 2634 offset += 12; 2635 size -= 12; 2636 2637 if (version == 0) { 2638 if (size < 8) { 2639 return -EINVAL; 2640 } 2641 uint32_t tmp; 2642 if (!mDataSource->getUInt32(offset, &tmp)) { 2643 return ERROR_MALFORMED; 2644 } 2645 earliestPresentationTime = tmp; 2646 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 2647 return ERROR_MALFORMED; 2648 } 2649 firstOffset = tmp; 2650 offset += 8; 2651 size -= 8; 2652 } else { 2653 if (size < 16) { 2654 return -EINVAL; 2655 } 2656 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 2657 return ERROR_MALFORMED; 2658 } 2659 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 2660 return ERROR_MALFORMED; 2661 } 2662 offset += 16; 2663 size -= 16; 2664 } 2665 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset); 2666 2667 if (size < 4) { 2668 return -EINVAL; 2669 } 2670 2671 uint16_t referenceCount; 2672 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 2673 return ERROR_MALFORMED; 2674 } 2675 offset += 4; 2676 size -= 4; 2677 ALOGV("refcount: %d", referenceCount); 2678 2679 if (size < referenceCount * 12) { 2680 return -EINVAL; 2681 } 2682 2683 uint64_t total_duration = 0; 2684 for (unsigned int i = 0; i < referenceCount; i++) { 2685 uint32_t d1, d2, d3; 2686 2687 if (!mDataSource->getUInt32(offset, &d1) || // size 2688 !mDataSource->getUInt32(offset + 4, &d2) || // duration 2689 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 2690 return ERROR_MALFORMED; 2691 } 2692 2693 if (d1 & 0x80000000) { 2694 ALOGW("sub-sidx boxes not supported yet"); 2695 } 2696 bool sap = d3 & 0x80000000; 2697 uint32_t saptype = (d3 >> 28) & 7; 2698 if (!sap || (saptype != 1 && saptype != 2)) { 2699 // type 1 and 2 are sync samples 2700 ALOGW("not a stream access point, or 
unsupported type: %08x", d3); 2701 } 2702 total_duration += d2; 2703 offset += 12; 2704 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 2705 SidxEntry se; 2706 se.mSize = d1 & 0x7fffffff; 2707 se.mDurationUs = 1000000LL * d2 / timeScale; 2708 mSidxEntries.add(se); 2709 } 2710 2711 uint64_t sidxDuration = total_duration * 1000000 / timeScale; 2712 2713 if (mLastTrack == NULL) 2714 return ERROR_MALFORMED; 2715 2716 int64_t metaDuration; 2717 if (!mLastTrack->meta.findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 2718 mLastTrack->meta.setInt64(kKeyDuration, sidxDuration); 2719 } 2720 return OK; 2721} 2722 2723status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) { 2724 if (size < 8) { 2725 return ERROR_MALFORMED; 2726 } 2727 2728 uint32_t count; 2729 if (!mDataSource->getUInt32(offset + 4, &count)) { 2730 return ERROR_MALFORMED; 2731 } 2732 2733 if (mMetaKeyMap.size() > 0) { 2734 ALOGW("'keys' atom seen again, discarding existing entries"); 2735 mMetaKeyMap.clear(); 2736 } 2737 2738 off64_t keyOffset = offset + 8; 2739 off64_t stopOffset = offset + size; 2740 for (size_t i = 1; i <= count; i++) { 2741 if (keyOffset + 8 > stopOffset) { 2742 return ERROR_MALFORMED; 2743 } 2744 2745 uint32_t keySize; 2746 if (!mDataSource->getUInt32(keyOffset, &keySize) 2747 || keySize < 8 2748 || keyOffset + keySize > stopOffset) { 2749 return ERROR_MALFORMED; 2750 } 2751 2752 uint32_t type; 2753 if (!mDataSource->getUInt32(keyOffset + 4, &type) 2754 || type != FOURCC('m', 'd', 't', 'a')) { 2755 return ERROR_MALFORMED; 2756 } 2757 2758 keySize -= 8; 2759 keyOffset += 8; 2760 2761 auto keyData = heapbuffer<uint8_t>(keySize); 2762 if (keyData.get() == NULL) { 2763 return ERROR_MALFORMED; 2764 } 2765 if (mDataSource->readAt( 2766 keyOffset, keyData.get(), keySize) < (ssize_t) keySize) { 2767 return ERROR_MALFORMED; 2768 } 2769 2770 AString key((const char *)keyData.get(), keySize); 2771 mMetaKeyMap.add(i, key); 2772 2773 keyOffset += keySize; 2774 } 2775 
return OK; 2776} 2777 2778status_t MPEG4Extractor::parseQTMetaVal( 2779 int32_t keyId, off64_t offset, size_t size) { 2780 ssize_t index = mMetaKeyMap.indexOfKey(keyId); 2781 if (index < 0) { 2782 // corresponding key is not present, ignore 2783 return ERROR_MALFORMED; 2784 } 2785 2786 if (size <= 16) { 2787 return ERROR_MALFORMED; 2788 } 2789 uint32_t dataSize; 2790 if (!mDataSource->getUInt32(offset, &dataSize) 2791 || dataSize > size || dataSize <= 16) { 2792 return ERROR_MALFORMED; 2793 } 2794 uint32_t atomFourCC; 2795 if (!mDataSource->getUInt32(offset + 4, &atomFourCC) 2796 || atomFourCC != FOURCC('d', 'a', 't', 'a')) { 2797 return ERROR_MALFORMED; 2798 } 2799 uint32_t dataType; 2800 if (!mDataSource->getUInt32(offset + 8, &dataType) 2801 || ((dataType & 0xff000000) != 0)) { 2802 // not well-known type 2803 return ERROR_MALFORMED; 2804 } 2805 2806 dataSize -= 16; 2807 offset += 16; 2808 2809 if (dataType == 23 && dataSize >= 4) { 2810 // BE Float32 2811 uint32_t val; 2812 if (!mDataSource->getUInt32(offset, &val)) { 2813 return ERROR_MALFORMED; 2814 } 2815 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) { 2816 mFileMetaData.setFloat(kKeyCaptureFramerate, *(float *)&val); 2817 } 2818 } else if (dataType == 67 && dataSize >= 4) { 2819 // BE signed int32 2820 uint32_t val; 2821 if (!mDataSource->getUInt32(offset, &val)) { 2822 return ERROR_MALFORMED; 2823 } 2824 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) { 2825 mFileMetaData.setInt32(kKeyTemporalLayerCount, val); 2826 } 2827 } else { 2828 // add more keys if needed 2829 ALOGV("ignoring key: type %d, size %d", dataType, dataSize); 2830 } 2831 2832 return OK; 2833} 2834 2835status_t MPEG4Extractor::parseTrackHeader( 2836 off64_t data_offset, off64_t data_size) { 2837 if (data_size < 4) { 2838 return ERROR_MALFORMED; 2839 } 2840 2841 uint8_t version; 2842 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 2843 return ERROR_IO; 2844 } 2845 
2846 size_t dynSize = (version == 1) ? 36 : 24; 2847 2848 uint8_t buffer[36 + 60]; 2849 2850 if (data_size != (off64_t)dynSize + 60) { 2851 return ERROR_MALFORMED; 2852 } 2853 2854 if (mDataSource->readAt( 2855 data_offset, buffer, data_size) < (ssize_t)data_size) { 2856 return ERROR_IO; 2857 } 2858 2859 uint64_t ctime __unused, mtime __unused, duration __unused; 2860 int32_t id; 2861 2862 if (version == 1) { 2863 ctime = U64_AT(&buffer[4]); 2864 mtime = U64_AT(&buffer[12]); 2865 id = U32_AT(&buffer[20]); 2866 duration = U64_AT(&buffer[28]); 2867 } else if (version == 0) { 2868 ctime = U32_AT(&buffer[4]); 2869 mtime = U32_AT(&buffer[8]); 2870 id = U32_AT(&buffer[12]); 2871 duration = U32_AT(&buffer[20]); 2872 } else { 2873 return ERROR_UNSUPPORTED; 2874 } 2875 2876 if (mLastTrack == NULL) 2877 return ERROR_MALFORMED; 2878 2879 mLastTrack->meta.setInt32(kKeyTrackID, id); 2880 2881 size_t matrixOffset = dynSize + 16; 2882 int32_t a00 = U32_AT(&buffer[matrixOffset]); 2883 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 2884 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 2885 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 2886 2887#if 0 2888 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 2889 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 2890 2891 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 2892 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 2893 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 2894 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 2895#endif 2896 2897 uint32_t rotationDegrees; 2898 2899 static const int32_t kFixedOne = 0x10000; 2900 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 2901 // Identity, no rotation 2902 rotationDegrees = 0; 2903 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 2904 rotationDegrees = 90; 2905 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 2906 rotationDegrees = 270; 2907 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { 2908 
rotationDegrees = 180; 2909 } else { 2910 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 2911 rotationDegrees = 0; 2912 } 2913 2914 if (rotationDegrees != 0) { 2915 mLastTrack->meta.setInt32(kKeyRotation, rotationDegrees); 2916 } 2917 2918 // Handle presentation display size, which could be different 2919 // from the image size indicated by kKeyWidth and kKeyHeight. 2920 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2921 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2922 mLastTrack->meta.setInt32(kKeyDisplayWidth, width >> 16); 2923 mLastTrack->meta.setInt32(kKeyDisplayHeight, height >> 16); 2924 2925 return OK; 2926} 2927 2928status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) { 2929 if (size == 0) { 2930 return OK; 2931 } 2932 2933 if (size < 4 || size == SIZE_MAX) { 2934 return ERROR_MALFORMED; 2935 } 2936 2937 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2938 if (buffer == NULL) { 2939 return ERROR_MALFORMED; 2940 } 2941 if (mDataSource->readAt( 2942 offset, buffer, size) != (ssize_t)size) { 2943 delete[] buffer; 2944 buffer = NULL; 2945 2946 return ERROR_IO; 2947 } 2948 2949 uint32_t flags = U32_AT(buffer); 2950 2951 uint32_t metadataKey = 0; 2952 char chunk[5]; 2953 MakeFourCCString(mPath[4], chunk); 2954 ALOGV("meta: %s @ %lld", chunk, (long long)offset); 2955 switch ((int32_t)mPath[4]) { 2956 case FOURCC(0xa9, 'a', 'l', 'b'): 2957 { 2958 metadataKey = kKeyAlbum; 2959 break; 2960 } 2961 case FOURCC(0xa9, 'A', 'R', 'T'): 2962 { 2963 metadataKey = kKeyArtist; 2964 break; 2965 } 2966 case FOURCC('a', 'A', 'R', 'T'): 2967 { 2968 metadataKey = kKeyAlbumArtist; 2969 break; 2970 } 2971 case FOURCC(0xa9, 'd', 'a', 'y'): 2972 { 2973 metadataKey = kKeyYear; 2974 break; 2975 } 2976 case FOURCC(0xa9, 'n', 'a', 'm'): 2977 { 2978 metadataKey = kKeyTitle; 2979 break; 2980 } 2981 case FOURCC(0xa9, 'w', 'r', 't'): 2982 { 2983 metadataKey = kKeyWriter; 2984 break; 2985 } 2986 case FOURCC('c', 'o', 'v', 'r'): 2987 { 2988 
metadataKey = kKeyAlbumArt; 2989 break; 2990 } 2991 case FOURCC('g', 'n', 'r', 'e'): 2992 { 2993 metadataKey = kKeyGenre; 2994 break; 2995 } 2996 case FOURCC(0xa9, 'g', 'e', 'n'): 2997 { 2998 metadataKey = kKeyGenre; 2999 break; 3000 } 3001 case FOURCC('c', 'p', 'i', 'l'): 3002 { 3003 if (size == 9 && flags == 21) { 3004 char tmp[16]; 3005 sprintf(tmp, "%d", 3006 (int)buffer[size - 1]); 3007 3008 mFileMetaData.setCString(kKeyCompilation, tmp); 3009 } 3010 break; 3011 } 3012 case FOURCC('t', 'r', 'k', 'n'): 3013 { 3014 if (size == 16 && flags == 0) { 3015 char tmp[16]; 3016 uint16_t* pTrack = (uint16_t*)&buffer[10]; 3017 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 3018 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 3019 3020 mFileMetaData.setCString(kKeyCDTrackNumber, tmp); 3021 } 3022 break; 3023 } 3024 case FOURCC('d', 'i', 's', 'k'): 3025 { 3026 if ((size == 14 || size == 16) && flags == 0) { 3027 char tmp[16]; 3028 uint16_t* pDisc = (uint16_t*)&buffer[10]; 3029 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 3030 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 3031 3032 mFileMetaData.setCString(kKeyDiscNumber, tmp); 3033 } 3034 break; 3035 } 3036 case FOURCC('-', '-', '-', '-'): 3037 { 3038 buffer[size] = '\0'; 3039 switch (mPath[5]) { 3040 case FOURCC('m', 'e', 'a', 'n'): 3041 mLastCommentMean.setTo((const char *)buffer + 4); 3042 break; 3043 case FOURCC('n', 'a', 'm', 'e'): 3044 mLastCommentName.setTo((const char *)buffer + 4); 3045 break; 3046 case FOURCC('d', 'a', 't', 'a'): 3047 if (size < 8) { 3048 delete[] buffer; 3049 buffer = NULL; 3050 ALOGE("b/24346430"); 3051 return ERROR_MALFORMED; 3052 } 3053 mLastCommentData.setTo((const char *)buffer + 8); 3054 break; 3055 } 3056 3057 // Once we have a set of mean/name/data info, go ahead and process 3058 // it to see if its something we are interested in. 
Whether or not 3059 // were are interested in the specific tag, make sure to clear out 3060 // the set so we can be ready to process another tuple should one 3061 // show up later in the file. 3062 if ((mLastCommentMean.length() != 0) && 3063 (mLastCommentName.length() != 0) && 3064 (mLastCommentData.length() != 0)) { 3065 3066 if (mLastCommentMean == "com.apple.iTunes" 3067 && mLastCommentName == "iTunSMPB") { 3068 int32_t delay, padding; 3069 if (sscanf(mLastCommentData, 3070 " %*x %x %x %*x", &delay, &padding) == 2) { 3071 if (mLastTrack == NULL) { 3072 delete[] buffer; 3073 return ERROR_MALFORMED; 3074 } 3075 3076 mLastTrack->meta.setInt32(kKeyEncoderDelay, delay); 3077 mLastTrack->meta.setInt32(kKeyEncoderPadding, padding); 3078 } 3079 } 3080 3081 mLastCommentMean.clear(); 3082 mLastCommentName.clear(); 3083 mLastCommentData.clear(); 3084 } 3085 break; 3086 } 3087 3088 default: 3089 break; 3090 } 3091 3092 if (size >= 8 && metadataKey && !mFileMetaData.hasData(metadataKey)) { 3093 if (metadataKey == kKeyAlbumArt) { 3094 mFileMetaData.setData( 3095 kKeyAlbumArt, MetaData::TYPE_NONE, 3096 buffer + 8, size - 8); 3097 } else if (metadataKey == kKeyGenre) { 3098 if (flags == 0) { 3099 // uint8_t genre code, iTunes genre codes are 3100 // the standard id3 codes, except they start 3101 // at 1 instead of 0 (e.g. Pop is 14, not 13) 3102 // We use standard id3 numbering, so subtract 1. 
3103 int genrecode = (int)buffer[size - 1]; 3104 genrecode--; 3105 if (genrecode < 0) { 3106 genrecode = 255; // reserved for 'unknown genre' 3107 } 3108 char genre[10]; 3109 sprintf(genre, "%d", genrecode); 3110 3111 mFileMetaData.setCString(metadataKey, genre); 3112 } else if (flags == 1) { 3113 // custom genre string 3114 buffer[size] = '\0'; 3115 3116 mFileMetaData.setCString( 3117 metadataKey, (const char *)buffer + 8); 3118 } 3119 } else { 3120 buffer[size] = '\0'; 3121 3122 mFileMetaData.setCString( 3123 metadataKey, (const char *)buffer + 8); 3124 } 3125 } 3126 3127 delete[] buffer; 3128 buffer = NULL; 3129 3130 return OK; 3131} 3132 3133status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) { 3134 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) { 3135 return ERROR_MALFORMED; 3136 } 3137 3138 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 3139 if (buffer == NULL) { 3140 return ERROR_MALFORMED; 3141 } 3142 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) { 3143 delete[] buffer; 3144 buffer = NULL; 3145 3146 return ERROR_IO; 3147 } 3148 3149 int32_t type = U32_AT(&buffer[0]); 3150 if ((type == FOURCC('n', 'c', 'l', 'x') && size >= 11) 3151 || (type == FOURCC('n', 'c', 'l', 'c') && size >= 10)) { 3152 int32_t primaries = U16_AT(&buffer[4]); 3153 int32_t transfer = U16_AT(&buffer[6]); 3154 int32_t coeffs = U16_AT(&buffer[8]); 3155 bool fullRange = (type == FOURCC('n', 'c', 'l', 'x')) && (buffer[10] & 128); 3156 3157 ColorAspects aspects; 3158 ColorUtils::convertIsoColorAspectsToCodecAspects( 3159 primaries, transfer, coeffs, fullRange, aspects); 3160 3161 // only store the first color specification 3162 if (!mLastTrack->meta.hasData(kKeyColorPrimaries)) { 3163 mLastTrack->meta.setInt32(kKeyColorPrimaries, aspects.mPrimaries); 3164 mLastTrack->meta.setInt32(kKeyTransferFunction, aspects.mTransfer); 3165 mLastTrack->meta.setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs); 3166 mLastTrack->meta.setInt32(kKeyColorRange, 
aspects.mRange); 3167 } 3168 } 3169 3170 delete[] buffer; 3171 buffer = NULL; 3172 3173 return OK; 3174} 3175 3176status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) { 3177 if (size < 4 || size == SIZE_MAX) { 3178 return ERROR_MALFORMED; 3179 } 3180 3181 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 3182 if (buffer == NULL) { 3183 return ERROR_MALFORMED; 3184 } 3185 if (mDataSource->readAt( 3186 offset, buffer, size) != (ssize_t)size) { 3187 delete[] buffer; 3188 buffer = NULL; 3189 3190 return ERROR_IO; 3191 } 3192 3193 uint32_t metadataKey = 0; 3194 switch (mPath[depth]) { 3195 case FOURCC('t', 'i', 't', 'l'): 3196 { 3197 metadataKey = kKeyTitle; 3198 break; 3199 } 3200 case FOURCC('p', 'e', 'r', 'f'): 3201 { 3202 metadataKey = kKeyArtist; 3203 break; 3204 } 3205 case FOURCC('a', 'u', 't', 'h'): 3206 { 3207 metadataKey = kKeyWriter; 3208 break; 3209 } 3210 case FOURCC('g', 'n', 'r', 'e'): 3211 { 3212 metadataKey = kKeyGenre; 3213 break; 3214 } 3215 case FOURCC('a', 'l', 'b', 'm'): 3216 { 3217 if (buffer[size - 1] != '\0') { 3218 char tmp[4]; 3219 sprintf(tmp, "%u", buffer[size - 1]); 3220 3221 mFileMetaData.setCString(kKeyCDTrackNumber, tmp); 3222 } 3223 3224 metadataKey = kKeyAlbum; 3225 break; 3226 } 3227 case FOURCC('y', 'r', 'r', 'c'): 3228 { 3229 if (size < 6) { 3230 delete[] buffer; 3231 buffer = NULL; 3232 ALOGE("b/62133227"); 3233 android_errorWriteLog(0x534e4554, "62133227"); 3234 return ERROR_MALFORMED; 3235 } 3236 char tmp[5]; 3237 uint16_t year = U16_AT(&buffer[4]); 3238 3239 if (year < 10000) { 3240 sprintf(tmp, "%u", year); 3241 3242 mFileMetaData.setCString(kKeyYear, tmp); 3243 } 3244 break; 3245 } 3246 3247 default: 3248 break; 3249 } 3250 3251 if (metadataKey > 0) { 3252 bool isUTF8 = true; // Common case 3253 char16_t *framedata = NULL; 3254 int len16 = 0; // Number of UTF-16 characters 3255 3256 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00 3257 if (size < 6) { 3258 delete[] buffer; 
3259 buffer = NULL; 3260 return ERROR_MALFORMED; 3261 } 3262 3263 if (size - 6 >= 4) { 3264 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator 3265 framedata = (char16_t *)(buffer + 6); 3266 if (0xfffe == *framedata) { 3267 // endianness marker (BOM) doesn't match host endianness 3268 for (int i = 0; i < len16; i++) { 3269 framedata[i] = bswap_16(framedata[i]); 3270 } 3271 // BOM is now swapped to 0xfeff, we will execute next block too 3272 } 3273 3274 if (0xfeff == *framedata) { 3275 // Remove the BOM 3276 framedata++; 3277 len16--; 3278 isUTF8 = false; 3279 } 3280 // else normal non-zero-length UTF-8 string 3281 // we can't handle UTF-16 without BOM as there is no other 3282 // indication of encoding. 3283 } 3284 3285 if (isUTF8) { 3286 buffer[size] = 0; 3287 mFileMetaData.setCString(metadataKey, (const char *)buffer + 6); 3288 } else { 3289 // Convert from UTF-16 string to UTF-8 string. 3290 String8 tmpUTF8str(framedata, len16); 3291 mFileMetaData.setCString(metadataKey, tmpUTF8str.string()); 3292 } 3293 } 3294 3295 delete[] buffer; 3296 buffer = NULL; 3297 3298 return OK; 3299} 3300 3301void MPEG4Extractor::parseID3v2MetaData(off64_t offset) { 3302 ID3 id3(mDataSource, true /* ignorev1 */, offset); 3303 3304 if (id3.isValid()) { 3305 struct Map { 3306 int key; 3307 const char *tag1; 3308 const char *tag2; 3309 }; 3310 static const Map kMap[] = { 3311 { kKeyAlbum, "TALB", "TAL" }, 3312 { kKeyArtist, "TPE1", "TP1" }, 3313 { kKeyAlbumArtist, "TPE2", "TP2" }, 3314 { kKeyComposer, "TCOM", "TCM" }, 3315 { kKeyGenre, "TCON", "TCO" }, 3316 { kKeyTitle, "TIT2", "TT2" }, 3317 { kKeyYear, "TYE", "TYER" }, 3318 { kKeyAuthor, "TXT", "TEXT" }, 3319 { kKeyCDTrackNumber, "TRK", "TRCK" }, 3320 { kKeyDiscNumber, "TPA", "TPOS" }, 3321 { kKeyCompilation, "TCP", "TCMP" }, 3322 }; 3323 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 3324 3325 for (size_t i = 0; i < kNumMapEntries; ++i) { 3326 if (!mFileMetaData.hasData(kMap[i].key)) { 3327 
ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 3328 if (it->done()) { 3329 delete it; 3330 it = new ID3::Iterator(id3, kMap[i].tag2); 3331 } 3332 3333 if (it->done()) { 3334 delete it; 3335 continue; 3336 } 3337 3338 String8 s; 3339 it->getString(&s); 3340 delete it; 3341 3342 mFileMetaData.setCString(kMap[i].key, s); 3343 } 3344 } 3345 3346 size_t dataSize; 3347 String8 mime; 3348 const void *data = id3.getAlbumArt(&dataSize, &mime); 3349 3350 if (data) { 3351 mFileMetaData.setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 3352 mFileMetaData.setCString(kKeyAlbumArtMIME, mime.string()); 3353 } 3354 } 3355} 3356 3357MediaTrack *MPEG4Extractor::getTrack(size_t index) { 3358 status_t err; 3359 if ((err = readMetaData()) != OK) { 3360 return NULL; 3361 } 3362 3363 Track *track = mFirstTrack; 3364 while (index > 0) { 3365 if (track == NULL) { 3366 return NULL; 3367 } 3368 3369 track = track->next; 3370 --index; 3371 } 3372 3373 if (track == NULL) { 3374 return NULL; 3375 } 3376 3377 3378 Trex *trex = NULL; 3379 int32_t trackId; 3380 if (track->meta.findInt32(kKeyTrackID, &trackId)) { 3381 for (size_t i = 0; i < mTrex.size(); i++) { 3382 Trex *t = &mTrex.editItemAt(i); 3383 if (t->track_ID == (uint32_t) trackId) { 3384 trex = t; 3385 break; 3386 } 3387 } 3388 } else { 3389 ALOGE("b/21657957"); 3390 return NULL; 3391 } 3392 3393 ALOGV("getTrack called, pssh: %zu", mPssh.size()); 3394 3395 const char *mime; 3396 if (!track->meta.findCString(kKeyMIMEType, &mime)) { 3397 return NULL; 3398 } 3399 3400 sp<ItemTable> itemTable; 3401 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 3402 uint32_t type; 3403 const void *data; 3404 size_t size; 3405 if (!track->meta.findData(kKeyAVCC, &type, &data, &size)) { 3406 return NULL; 3407 } 3408 3409 const uint8_t *ptr = (const uint8_t *)data; 3410 3411 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1 3412 return NULL; 3413 } 3414 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) 3415 || 
!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) { 3416 uint32_t type; 3417 const void *data; 3418 size_t size; 3419 if (!track->meta.findData(kKeyHVCC, &type, &data, &size)) { 3420 return NULL; 3421 } 3422 3423 const uint8_t *ptr = (const uint8_t *)data; 3424 3425 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1 3426 return NULL; 3427 } 3428 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) { 3429 itemTable = mItemTable; 3430 } 3431 } 3432 3433 MPEG4Source *source = new MPEG4Source( 3434 track->meta, mDataSource, track->timescale, track->sampleTable, 3435 mSidxEntries, trex, mMoofOffset, itemTable); 3436 if (source->init() != OK) { 3437 delete source; 3438 return NULL; 3439 } 3440 return source; 3441} 3442 3443// static 3444status_t MPEG4Extractor::verifyTrack(Track *track) { 3445 const char *mime; 3446 CHECK(track->meta.findCString(kKeyMIMEType, &mime)); 3447 3448 uint32_t type; 3449 const void *data; 3450 size_t size; 3451 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 3452 if (!track->meta.findData(kKeyAVCC, &type, &data, &size) 3453 || type != kTypeAVCC) { 3454 return ERROR_MALFORMED; 3455 } 3456 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 3457 if (!track->meta.findData(kKeyHVCC, &type, &data, &size) 3458 || type != kTypeHVCC) { 3459 return ERROR_MALFORMED; 3460 } 3461 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 3462 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2) 3463 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 3464 if (!track->meta.findData(kKeyESDS, &type, &data, &size) 3465 || type != kTypeESDS) { 3466 return ERROR_MALFORMED; 3467 } 3468 } 3469 3470 if (track->sampleTable == NULL || !track->sampleTable->isValid()) { 3471 // Make sure we have all the metadata we need. 
3472 ALOGE("stbl atom missing/invalid."); 3473 return ERROR_MALFORMED; 3474 } 3475 3476 if (track->timescale == 0) { 3477 ALOGE("timescale invalid."); 3478 return ERROR_MALFORMED; 3479 } 3480 3481 return OK; 3482} 3483 3484typedef enum { 3485 //AOT_NONE = -1, 3486 //AOT_NULL_OBJECT = 0, 3487 //AOT_AAC_MAIN = 1, /**< Main profile */ 3488 AOT_AAC_LC = 2, /**< Low Complexity object */ 3489 //AOT_AAC_SSR = 3, 3490 //AOT_AAC_LTP = 4, 3491 AOT_SBR = 5, 3492 //AOT_AAC_SCAL = 6, 3493 //AOT_TWIN_VQ = 7, 3494 //AOT_CELP = 8, 3495 //AOT_HVXC = 9, 3496 //AOT_RSVD_10 = 10, /**< (reserved) */ 3497 //AOT_RSVD_11 = 11, /**< (reserved) */ 3498 //AOT_TTSI = 12, /**< TTSI Object */ 3499 //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */ 3500 //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */ 3501 //AOT_GEN_MIDI = 15, /**< General MIDI object */ 3502 //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */ 3503 AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */ 3504 //AOT_RSVD_18 = 18, /**< (reserved) */ 3505 //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */ 3506 AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */ 3507 //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */ 3508 AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */ 3509 AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */ 3510 //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */ 3511 //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */ 3512 //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */ 3513 //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */ 3514 //AOT_RSVD_28 = 28, /**< might become SSC */ 3515 AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */ 3516 //AOT_MPEGS = 30, /**< MPEG Surround */ 3517 3518 AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */ 3519 3520 //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */ 3521 //AOT_MP3ONMP4_L2 = 33, /**< 
MPEG-Layer2 in mp4 */ 3522 //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */ 3523 //AOT_RSVD_35 = 35, /**< might become DST */ 3524 //AOT_RSVD_36 = 36, /**< might become ALS */ 3525 //AOT_AAC_SLS = 37, /**< AAC + SLS */ 3526 //AOT_SLS = 38, /**< SLS */ 3527 //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */ 3528 3529 //AOT_USAC = 42, /**< USAC */ 3530 //AOT_SAOC = 43, /**< SAOC */ 3531 //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */ 3532 3533 //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */ 3534} AUDIO_OBJECT_TYPE; 3535 3536status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 3537 const void *esds_data, size_t esds_size) { 3538 ESDS esds(esds_data, esds_size); 3539 3540 uint8_t objectTypeIndication; 3541 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 3542 return ERROR_MALFORMED; 3543 } 3544 3545 if (objectTypeIndication == 0xe1) { 3546 // This isn't MPEG4 audio at all, it's QCELP 14k... 3547 if (mLastTrack == NULL) 3548 return ERROR_MALFORMED; 3549 3550 mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 3551 return OK; 3552 } 3553 3554 if (objectTypeIndication == 0x6b) { 3555 // The media subtype is MP3 audio 3556 // Our software MP3 audio decoder may not be able to handle 3557 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 3558 ALOGE("MP3 track in MP4/3GPP file is not supported"); 3559 return ERROR_UNSUPPORTED; 3560 } 3561 3562 if (mLastTrack != NULL) { 3563 uint32_t maxBitrate = 0; 3564 uint32_t avgBitrate = 0; 3565 esds.getBitRate(&maxBitrate, &avgBitrate); 3566 if (maxBitrate > 0 && maxBitrate < INT32_MAX) { 3567 mLastTrack->meta.setInt32(kKeyMaxBitRate, (int32_t)maxBitrate); 3568 } 3569 if (avgBitrate > 0 && avgBitrate < INT32_MAX) { 3570 mLastTrack->meta.setInt32(kKeyBitRate, (int32_t)avgBitrate); 3571 } 3572 } 3573 3574 const uint8_t *csd; 3575 size_t csd_size; 3576 if (esds.getCodecSpecificInfo( 3577 (const void **)&csd, &csd_size) != OK) { 3578 return ERROR_MALFORMED; 3579 } 
3580 3581 if (kUseHexDump) { 3582 printf("ESD of size %zu\n", csd_size); 3583 hexdump(csd, csd_size); 3584 } 3585 3586 if (csd_size == 0) { 3587 // There's no further information, i.e. no codec specific data 3588 // Let's assume that the information provided in the mpeg4 headers 3589 // is accurate and hope for the best. 3590 3591 return OK; 3592 } 3593 3594 if (csd_size < 2) { 3595 return ERROR_MALFORMED; 3596 } 3597 3598 static uint32_t kSamplingRate[] = { 3599 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 3600 16000, 12000, 11025, 8000, 7350 3601 }; 3602 3603 ABitReader br(csd, csd_size); 3604 uint32_t objectType = br.getBits(5); 3605 3606 if (objectType == 31) { // AAC-ELD => additional 6 bits 3607 objectType = 32 + br.getBits(6); 3608 } 3609 3610 if (mLastTrack == NULL) 3611 return ERROR_MALFORMED; 3612 3613 //keep AOT type 3614 mLastTrack->meta.setInt32(kKeyAACAOT, objectType); 3615 3616 uint32_t freqIndex = br.getBits(4); 3617 3618 int32_t sampleRate = 0; 3619 int32_t numChannels = 0; 3620 if (freqIndex == 15) { 3621 if (br.numBitsLeft() < 28) return ERROR_MALFORMED; 3622 sampleRate = br.getBits(24); 3623 numChannels = br.getBits(4); 3624 } else { 3625 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3626 numChannels = br.getBits(4); 3627 3628 if (freqIndex == 13 || freqIndex == 14) { 3629 return ERROR_MALFORMED; 3630 } 3631 3632 sampleRate = kSamplingRate[freqIndex]; 3633 } 3634 3635 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 3636 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3637 uint32_t extFreqIndex = br.getBits(4); 3638 int32_t extSampleRate __unused; 3639 if (extFreqIndex == 15) { 3640 if (csd_size < 8) { 3641 return ERROR_MALFORMED; 3642 } 3643 if (br.numBitsLeft() < 24) return ERROR_MALFORMED; 3644 extSampleRate = br.getBits(24); 3645 } else { 3646 if (extFreqIndex == 13 || extFreqIndex == 14) { 3647 return ERROR_MALFORMED; 3648 } 3649 extSampleRate = kSamplingRate[extFreqIndex]; 
3650 } 3651 //TODO: save the extension sampling rate value in meta data => 3652 // mLastTrack->meta.setInt32(kKeyExtSampleRate, extSampleRate); 3653 } 3654 3655 switch (numChannels) { 3656 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration 3657 case 0: 3658 case 1:// FC 3659 case 2:// FL FR 3660 case 3:// FC, FL FR 3661 case 4:// FC, FL FR, RC 3662 case 5:// FC, FL FR, SL SR 3663 case 6:// FC, FL FR, SL SR, LFE 3664 //numChannels already contains the right value 3665 break; 3666 case 11:// FC, FL FR, SL SR, RC, LFE 3667 numChannels = 7; 3668 break; 3669 case 7: // FC, FCL FCR, FL FR, SL SR, LFE 3670 case 12:// FC, FL FR, SL SR, RL RR, LFE 3671 case 14:// FC, FL FR, SL SR, LFE, FHL FHR 3672 numChannels = 8; 3673 break; 3674 default: 3675 return ERROR_UNSUPPORTED; 3676 } 3677 3678 { 3679 if (objectType == AOT_SBR || objectType == AOT_PS) { 3680 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3681 objectType = br.getBits(5); 3682 3683 if (objectType == AOT_ESCAPE) { 3684 if (br.numBitsLeft() < 6) return ERROR_MALFORMED; 3685 objectType = 32 + br.getBits(6); 3686 } 3687 } 3688 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || 3689 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || 3690 objectType == AOT_ER_BSAC) { 3691 if (br.numBitsLeft() < 2) return ERROR_MALFORMED; 3692 const int32_t frameLengthFlag __unused = br.getBits(1); 3693 3694 const int32_t dependsOnCoreCoder = br.getBits(1); 3695 3696 if (dependsOnCoreCoder ) { 3697 if (br.numBitsLeft() < 14) return ERROR_MALFORMED; 3698 const int32_t coreCoderDelay __unused = br.getBits(14); 3699 } 3700 3701 int32_t extensionFlag = -1; 3702 if (br.numBitsLeft() > 0) { 3703 extensionFlag = br.getBits(1); 3704 } else { 3705 switch (objectType) { 3706 // 14496-3 4.5.1.1 extensionFlag 3707 case AOT_AAC_LC: 3708 extensionFlag = 0; 3709 break; 3710 case AOT_ER_AAC_LC: 3711 case AOT_ER_AAC_SCAL: 3712 case AOT_ER_BSAC: 3713 case AOT_ER_AAC_LD: 3714 extensionFlag = 1; 3715 
break; 3716 default: 3717 return ERROR_MALFORMED; 3718 break; 3719 } 3720 ALOGW("csd missing extension flag; assuming %d for object type %u.", 3721 extensionFlag, objectType); 3722 } 3723 3724 if (numChannels == 0) { 3725 int32_t channelsEffectiveNum = 0; 3726 int32_t channelsNum = 0; 3727 if (br.numBitsLeft() < 32) { 3728 return ERROR_MALFORMED; 3729 } 3730 const int32_t ElementInstanceTag __unused = br.getBits(4); 3731 const int32_t Profile __unused = br.getBits(2); 3732 const int32_t SamplingFrequencyIndex __unused = br.getBits(4); 3733 const int32_t NumFrontChannelElements = br.getBits(4); 3734 const int32_t NumSideChannelElements = br.getBits(4); 3735 const int32_t NumBackChannelElements = br.getBits(4); 3736 const int32_t NumLfeChannelElements = br.getBits(2); 3737 const int32_t NumAssocDataElements __unused = br.getBits(3); 3738 const int32_t NumValidCcElements __unused = br.getBits(4); 3739 3740 const int32_t MonoMixdownPresent = br.getBits(1); 3741 3742 if (MonoMixdownPresent != 0) { 3743 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3744 const int32_t MonoMixdownElementNumber __unused = br.getBits(4); 3745 } 3746 3747 if (br.numBitsLeft() < 1) return ERROR_MALFORMED; 3748 const int32_t StereoMixdownPresent = br.getBits(1); 3749 if (StereoMixdownPresent != 0) { 3750 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3751 const int32_t StereoMixdownElementNumber __unused = br.getBits(4); 3752 } 3753 3754 if (br.numBitsLeft() < 1) return ERROR_MALFORMED; 3755 const int32_t MatrixMixdownIndexPresent = br.getBits(1); 3756 if (MatrixMixdownIndexPresent != 0) { 3757 if (br.numBitsLeft() < 3) return ERROR_MALFORMED; 3758 const int32_t MatrixMixdownIndex __unused = br.getBits(2); 3759 const int32_t PseudoSurroundEnable __unused = br.getBits(1); 3760 } 3761 3762 int i; 3763 for (i=0; i < NumFrontChannelElements; i++) { 3764 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3765 const int32_t FrontElementIsCpe = br.getBits(1); 3766 const int32_t 
FrontElementTagSelect __unused = br.getBits(4); 3767 channelsNum += FrontElementIsCpe ? 2 : 1; 3768 } 3769 3770 for (i=0; i < NumSideChannelElements; i++) { 3771 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3772 const int32_t SideElementIsCpe = br.getBits(1); 3773 const int32_t SideElementTagSelect __unused = br.getBits(4); 3774 channelsNum += SideElementIsCpe ? 2 : 1; 3775 } 3776 3777 for (i=0; i < NumBackChannelElements; i++) { 3778 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3779 const int32_t BackElementIsCpe = br.getBits(1); 3780 const int32_t BackElementTagSelect __unused = br.getBits(4); 3781 channelsNum += BackElementIsCpe ? 2 : 1; 3782 } 3783 channelsEffectiveNum = channelsNum; 3784 3785 for (i=0; i < NumLfeChannelElements; i++) { 3786 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3787 const int32_t LfeElementTagSelect __unused = br.getBits(4); 3788 channelsNum += 1; 3789 } 3790 ALOGV("mpeg4 audio channelsNum = %d", channelsNum); 3791 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); 3792 numChannels = channelsNum; 3793 } 3794 } 3795 } 3796 3797 if (numChannels == 0) { 3798 return ERROR_UNSUPPORTED; 3799 } 3800 3801 if (mLastTrack == NULL) 3802 return ERROR_MALFORMED; 3803 3804 int32_t prevSampleRate; 3805 CHECK(mLastTrack->meta.findInt32(kKeySampleRate, &prevSampleRate)); 3806 3807 if (prevSampleRate != sampleRate) { 3808 ALOGV("mpeg4 audio sample rate different from previous setting. " 3809 "was: %d, now: %d", prevSampleRate, sampleRate); 3810 } 3811 3812 mLastTrack->meta.setInt32(kKeySampleRate, sampleRate); 3813 3814 int32_t prevChannelCount; 3815 CHECK(mLastTrack->meta.findInt32(kKeyChannelCount, &prevChannelCount)); 3816 3817 if (prevChannelCount != numChannels) { 3818 ALOGV("mpeg4 audio channel count different from previous setting. 
" 3819 "was: %d, now: %d", prevChannelCount, numChannels); 3820 } 3821 3822 mLastTrack->meta.setInt32(kKeyChannelCount, numChannels); 3823 3824 return OK; 3825} 3826 3827//////////////////////////////////////////////////////////////////////////////// 3828 3829MPEG4Source::MPEG4Source( 3830 MetaDataBase &format, 3831 DataSourceBase *dataSource, 3832 int32_t timeScale, 3833 const sp<SampleTable> &sampleTable, 3834 Vector<SidxEntry> &sidx, 3835 const Trex *trex, 3836 off64_t firstMoofOffset, 3837 const sp<ItemTable> &itemTable) 3838 : mFormat(format), 3839 mDataSource(dataSource), 3840 mTimescale(timeScale), 3841 mSampleTable(sampleTable), 3842 mCurrentSampleIndex(0), 3843 mCurrentFragmentIndex(0), 3844 mSegments(sidx), 3845 mTrex(trex), 3846 mFirstMoofOffset(firstMoofOffset), 3847 mCurrentMoofOffset(firstMoofOffset), 3848 mNextMoofOffset(-1), 3849 mCurrentTime(0), 3850 mDefaultEncryptedByteBlock(0), 3851 mDefaultSkipByteBlock(0), 3852 mCurrentSampleInfoAllocSize(0), 3853 mCurrentSampleInfoSizes(NULL), 3854 mCurrentSampleInfoOffsetsAllocSize(0), 3855 mCurrentSampleInfoOffsets(NULL), 3856 mIsAVC(false), 3857 mIsHEVC(false), 3858 mNALLengthSize(0), 3859 mStarted(false), 3860 mGroup(NULL), 3861 mBuffer(NULL), 3862 mWantsNALFragments(false), 3863 mSrcBuffer(NULL), 3864 mIsHeif(itemTable != NULL), 3865 mItemTable(itemTable) { 3866 3867 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo)); 3868 3869 mFormat.findInt32(kKeyCryptoMode, &mCryptoMode); 3870 mDefaultIVSize = 0; 3871 mFormat.findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 3872 uint32_t keytype; 3873 const void *key; 3874 size_t keysize; 3875 if (mFormat.findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 3876 CHECK(keysize <= 16); 3877 memset(mCryptoKey, 0, 16); 3878 memcpy(mCryptoKey, key, keysize); 3879 } 3880 3881 mFormat.findInt32(kKeyEncryptedByteBlock, &mDefaultEncryptedByteBlock); 3882 mFormat.findInt32(kKeySkipByteBlock, &mDefaultSkipByteBlock); 3883 3884 const char *mime; 3885 bool 
success = mFormat.findCString(kKeyMIMEType, &mime); 3886 CHECK(success); 3887 3888 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 3889 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) || 3890 !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC); 3891 3892 if (mIsAVC) { 3893 uint32_t type; 3894 const void *data; 3895 size_t size; 3896 CHECK(format.findData(kKeyAVCC, &type, &data, &size)); 3897 3898 const uint8_t *ptr = (const uint8_t *)data; 3899 3900 CHECK(size >= 7); 3901 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3902 3903 // The number of bytes used to encode the length of a NAL unit. 3904 mNALLengthSize = 1 + (ptr[4] & 3); 3905 } else if (mIsHEVC) { 3906 uint32_t type; 3907 const void *data; 3908 size_t size; 3909 CHECK(format.findData(kKeyHVCC, &type, &data, &size)); 3910 3911 const uint8_t *ptr = (const uint8_t *)data; 3912 3913 CHECK(size >= 22); 3914 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3915 3916 mNALLengthSize = 1 + (ptr[14 + 7] & 3); 3917 } 3918 3919 CHECK(format.findInt32(kKeyTrackID, &mTrackId)); 3920 3921} 3922 3923status_t MPEG4Source::init() { 3924 if (mFirstMoofOffset != 0) { 3925 off64_t offset = mFirstMoofOffset; 3926 return parseChunk(&offset); 3927 } 3928 return OK; 3929} 3930 3931MPEG4Source::~MPEG4Source() { 3932 if (mStarted) { 3933 stop(); 3934 } 3935 free(mCurrentSampleInfoSizes); 3936 free(mCurrentSampleInfoOffsets); 3937} 3938 3939status_t MPEG4Source::start(MetaDataBase *params) { 3940 Mutex::Autolock autoLock(mLock); 3941 3942 CHECK(!mStarted); 3943 3944 int32_t val; 3945 if (params && params->findInt32(kKeyWantsNALFragments, &val) 3946 && val != 0) { 3947 mWantsNALFragments = true; 3948 } else { 3949 mWantsNALFragments = false; 3950 } 3951 3952 int32_t tmp; 3953 CHECK(mFormat.findInt32(kKeyMaxInputSize, &tmp)); 3954 size_t max_size = tmp; 3955 3956 // A somewhat arbitrary limit that should be sufficient for 8k video frames 3957 // If you see the message below for a valid input 
stream: increase the limit 3958 const size_t kMaxBufferSize = 64 * 1024 * 1024; 3959 if (max_size > kMaxBufferSize) { 3960 ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize); 3961 return ERROR_MALFORMED; 3962 } 3963 if (max_size == 0) { 3964 ALOGE("zero max input size"); 3965 return ERROR_MALFORMED; 3966 } 3967 3968 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize. 3969 const size_t kInitialBuffers = 2; 3970 const size_t kMaxBuffers = 8; 3971 const size_t realMaxBuffers = min(kMaxBufferSize / max_size, kMaxBuffers); 3972 mGroup = new MediaBufferGroup(kInitialBuffers, max_size, realMaxBuffers); 3973 mSrcBuffer = new (std::nothrow) uint8_t[max_size]; 3974 if (mSrcBuffer == NULL) { 3975 // file probably specified a bad max size 3976 delete mGroup; 3977 mGroup = NULL; 3978 return ERROR_MALFORMED; 3979 } 3980 3981 mStarted = true; 3982 3983 return OK; 3984} 3985 3986status_t MPEG4Source::stop() { 3987 Mutex::Autolock autoLock(mLock); 3988 3989 CHECK(mStarted); 3990 3991 if (mBuffer != NULL) { 3992 mBuffer->release(); 3993 mBuffer = NULL; 3994 } 3995 3996 delete[] mSrcBuffer; 3997 mSrcBuffer = NULL; 3998 3999 delete mGroup; 4000 mGroup = NULL; 4001 4002 mStarted = false; 4003 mCurrentSampleIndex = 0; 4004 4005 return OK; 4006} 4007 4008status_t MPEG4Source::parseChunk(off64_t *offset) { 4009 uint32_t hdr[2]; 4010 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 4011 return ERROR_IO; 4012 } 4013 uint64_t chunk_size = ntohl(hdr[0]); 4014 uint32_t chunk_type = ntohl(hdr[1]); 4015 off64_t data_offset = *offset + 8; 4016 4017 if (chunk_size == 1) { 4018 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 4019 return ERROR_IO; 4020 } 4021 chunk_size = ntoh64(chunk_size); 4022 data_offset += 8; 4023 4024 if (chunk_size < 16) { 4025 // The smallest valid chunk is 16 bytes long in this case. 4026 return ERROR_MALFORMED; 4027 } 4028 } else if (chunk_size < 8) { 4029 // The smallest valid chunk is 8 bytes long. 
4030 return ERROR_MALFORMED; 4031 } 4032 4033 char chunk[5]; 4034 MakeFourCCString(chunk_type, chunk); 4035 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset); 4036 4037 off64_t chunk_data_size = *offset + chunk_size - data_offset; 4038 4039 switch(chunk_type) { 4040 4041 case FOURCC('t', 'r', 'a', 'f'): 4042 case FOURCC('m', 'o', 'o', 'f'): { 4043 off64_t stop_offset = *offset + chunk_size; 4044 *offset = data_offset; 4045 while (*offset < stop_offset) { 4046 status_t err = parseChunk(offset); 4047 if (err != OK) { 4048 return err; 4049 } 4050 } 4051 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 4052 // *offset points to the box following this moof. Find the next moof from there. 4053 4054 while (true) { 4055 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 4056 // no more box to the end of file. 4057 break; 4058 } 4059 chunk_size = ntohl(hdr[0]); 4060 chunk_type = ntohl(hdr[1]); 4061 if (chunk_size == 1) { 4062 // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box 4063 // which is defined in 4.2 Object Structure. 4064 // When chunk_size==1, 8 bytes follows as "largesize". 4065 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 4066 return ERROR_IO; 4067 } 4068 chunk_size = ntoh64(chunk_size); 4069 if (chunk_size < 16) { 4070 // The smallest valid chunk is 16 bytes long in this case. 4071 return ERROR_MALFORMED; 4072 } 4073 } else if (chunk_size == 0) { 4074 // next box extends to end of file. 4075 } else if (chunk_size < 8) { 4076 // The smallest valid chunk is 8 bytes long in this case. 
4077 return ERROR_MALFORMED; 4078 } 4079 4080 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 4081 mNextMoofOffset = *offset; 4082 break; 4083 } else if (chunk_size == 0) { 4084 break; 4085 } 4086 *offset += chunk_size; 4087 } 4088 } 4089 break; 4090 } 4091 4092 case FOURCC('t', 'f', 'h', 'd'): { 4093 status_t err; 4094 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 4095 return err; 4096 } 4097 *offset += chunk_size; 4098 break; 4099 } 4100 4101 case FOURCC('t', 'r', 'u', 'n'): { 4102 status_t err; 4103 if (mLastParsedTrackId == mTrackId) { 4104 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 4105 return err; 4106 } 4107 } 4108 4109 *offset += chunk_size; 4110 break; 4111 } 4112 4113 case FOURCC('s', 'a', 'i', 'z'): { 4114 status_t err; 4115 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 4116 return err; 4117 } 4118 *offset += chunk_size; 4119 break; 4120 } 4121 case FOURCC('s', 'a', 'i', 'o'): { 4122 status_t err; 4123 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 4124 return err; 4125 } 4126 *offset += chunk_size; 4127 break; 4128 } 4129 4130 case FOURCC('s', 'e', 'n', 'c'): { 4131 status_t err; 4132 if ((err = parseSampleEncryption(data_offset)) != OK) { 4133 return err; 4134 } 4135 *offset += chunk_size; 4136 break; 4137 } 4138 4139 case FOURCC('m', 'd', 'a', 't'): { 4140 // parse DRM info if present 4141 ALOGV("MPEG4Source::parseChunk mdat"); 4142 // if saiz/saoi was previously observed, do something with the sampleinfos 4143 *offset += chunk_size; 4144 break; 4145 } 4146 4147 default: { 4148 *offset += chunk_size; 4149 break; 4150 } 4151 } 4152 return OK; 4153} 4154 4155status_t MPEG4Source::parseSampleAuxiliaryInformationSizes( 4156 off64_t offset, off64_t /* size */) { 4157 ALOGV("parseSampleAuxiliaryInformationSizes"); 4158 // 14496-12 8.7.12 4159 uint8_t version; 4160 if (mDataSource->readAt( 4161 offset, &version, 
sizeof(version)) 4162 < (ssize_t)sizeof(version)) { 4163 return ERROR_IO; 4164 } 4165 4166 if (version != 0) { 4167 return ERROR_UNSUPPORTED; 4168 } 4169 offset++; 4170 4171 uint32_t flags; 4172 if (!mDataSource->getUInt24(offset, &flags)) { 4173 return ERROR_IO; 4174 } 4175 offset += 3; 4176 4177 if (flags & 1) { 4178 uint32_t tmp; 4179 if (!mDataSource->getUInt32(offset, &tmp)) { 4180 return ERROR_MALFORMED; 4181 } 4182 mCurrentAuxInfoType = tmp; 4183 offset += 4; 4184 if (!mDataSource->getUInt32(offset, &tmp)) { 4185 return ERROR_MALFORMED; 4186 } 4187 mCurrentAuxInfoTypeParameter = tmp; 4188 offset += 4; 4189 } 4190 4191 uint8_t defsize; 4192 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 4193 return ERROR_MALFORMED; 4194 } 4195 mCurrentDefaultSampleInfoSize = defsize; 4196 offset++; 4197 4198 uint32_t smplcnt; 4199 if (!mDataSource->getUInt32(offset, &smplcnt)) { 4200 return ERROR_MALFORMED; 4201 } 4202 mCurrentSampleInfoCount = smplcnt; 4203 offset += 4; 4204 4205 if (mCurrentDefaultSampleInfoSize != 0) { 4206 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 4207 return OK; 4208 } 4209 if (smplcnt > mCurrentSampleInfoAllocSize) { 4210 uint8_t * newPtr = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 4211 if (newPtr == NULL) { 4212 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt); 4213 return NO_MEMORY; 4214 } 4215 mCurrentSampleInfoSizes = newPtr; 4216 mCurrentSampleInfoAllocSize = smplcnt; 4217 } 4218 4219 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 4220 return OK; 4221} 4222 4223status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets( 4224 off64_t offset, off64_t /* size */) { 4225 ALOGV("parseSampleAuxiliaryInformationOffsets"); 4226 // 14496-12 8.7.13 4227 uint8_t version; 4228 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 4229 return ERROR_IO; 4230 } 4231 offset++; 4232 4233 uint32_t flags; 4234 if (!mDataSource->getUInt24(offset, &flags)) 
{ 4235 return ERROR_IO; 4236 } 4237 offset += 3; 4238 4239 uint32_t entrycount; 4240 if (!mDataSource->getUInt32(offset, &entrycount)) { 4241 return ERROR_IO; 4242 } 4243 offset += 4; 4244 if (entrycount == 0) { 4245 return OK; 4246 } 4247 if (entrycount > UINT32_MAX / 8) { 4248 return ERROR_MALFORMED; 4249 } 4250 4251 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 4252 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8); 4253 if (newPtr == NULL) { 4254 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoOffsetsAllocSize, entrycount * 8); 4255 return NO_MEMORY; 4256 } 4257 mCurrentSampleInfoOffsets = newPtr; 4258 mCurrentSampleInfoOffsetsAllocSize = entrycount; 4259 } 4260 mCurrentSampleInfoOffsetCount = entrycount; 4261 4262 if (mCurrentSampleInfoOffsets == NULL) { 4263 return OK; 4264 } 4265 4266 for (size_t i = 0; i < entrycount; i++) { 4267 if (version == 0) { 4268 uint32_t tmp; 4269 if (!mDataSource->getUInt32(offset, &tmp)) { 4270 return ERROR_IO; 4271 } 4272 mCurrentSampleInfoOffsets[i] = tmp; 4273 offset += 4; 4274 } else { 4275 uint64_t tmp; 4276 if (!mDataSource->getUInt64(offset, &tmp)) { 4277 return ERROR_IO; 4278 } 4279 mCurrentSampleInfoOffsets[i] = tmp; 4280 offset += 8; 4281 } 4282 } 4283 4284 // parse clear/encrypted data 4285 4286 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 4287 4288 drmoffset += mCurrentMoofOffset; 4289 4290 return parseClearEncryptedSizes(drmoffset, false, 0); 4291} 4292 4293status_t MPEG4Source::parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags) { 4294 4295 int ivlength; 4296 CHECK(mFormat.findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 4297 4298 // only 0, 8 and 16 byte initialization vectors are supported 4299 if (ivlength != 0 && ivlength != 8 && ivlength != 16) { 4300 ALOGW("unsupported IV length: %d", ivlength); 4301 return ERROR_MALFORMED; 4302 } 4303 4304 uint32_t sampleCount = mCurrentSampleInfoCount; 4305 if 
(isSubsampleEncryption) { 4306 if (!mDataSource->getUInt32(offset, &sampleCount)) { 4307 return ERROR_IO; 4308 } 4309 offset += 4; 4310 } 4311 4312 // read CencSampleAuxiliaryDataFormats 4313 for (size_t i = 0; i < sampleCount; i++) { 4314 if (i >= mCurrentSamples.size()) { 4315 ALOGW("too few samples"); 4316 break; 4317 } 4318 Sample *smpl = &mCurrentSamples.editItemAt(i); 4319 if (!smpl->clearsizes.isEmpty()) { 4320 continue; 4321 } 4322 4323 memset(smpl->iv, 0, 16); 4324 if (mDataSource->readAt(offset, smpl->iv, ivlength) != ivlength) { 4325 return ERROR_IO; 4326 } 4327 4328 offset += ivlength; 4329 4330 bool readSubsamples; 4331 if (isSubsampleEncryption) { 4332 readSubsamples = flags & 2; 4333 } else { 4334 int32_t smplinfosize = mCurrentDefaultSampleInfoSize; 4335 if (smplinfosize == 0) { 4336 smplinfosize = mCurrentSampleInfoSizes[i]; 4337 } 4338 readSubsamples = smplinfosize > ivlength; 4339 } 4340 4341 if (readSubsamples) { 4342 uint16_t numsubsamples; 4343 if (!mDataSource->getUInt16(offset, &numsubsamples)) { 4344 return ERROR_IO; 4345 } 4346 offset += 2; 4347 for (size_t j = 0; j < numsubsamples; j++) { 4348 uint16_t numclear; 4349 uint32_t numencrypted; 4350 if (!mDataSource->getUInt16(offset, &numclear)) { 4351 return ERROR_IO; 4352 } 4353 offset += 2; 4354 if (!mDataSource->getUInt32(offset, &numencrypted)) { 4355 return ERROR_IO; 4356 } 4357 offset += 4; 4358 smpl->clearsizes.add(numclear); 4359 smpl->encryptedsizes.add(numencrypted); 4360 } 4361 } else { 4362 smpl->clearsizes.add(0); 4363 smpl->encryptedsizes.add(smpl->size); 4364 } 4365 } 4366 4367 return OK; 4368} 4369 4370status_t MPEG4Source::parseSampleEncryption(off64_t offset) { 4371 uint32_t flags; 4372 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 4373 return ERROR_MALFORMED; 4374 } 4375 return parseClearEncryptedSizes(offset + 4, true, flags); 4376} 4377 4378status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 4379 4380 if (size < 8) 
{ 4381 return -EINVAL; 4382 } 4383 4384 uint32_t flags; 4385 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 4386 return ERROR_MALFORMED; 4387 } 4388 4389 if (flags & 0xff000000) { 4390 return -EINVAL; 4391 } 4392 4393 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 4394 return ERROR_MALFORMED; 4395 } 4396 4397 if (mLastParsedTrackId != mTrackId) { 4398 // this is not the right track, skip it 4399 return OK; 4400 } 4401 4402 mTrackFragmentHeaderInfo.mFlags = flags; 4403 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 4404 offset += 8; 4405 size -= 8; 4406 4407 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 4408 4409 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 4410 if (size < 8) { 4411 return -EINVAL; 4412 } 4413 4414 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) { 4415 return ERROR_MALFORMED; 4416 } 4417 offset += 8; 4418 size -= 8; 4419 } 4420 4421 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 4422 if (size < 4) { 4423 return -EINVAL; 4424 } 4425 4426 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 4427 return ERROR_MALFORMED; 4428 } 4429 offset += 4; 4430 size -= 4; 4431 } 4432 4433 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 4434 if (size < 4) { 4435 return -EINVAL; 4436 } 4437 4438 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 4439 return ERROR_MALFORMED; 4440 } 4441 offset += 4; 4442 size -= 4; 4443 } 4444 4445 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 4446 if (size < 4) { 4447 return -EINVAL; 4448 } 4449 4450 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 4451 return ERROR_MALFORMED; 4452 } 4453 offset += 4; 4454 size -= 4; 4455 } 4456 4457 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 4458 if (size 
< 4) { 4459 return -EINVAL; 4460 } 4461 4462 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 4463 return ERROR_MALFORMED; 4464 } 4465 offset += 4; 4466 size -= 4; 4467 } 4468 4469 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 4470 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 4471 } 4472 4473 mTrackFragmentHeaderInfo.mDataOffset = 0; 4474 return OK; 4475} 4476 4477status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 4478 4479 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 4480 if (size < 8) { 4481 return -EINVAL; 4482 } 4483 4484 enum { 4485 kDataOffsetPresent = 0x01, 4486 kFirstSampleFlagsPresent = 0x04, 4487 kSampleDurationPresent = 0x100, 4488 kSampleSizePresent = 0x200, 4489 kSampleFlagsPresent = 0x400, 4490 kSampleCompositionTimeOffsetPresent = 0x800, 4491 }; 4492 4493 uint32_t flags; 4494 if (!mDataSource->getUInt32(offset, &flags)) { 4495 return ERROR_MALFORMED; 4496 } 4497 // |version| only affects SampleCompositionTimeOffset field. 4498 // If version == 0, SampleCompositionTimeOffset is uint32_t; 4499 // Otherwise, SampleCompositionTimeOffset is int32_t. 4500 // Sample.compositionOffset is defined as int32_t. 4501 uint8_t version = flags >> 24; 4502 flags &= 0xffffff; 4503 ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags); 4504 4505 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 4506 // These two shall not be used together. 
4507 return -EINVAL; 4508 } 4509 4510 uint32_t sampleCount; 4511 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 4512 return ERROR_MALFORMED; 4513 } 4514 offset += 8; 4515 size -= 8; 4516 4517 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 4518 4519 uint32_t firstSampleFlags = 0; 4520 4521 if (flags & kDataOffsetPresent) { 4522 if (size < 4) { 4523 return -EINVAL; 4524 } 4525 4526 int32_t dataOffsetDelta; 4527 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 4528 return ERROR_MALFORMED; 4529 } 4530 4531 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 4532 4533 offset += 4; 4534 size -= 4; 4535 } 4536 4537 if (flags & kFirstSampleFlagsPresent) { 4538 if (size < 4) { 4539 return -EINVAL; 4540 } 4541 4542 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 4543 return ERROR_MALFORMED; 4544 } 4545 offset += 4; 4546 size -= 4; 4547 } 4548 4549 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 4550 sampleCtsOffset = 0; 4551 4552 size_t bytesPerSample = 0; 4553 if (flags & kSampleDurationPresent) { 4554 bytesPerSample += 4; 4555 } else if (mTrackFragmentHeaderInfo.mFlags 4556 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 4557 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 4558 } else if (mTrex) { 4559 sampleDuration = mTrex->default_sample_duration; 4560 } 4561 4562 if (flags & kSampleSizePresent) { 4563 bytesPerSample += 4; 4564 } else if (mTrackFragmentHeaderInfo.mFlags 4565 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 4566 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 4567 } else { 4568 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 4569 } 4570 4571 if (flags & kSampleFlagsPresent) { 4572 bytesPerSample += 4; 4573 } else if (mTrackFragmentHeaderInfo.mFlags 4574 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 4575 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 4576 } else { 4577 sampleFlags = 
mTrackFragmentHeaderInfo.mDefaultSampleFlags; 4578 } 4579 4580 if (flags & kSampleCompositionTimeOffsetPresent) { 4581 bytesPerSample += 4; 4582 } else { 4583 sampleCtsOffset = 0; 4584 } 4585 4586 if (size < (off64_t)(sampleCount * bytesPerSample)) { 4587 return -EINVAL; 4588 } 4589 4590 Sample tmp; 4591 for (uint32_t i = 0; i < sampleCount; ++i) { 4592 if (flags & kSampleDurationPresent) { 4593 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 4594 return ERROR_MALFORMED; 4595 } 4596 offset += 4; 4597 } 4598 4599 if (flags & kSampleSizePresent) { 4600 if (!mDataSource->getUInt32(offset, &sampleSize)) { 4601 return ERROR_MALFORMED; 4602 } 4603 offset += 4; 4604 } 4605 4606 if (flags & kSampleFlagsPresent) { 4607 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 4608 return ERROR_MALFORMED; 4609 } 4610 offset += 4; 4611 } 4612 4613 if (flags & kSampleCompositionTimeOffsetPresent) { 4614 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 4615 return ERROR_MALFORMED; 4616 } 4617 offset += 4; 4618 } 4619 4620 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, " 4621 " flags 0x%08x", i + 1, 4622 dataOffset, sampleSize, sampleDuration, 4623 (flags & kFirstSampleFlagsPresent) && i == 0 4624 ? 
firstSampleFlags : sampleFlags); 4625 tmp.offset = dataOffset; 4626 tmp.size = sampleSize; 4627 tmp.duration = sampleDuration; 4628 tmp.compositionOffset = sampleCtsOffset; 4629 memset(tmp.iv, 0, sizeof(tmp.iv)); 4630 mCurrentSamples.add(tmp); 4631 4632 dataOffset += sampleSize; 4633 } 4634 4635 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 4636 4637 return OK; 4638} 4639 4640status_t MPEG4Source::getFormat(MetaDataBase &meta) { 4641 Mutex::Autolock autoLock(mLock); 4642 meta = mFormat; 4643 return OK; 4644} 4645 4646size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 4647 switch (mNALLengthSize) { 4648 case 1: 4649 return *data; 4650 case 2: 4651 return U16_AT(data); 4652 case 3: 4653 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 4654 case 4: 4655 return U32_AT(data); 4656 } 4657 4658 // This cannot happen, mNALLengthSize springs to life by adding 1 to 4659 // a 2-bit integer. 4660 CHECK(!"Should not be here."); 4661 4662 return 0; 4663} 4664 4665status_t MPEG4Source::read( 4666 MediaBufferBase **out, const ReadOptions *options) { 4667 Mutex::Autolock autoLock(mLock); 4668 4669 CHECK(mStarted); 4670 4671 if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) { 4672 *out = nullptr; 4673 return WOULD_BLOCK; 4674 } 4675 4676 if (mFirstMoofOffset > 0) { 4677 return fragmentedRead(out, options); 4678 } 4679 4680 *out = NULL; 4681 4682 int64_t targetSampleTimeUs = -1; 4683 4684 int64_t seekTimeUs; 4685 ReadOptions::SeekMode mode; 4686 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 4687 if (mIsHeif) { 4688 CHECK(mSampleTable == NULL); 4689 CHECK(mItemTable != NULL); 4690 int32_t imageIndex; 4691 if (!mFormat.findInt32(kKeyTrackID, &imageIndex)) { 4692 return ERROR_MALFORMED; 4693 } 4694 4695 status_t err; 4696 if (seekTimeUs >= 0) { 4697 err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex); 4698 } else { 4699 err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex); 4700 } 4701 if (err != OK) 
// Returns the next buffer of this track through |out|.
//
// When mFirstMoofOffset > 0 the call is forwarded to fragmentedRead().
// Otherwise an optional seek in |options| first repositions
// mCurrentSampleIndex (through mItemTable for HEIF, through mSampleTable
// otherwise) and the sample is then delivered in one of three shapes:
//   - exactly as stored in the file, when the track is neither AVC nor HEVC;
//   - one NAL unit per call, as a clone of the sample buffer with the length
//     prefix stripped, when mWantsNALFragments is set;
//   - the whole sample with every NAL length prefix replaced by the start
//     code 00 00 00 01, otherwise (DRM content is copied unmodified).
//
// Returns WOULD_BLOCK for a non-blocking request when mGroup has no buffer
// available, ERROR_END_OF_STREAM when the seek lookup reports
// ERROR_OUT_OF_RANGE, ERROR_BUFFER_TOO_SMALL / ERROR_IO / ERROR_MALFORMED on
// bad input, and otherwise whatever the sample table, item table or buffer
// group reports. On any error path taken after a buffer was acquired, mBuffer
// is released and reset to NULL.
status_t MPEG4Source::read(
        MediaBufferBase **out, const ReadOptions *options) {
    Mutex::Autolock autoLock(mLock);

    CHECK(mStarted);

    // A non-blocking caller must not be stalled inside acquire_buffer().
    if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) {
        *out = nullptr;
        return WOULD_BLOCK;
    }

    // Fragmented files are handled by a separate code path.
    if (mFirstMoofOffset > 0) {
        return fragmentedRead(out, options);
    }

    *out = NULL;

    // Stays negative unless a SEEK_CLOSEST / SEEK_FRAME_INDEX seek is handled
    // below; only then is kKeyTargetTime attached to the returned buffer.
    int64_t targetSampleTimeUs = -1;

    int64_t seekTimeUs;
    ReadOptions::SeekMode mode;
    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
        if (mIsHeif) {
            CHECK(mSampleTable == NULL);
            CHECK(mItemTable != NULL);
            int32_t imageIndex;
            if (!mFormat.findInt32(kKeyTrackID, &imageIndex)) {
                return ERROR_MALFORMED;
            }

            // A non-negative seek time selects the image item itself, a
            // negative one selects its thumbnail.
            status_t err;
            if (seekTimeUs >= 0) {
                err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
            } else {
                err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
            }
            if (err != OK) {
                return err;
            }
        } else {
            // Translate the requested seek mode into sample-table lookup flags.
            uint32_t findFlags = 0;
            switch (mode) {
                case ReadOptions::SEEK_PREVIOUS_SYNC:
                    findFlags = SampleTable::kFlagBefore;
                    break;
                case ReadOptions::SEEK_NEXT_SYNC:
                    findFlags = SampleTable::kFlagAfter;
                    break;
                case ReadOptions::SEEK_CLOSEST_SYNC:
                case ReadOptions::SEEK_CLOSEST:
                    findFlags = SampleTable::kFlagClosest;
                    break;
                case ReadOptions::SEEK_FRAME_INDEX:
                    findFlags = SampleTable::kFlagFrameIndex;
                    break;
                default:
                    CHECK(!"Should not be here.");
                    break;
            }

            uint32_t sampleIndex;
            status_t err = mSampleTable->findSampleAtTime(
                    seekTimeUs, 1000000, mTimescale,
                    &sampleIndex, findFlags);

            if (mode == ReadOptions::SEEK_CLOSEST
                    || mode == ReadOptions::SEEK_FRAME_INDEX) {
                // We found the closest sample already, now we want the sync
                // sample preceding it (or the sample itself of course), even
                // if the subsequent sync sample is closer.
                findFlags = SampleTable::kFlagBefore;
            }

            // Each lookup below only runs if every previous one succeeded; the
            // combined result is checked once afterwards.
            uint32_t syncSampleIndex;
            if (err == OK) {
                err = mSampleTable->findSyncSampleNear(
                        sampleIndex, &syncSampleIndex, findFlags);
            }

            uint32_t sampleTime;
            if (err == OK) {
                err = mSampleTable->getMetaDataForSample(
                        sampleIndex, NULL, NULL, &sampleTime);
            }

            if (err != OK) {
                if (err == ERROR_OUT_OF_RANGE) {
                    // An attempt to seek past the end of the stream would
                    // normally cause this ERROR_OUT_OF_RANGE error. Propagating
                    // this all the way to the MediaPlayer would cause abnormal
                    // termination. Legacy behaviour appears to be to behave as if
                    // we had seeked to the end of stream, ending normally.
                    err = ERROR_END_OF_STREAM;
                }
                ALOGV("end of stream");
                return err;
            }

            // Decoding starts at the sync sample, but the caller is told which
            // (possibly later) sample time was actually requested.
            if (mode == ReadOptions::SEEK_CLOSEST
                    || mode == ReadOptions::SEEK_FRAME_INDEX) {
                targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
            }

#if 0
            uint32_t syncSampleTime;
            CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
                        syncSampleIndex, NULL, NULL, &syncSampleTime));

            ALOGI("seek to time %lld us => sample at time %lld us, "
                 "sync sample at time %lld us",
                 seekTimeUs,
                 sampleTime * 1000000ll / mTimescale,
                 syncSampleTime * 1000000ll / mTimescale);
#endif

            mCurrentSampleIndex = syncSampleIndex;
        }

        // Drop any partially consumed sample left over from before the seek.
        if (mBuffer != NULL) {
            mBuffer->release();
            mBuffer = NULL;
        }

        // fall through
    }

    off64_t offset = 0;
    size_t size = 0;
    // cts, stts and isSyncSample are only assigned when a new sample is loaded
    // below, and are only read on paths where newBuffer is true.
    uint32_t cts, stts;
    bool isSyncSample;
    bool newBuffer = false;
    // mBuffer is non-NULL here only while a previous sample is still being
    // handed out NAL unit by NAL unit.
    if (mBuffer == NULL) {
        newBuffer = true;

        status_t err;
        if (!mIsHeif) {
            err = mSampleTable->getMetaDataForSample(
                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
        } else {
            // The item index is only passed when this call carries a seek;
            // otherwise NULL is passed.
            err = mItemTable->getImageOffsetAndSize(
                    options && options->getSeekTo(&seekTimeUs, &mode) ?
                            &mCurrentSampleIndex : NULL, &offset, &size);

            cts = stts = 0;
            isSyncSample = 0;
            ALOGV("image offset %lld, size %zu", (long long)offset, size);
        }

        if (err != OK) {
            return err;
        }

        err = mGroup->acquire_buffer(&mBuffer);

        if (err != OK) {
            CHECK(mBuffer == NULL);
            return err;
        }
        // Reject the sample before any data is read into the buffer.
        if (size > mBuffer->size()) {
            ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
            mBuffer->release();
            mBuffer = NULL;
            return ERROR_BUFFER_TOO_SMALL;
        }
    }

    if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) {
        if (newBuffer) {
            // Read the raw sample straight into the output buffer.
            ssize_t num_bytes_read =
                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);

            if (num_bytes_read < (ssize_t)size) {
                mBuffer->release();
                mBuffer = NULL;

                return ERROR_IO;
            }

            CHECK(mBuffer != NULL);
            mBuffer->set_range(0, size);
            mBuffer->meta_data().clear();
            // Convert from track timescale units to microseconds.
            mBuffer->meta_data().setInt64(
                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
            mBuffer->meta_data().setInt64(
                    kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);

            if (targetSampleTimeUs >= 0) {
                mBuffer->meta_data().setInt64(
                        kKeyTargetTime, targetSampleTimeUs);
            }

            if (isSyncSample) {
                mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
            }

            // The sample is fully loaded; later calls that only drain NAL
            // units from mBuffer do not advance the index again.
            ++mCurrentSampleIndex;
        }

        if (!mIsAVC && !mIsHEVC) {
            // Ownership of the buffer passes to the caller.
            *out = mBuffer;
            mBuffer = NULL;

            return OK;
        }

        // Each NAL unit is split up into its constituent fragments and
        // each one of them returned in its own buffer.

        CHECK(mBuffer->range_length() >= mNALLengthSize);

        const uint8_t *src =
            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();

        size_t nal_size = parseNALSize(src);
        // Logged only: a nal_size this large necessarily fails the range check
        // that follows, which is what rejects the sample.
        if (mNALLengthSize > SIZE_MAX - nal_size) {
            ALOGE("b/24441553, b/24445122");
        }
        if (mBuffer->range_length() - mNALLengthSize < nal_size) {
            ALOGE("incomplete NAL unit.");

            mBuffer->release();
            mBuffer = NULL;

            return ERROR_MALFORMED;
        }

        // The clone's range is narrowed to just this NAL unit's payload.
        MediaBufferBase *clone = mBuffer->clone();
        CHECK(clone != NULL);
        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);

        // Advance the parent buffer's range past the unit just handed out.
        CHECK(mBuffer != NULL);
        mBuffer->set_range(
                mBuffer->range_offset() + mNALLengthSize + nal_size,
                mBuffer->range_length() - mNALLengthSize - nal_size);

        // Nothing left in this sample: the next call loads a new one.
        if (mBuffer->range_length() == 0) {
            mBuffer->release();
            mBuffer = NULL;
        }

        *out = clone;

        return OK;
    } else {
        // Whole NAL units are returned but each fragment is prefixed by
        // the start code (0x00 00 00 01).
        ssize_t num_bytes_read = 0;
        int32_t drm = 0;
        bool usesDRM = (mFormat.findInt32(kKeyIsDRM, &drm) && drm != 0);
        // DRM samples are read directly into the output buffer and left
        // untouched; everything else is staged in mSrcBuffer and rewritten
        // into mBuffer below.
        if (usesDRM) {
            num_bytes_read =
                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
        } else {
            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
        }

        if (num_bytes_read < (ssize_t)size) {
            mBuffer->release();
            mBuffer = NULL;

            return ERROR_IO;
        }

        if (usesDRM) {
            CHECK(mBuffer != NULL);
            mBuffer->set_range(0, size);

        } else {
            uint8_t *dstData = (uint8_t *)mBuffer->data();
            size_t srcOffset = 0;
            size_t dstOffset = 0;

            while (srcOffset < size) {
                // First make sure the length field itself lies inside the
                // sample, then that the payload it announces does too.
                bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
                size_t nalLength = 0;
                if (!isMalFormed) {
                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
                    srcOffset += mNALLengthSize;
                    isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
                }

                if (isMalFormed) {
                    ALOGE("Video is malformed");
                    mBuffer->release();
                    mBuffer = NULL;
                    return ERROR_MALFORMED;
                }

                // Empty NAL units produce no output; srcOffset has already
                // moved past the length field.
                if (nalLength == 0) {
                    continue;
                }

                // Overflow-safe check that start code plus payload fit in the
                // destination buffer.
                if (dstOffset > SIZE_MAX - 4 ||
                        dstOffset + 4 > SIZE_MAX - nalLength ||
                        dstOffset + 4 + nalLength > mBuffer->size()) {
                    ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
                    android_errorWriteLog(0x534e4554, "27208621");
                    mBuffer->release();
                    mBuffer = NULL;
                    return ERROR_MALFORMED;
                }

                // Emit the 4-byte start code followed by the NAL payload.
                dstData[dstOffset++] = 0;
                dstData[dstOffset++] = 0;
                dstData[dstOffset++] = 0;
                dstData[dstOffset++] = 1;
                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
                srcOffset += nalLength;
                dstOffset += nalLength;
            }
            CHECK_EQ(srcOffset, size);
            CHECK(mBuffer != NULL);
            mBuffer->set_range(0, dstOffset);
        }

        mBuffer->meta_data().clear();
        // Convert from track timescale units to microseconds.
        mBuffer->meta_data().setInt64(
                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
        mBuffer->meta_data().setInt64(
                kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);

        if (targetSampleTimeUs >= 0) {
            mBuffer->meta_data().setInt64(
                    kKeyTargetTime, targetSampleTimeUs);
        }

        if (mIsAVC) {
            uint32_t layerId = FindAVCLayerId(
                    (const uint8_t *)mBuffer->data(), mBuffer->range_length());
            mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId);
        }

        if (isSyncSample) {
            mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
        }

        ++mCurrentSampleIndex;

        // Ownership of the buffer passes to the caller.
        *out = mBuffer;
        mBuffer = NULL;

        return OK;
    }
}
((int64_t)cts * 1000000) / mTimescale); 4983 mBuffer->meta_data().setInt64( 4984 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 4985 4986 if (targetSampleTimeUs >= 0) { 4987 mBuffer->meta_data().setInt64( 4988 kKeyTargetTime, targetSampleTimeUs); 4989 } 4990 4991 if (mIsAVC) { 4992 uint32_t layerId = FindAVCLayerId( 4993 (const uint8_t *)mBuffer->data(), mBuffer->range_length()); 4994 mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId); 4995 } 4996 4997 if (isSyncSample) { 4998 mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1); 4999 } 5000 5001 ++mCurrentSampleIndex; 5002 5003 *out = mBuffer; 5004 mBuffer = NULL; 5005 5006 return OK; 5007 } 5008} 5009 5010status_t MPEG4Source::fragmentedRead( 5011 MediaBufferBase **out, const ReadOptions *options) { 5012 5013 ALOGV("MPEG4Source::fragmentedRead"); 5014 5015 CHECK(mStarted); 5016 5017 *out = NULL; 5018 5019 int64_t targetSampleTimeUs = -1; 5020 5021 int64_t seekTimeUs; 5022 ReadOptions::SeekMode mode; 5023 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 5024 5025 int numSidxEntries = mSegments.size(); 5026 if (numSidxEntries != 0) { 5027 int64_t totalTime = 0; 5028 off64_t totalOffset = mFirstMoofOffset; 5029 for (int i = 0; i < numSidxEntries; i++) { 5030 const SidxEntry *se = &mSegments[i]; 5031 if (totalTime + se->mDurationUs > seekTimeUs) { 5032 // The requested time is somewhere in this segment 5033 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) || 5034 (mode == ReadOptions::SEEK_CLOSEST_SYNC && 5035 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 5036 // requested next sync, or closest sync and it was closer to the end of 5037 // this segment 5038 totalTime += se->mDurationUs; 5039 totalOffset += se->mSize; 5040 } 5041 break; 5042 } 5043 totalTime += se->mDurationUs; 5044 totalOffset += se->mSize; 5045 } 5046 mCurrentMoofOffset = totalOffset; 5047 mNextMoofOffset = -1; 5048 mCurrentSamples.clear(); 5049 mCurrentSampleIndex = 0; 5050 
status_t err = parseChunk(&totalOffset); 5051 if (err != OK) { 5052 return err; 5053 } 5054 mCurrentTime = totalTime * mTimescale / 1000000ll; 5055 } else { 5056 // without sidx boxes, we can only seek to 0 5057 mCurrentMoofOffset = mFirstMoofOffset; 5058 mNextMoofOffset = -1; 5059 mCurrentSamples.clear(); 5060 mCurrentSampleIndex = 0; 5061 off64_t tmp = mCurrentMoofOffset; 5062 status_t err = parseChunk(&tmp); 5063 if (err != OK) { 5064 return err; 5065 } 5066 mCurrentTime = 0; 5067 } 5068 5069 if (mBuffer != NULL) { 5070 mBuffer->release(); 5071 mBuffer = NULL; 5072 } 5073 5074 // fall through 5075 } 5076 5077 off64_t offset = 0; 5078 size_t size = 0; 5079 uint32_t cts = 0; 5080 bool isSyncSample = false; 5081 bool newBuffer = false; 5082 if (mBuffer == NULL) { 5083 newBuffer = true; 5084 5085 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 5086 // move to next fragment if there is one 5087 if (mNextMoofOffset <= mCurrentMoofOffset) { 5088 return ERROR_END_OF_STREAM; 5089 } 5090 off64_t nextMoof = mNextMoofOffset; 5091 mCurrentMoofOffset = nextMoof; 5092 mCurrentSamples.clear(); 5093 mCurrentSampleIndex = 0; 5094 status_t err = parseChunk(&nextMoof); 5095 if (err != OK) { 5096 return err; 5097 } 5098 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 5099 return ERROR_END_OF_STREAM; 5100 } 5101 } 5102 5103 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 5104 offset = smpl->offset; 5105 size = smpl->size; 5106 cts = mCurrentTime + smpl->compositionOffset; 5107 mCurrentTime += smpl->duration; 5108 isSyncSample = (mCurrentSampleIndex == 0); // XXX 5109 5110 status_t err = mGroup->acquire_buffer(&mBuffer); 5111 5112 if (err != OK) { 5113 CHECK(mBuffer == NULL); 5114 ALOGV("acquire_buffer returned %d", err); 5115 return err; 5116 } 5117 if (size > mBuffer->size()) { 5118 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 5119 mBuffer->release(); 5120 mBuffer = NULL; 5121 return ERROR_BUFFER_TOO_SMALL; 5122 } 5123 } 5124 5125 const Sample 
*smpl = &mCurrentSamples[mCurrentSampleIndex]; 5126 MetaDataBase &bufmeta = mBuffer->meta_data(); 5127 bufmeta.clear(); 5128 if (smpl->encryptedsizes.size()) { 5129 // store clear/encrypted lengths in metadata 5130 bufmeta.setData(kKeyPlainSizes, 0, 5131 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 5132 bufmeta.setData(kKeyEncryptedSizes, 0, 5133 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 5134 bufmeta.setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 5135 bufmeta.setInt32(kKeyCryptoMode, mCryptoMode); 5136 bufmeta.setData(kKeyCryptoKey, 0, mCryptoKey, 16); 5137 bufmeta.setInt32(kKeyEncryptedByteBlock, mDefaultEncryptedByteBlock); 5138 bufmeta.setInt32(kKeySkipByteBlock, mDefaultSkipByteBlock); 5139 5140 uint32_t type = 0; 5141 const void *iv = NULL; 5142 size_t ivlength = 0; 5143 if (!mFormat.findData( 5144 kKeyCryptoIV, &type, &iv, &ivlength)) { 5145 iv = smpl->iv; 5146 ivlength = 16; // use 16 or the actual size? 5147 } 5148 bufmeta.setData(kKeyCryptoIV, 0, iv, ivlength); 5149 5150 } 5151 5152 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) { 5153 if (newBuffer) { 5154 if (!isInRange((size_t)0u, mBuffer->size(), size)) { 5155 mBuffer->release(); 5156 mBuffer = NULL; 5157 5158 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size); 5159 return ERROR_MALFORMED; 5160 } 5161 5162 ssize_t num_bytes_read = 5163 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 5164 5165 if (num_bytes_read < (ssize_t)size) { 5166 mBuffer->release(); 5167 mBuffer = NULL; 5168 5169 ALOGE("i/o error"); 5170 return ERROR_IO; 5171 } 5172 5173 CHECK(mBuffer != NULL); 5174 mBuffer->set_range(0, size); 5175 mBuffer->meta_data().setInt64( 5176 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 5177 mBuffer->meta_data().setInt64( 5178 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 5179 5180 if (targetSampleTimeUs >= 0) { 5181 mBuffer->meta_data().setInt64( 5182 kKeyTargetTime, targetSampleTimeUs); 5183 } 5184 5185 if (mIsAVC) { 5186 
uint32_t layerId = FindAVCLayerId( 5187 (const uint8_t *)mBuffer->data(), mBuffer->range_length()); 5188 mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId); 5189 } 5190 5191 if (isSyncSample) { 5192 mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1); 5193 } 5194 5195 ++mCurrentSampleIndex; 5196 } 5197 5198 if (!mIsAVC && !mIsHEVC) { 5199 *out = mBuffer; 5200 mBuffer = NULL; 5201 5202 return OK; 5203 } 5204 5205 // Each NAL unit is split up into its constituent fragments and 5206 // each one of them returned in its own buffer. 5207 5208 CHECK(mBuffer->range_length() >= mNALLengthSize); 5209 5210 const uint8_t *src = 5211 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 5212 5213 size_t nal_size = parseNALSize(src); 5214 if (mNALLengthSize > SIZE_MAX - nal_size) { 5215 ALOGE("b/24441553, b/24445122"); 5216 } 5217 5218 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 5219 ALOGE("incomplete NAL unit."); 5220 5221 mBuffer->release(); 5222 mBuffer = NULL; 5223 5224 return ERROR_MALFORMED; 5225 } 5226 5227 MediaBufferBase *clone = mBuffer->clone(); 5228 CHECK(clone != NULL); 5229 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 5230 5231 CHECK(mBuffer != NULL); 5232 mBuffer->set_range( 5233 mBuffer->range_offset() + mNALLengthSize + nal_size, 5234 mBuffer->range_length() - mNALLengthSize - nal_size); 5235 5236 if (mBuffer->range_length() == 0) { 5237 mBuffer->release(); 5238 mBuffer = NULL; 5239 } 5240 5241 *out = clone; 5242 5243 return OK; 5244 } else { 5245 ALOGV("whole NAL"); 5246 // Whole NAL units are returned but each fragment is prefixed by 5247 // the start code (0x00 00 00 01). 
5248 ssize_t num_bytes_read = 0; 5249 int32_t drm = 0; 5250 bool usesDRM = (mFormat.findInt32(kKeyIsDRM, &drm) && drm != 0); 5251 void *data = NULL; 5252 bool isMalFormed = false; 5253 if (usesDRM) { 5254 if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) { 5255 isMalFormed = true; 5256 } else { 5257 data = mBuffer->data(); 5258 } 5259 } else { 5260 int32_t max_size; 5261 if (!mFormat.findInt32(kKeyMaxInputSize, &max_size) 5262 || !isInRange((size_t)0u, (size_t)max_size, size)) { 5263 isMalFormed = true; 5264 } else { 5265 data = mSrcBuffer; 5266 } 5267 } 5268 5269 if (isMalFormed || data == NULL) { 5270 ALOGE("isMalFormed size %zu", size); 5271 if (mBuffer != NULL) { 5272 mBuffer->release(); 5273 mBuffer = NULL; 5274 } 5275 return ERROR_MALFORMED; 5276 } 5277 num_bytes_read = mDataSource->readAt(offset, data, size); 5278 5279 if (num_bytes_read < (ssize_t)size) { 5280 mBuffer->release(); 5281 mBuffer = NULL; 5282 5283 ALOGE("i/o error"); 5284 return ERROR_IO; 5285 } 5286 5287 if (usesDRM) { 5288 CHECK(mBuffer != NULL); 5289 mBuffer->set_range(0, size); 5290 5291 } else { 5292 uint8_t *dstData = (uint8_t *)mBuffer->data(); 5293 size_t srcOffset = 0; 5294 size_t dstOffset = 0; 5295 5296 while (srcOffset < size) { 5297 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 5298 size_t nalLength = 0; 5299 if (!isMalFormed) { 5300 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 5301 srcOffset += mNALLengthSize; 5302 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength) 5303 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u) 5304 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength); 5305 } 5306 5307 if (isMalFormed) { 5308 ALOGE("Video is malformed; nalLength %zu", nalLength); 5309 mBuffer->release(); 5310 mBuffer = NULL; 5311 return ERROR_MALFORMED; 5312 } 5313 5314 if (nalLength == 0) { 5315 continue; 5316 } 5317 5318 if (dstOffset > SIZE_MAX - 4 || 5319 dstOffset + 4 > SIZE_MAX - nalLength 
|| 5320 dstOffset + 4 + nalLength > mBuffer->size()) { 5321 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size()); 5322 android_errorWriteLog(0x534e4554, "26365349"); 5323 mBuffer->release(); 5324 mBuffer = NULL; 5325 return ERROR_MALFORMED; 5326 } 5327 5328 dstData[dstOffset++] = 0; 5329 dstData[dstOffset++] = 0; 5330 dstData[dstOffset++] = 0; 5331 dstData[dstOffset++] = 1; 5332 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 5333 srcOffset += nalLength; 5334 dstOffset += nalLength; 5335 } 5336 CHECK_EQ(srcOffset, size); 5337 CHECK(mBuffer != NULL); 5338 mBuffer->set_range(0, dstOffset); 5339 } 5340 5341 mBuffer->meta_data().setInt64( 5342 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 5343 mBuffer->meta_data().setInt64( 5344 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 5345 5346 if (targetSampleTimeUs >= 0) { 5347 mBuffer->meta_data().setInt64( 5348 kKeyTargetTime, targetSampleTimeUs); 5349 } 5350 5351 if (isSyncSample) { 5352 mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1); 5353 } 5354 5355 ++mCurrentSampleIndex; 5356 5357 *out = mBuffer; 5358 mBuffer = NULL; 5359 5360 return OK; 5361 } 5362} 5363 5364MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 5365 const char *mimePrefix) { 5366 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 5367 const char *mime; 5368 if (track->meta.findCString(kKeyMIMEType, &mime) 5369 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 5370 return track; 5371 } 5372 } 5373 5374 return NULL; 5375} 5376 5377static bool LegacySniffMPEG4(DataSourceBase *source, float *confidence) { 5378 uint8_t header[8]; 5379 5380 ssize_t n = source->readAt(4, header, sizeof(header)); 5381 if (n < (ssize_t)sizeof(header)) { 5382 return false; 5383 } 5384 5385 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 5386 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) 5387 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 
5388 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 5389 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 5390 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8) 5391 || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8) 5392 || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)) { 5393 *confidence = 0.4; 5394 5395 return true; 5396 } 5397 5398 return false; 5399} 5400 5401static bool isCompatibleBrand(uint32_t fourcc) { 5402 static const uint32_t kCompatibleBrands[] = { 5403 FOURCC('i', 's', 'o', 'm'), 5404 FOURCC('i', 's', 'o', '2'), 5405 FOURCC('a', 'v', 'c', '1'), 5406 FOURCC('h', 'v', 'c', '1'), 5407 FOURCC('h', 'e', 'v', '1'), 5408 FOURCC('3', 'g', 'p', '4'), 5409 FOURCC('m', 'p', '4', '1'), 5410 FOURCC('m', 'p', '4', '2'), 5411 FOURCC('d', 'a', 's', 'h'), 5412 5413 // Won't promise that the following file types can be played. 5414 // Just give these file types a chance. 5415 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 5416 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 5417 5418 FOURCC('3', 'g', '2', 'a'), // 3GPP2 5419 FOURCC('3', 'g', '2', 'b'), 5420 FOURCC('m', 'i', 'f', '1'), // HEIF image 5421 FOURCC('h', 'e', 'i', 'c'), // HEIF image 5422 FOURCC('m', 's', 'f', '1'), // HEIF image sequence 5423 FOURCC('h', 'e', 'v', 'c'), // HEIF image sequence 5424 }; 5425 5426 for (size_t i = 0; 5427 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 5428 ++i) { 5429 if (kCompatibleBrands[i] == fourcc) { 5430 return true; 5431 } 5432 } 5433 5434 return false; 5435} 5436 5437// Attempt to actually parse the 'ftyp' atom and determine if a suitable 5438// compatible brand is present. 5439// Also try to identify where this file's metadata ends 5440// (end of the 'moov' atom) and report it to the caller as part of 5441// the metadata. 5442static bool BetterSniffMPEG4(DataSourceBase *source, float *confidence) { 5443 // We scan up to 128 bytes to identify this file as an MP4. 
5444 static const off64_t kMaxScanOffset = 128ll; 5445 5446 off64_t offset = 0ll; 5447 bool foundGoodFileType = false; 5448 off64_t moovAtomEndOffset = -1ll; 5449 bool done = false; 5450 5451 while (!done && offset < kMaxScanOffset) { 5452 uint32_t hdr[2]; 5453 if (source->readAt(offset, hdr, 8) < 8) { 5454 return false; 5455 } 5456 5457 uint64_t chunkSize = ntohl(hdr[0]); 5458 uint32_t chunkType = ntohl(hdr[1]); 5459 off64_t chunkDataOffset = offset + 8; 5460 5461 if (chunkSize == 1) { 5462 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 5463 return false; 5464 } 5465 5466 chunkSize = ntoh64(chunkSize); 5467 chunkDataOffset += 8; 5468 5469 if (chunkSize < 16) { 5470 // The smallest valid chunk is 16 bytes long in this case. 5471 return false; 5472 } 5473 5474 } else if (chunkSize < 8) { 5475 // The smallest valid chunk is 8 bytes long. 5476 return false; 5477 } 5478 5479 // (data_offset - offset) is either 8 or 16 5480 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset); 5481 if (chunkDataSize < 0) { 5482 ALOGE("b/23540914"); 5483 return false; 5484 } 5485 5486 char chunkstring[5]; 5487 MakeFourCCString(chunkType, chunkstring); 5488 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset); 5489 switch (chunkType) { 5490 case FOURCC('f', 't', 'y', 'p'): 5491 { 5492 if (chunkDataSize < 8) { 5493 return false; 5494 } 5495 5496 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 5497 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 5498 if (i == 1) { 5499 // Skip this index, it refers to the minorVersion, 5500 // not a brand. 
5501 continue; 5502 } 5503 5504 uint32_t brand; 5505 if (source->readAt( 5506 chunkDataOffset + 4 * i, &brand, 4) < 4) { 5507 return false; 5508 } 5509 5510 brand = ntohl(brand); 5511 5512 if (isCompatibleBrand(brand)) { 5513 foundGoodFileType = true; 5514 break; 5515 } 5516 } 5517 5518 if (!foundGoodFileType) { 5519 return false; 5520 } 5521 5522 break; 5523 } 5524 5525 case FOURCC('m', 'o', 'o', 'v'): 5526 { 5527 moovAtomEndOffset = offset + chunkSize; 5528 5529 done = true; 5530 break; 5531 } 5532 5533 default: 5534 break; 5535 } 5536 5537 offset += chunkSize; 5538 } 5539 5540 if (!foundGoodFileType) { 5541 return false; 5542 } 5543 5544 *confidence = 0.4f; 5545 5546 return true; 5547} 5548 5549static MediaExtractor* CreateExtractor(DataSourceBase *source, void *) { 5550 return new MPEG4Extractor(source); 5551} 5552 5553static MediaExtractor::CreatorFunc Sniff( 5554 DataSourceBase *source, float *confidence, void **, 5555 MediaExtractor::FreeMetaFunc *) { 5556 if (BetterSniffMPEG4(source, confidence)) { 5557 return CreateExtractor; 5558 } 5559 5560 if (LegacySniffMPEG4(source, confidence)) { 5561 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 5562 return CreateExtractor; 5563 } 5564 5565 return NULL; 5566} 5567 5568extern "C" { 5569// This is the only symbol that needs to be exported 5570__attribute__ ((visibility ("default"))) 5571MediaExtractor::ExtractorDef GETEXTRACTORDEF() { 5572 return { 5573 MediaExtractor::EXTRACTORDEF_VERSION, 5574 UUID("27575c67-4417-4c54-8d3d-8e626985a164"), 5575 1, // version 5576 "MP4 Extractor", 5577 Sniff 5578 }; 5579} 5580 5581} // extern "C" 5582 5583} // namespace android 5584