MPEG4Extractor.cpp revision c1e24ce7fe17981e80f85d2345c53599ba5f850d
1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17//#define LOG_NDEBUG 0 18#define LOG_TAG "MPEG4Extractor" 19 20#include <ctype.h> 21#include <inttypes.h> 22#include <memory> 23#include <stdint.h> 24#include <stdlib.h> 25#include <string.h> 26 27#include <utils/Log.h> 28 29#include "MPEG4Extractor.h" 30#include "SampleTable.h" 31#include "ItemTable.h" 32#include "include/ESDS.h" 33 34#include <media/ExtractorUtils.h> 35#include <media/MediaTrack.h> 36#include <media/stagefright/foundation/ABitReader.h> 37#include <media/stagefright/foundation/ABuffer.h> 38#include <media/stagefright/foundation/ADebug.h> 39#include <media/stagefright/foundation/AMessage.h> 40#include <media/stagefright/foundation/AUtils.h> 41#include <media/stagefright/foundation/ByteUtils.h> 42#include <media/stagefright/foundation/ColorUtils.h> 43#include <media/stagefright/foundation/avc_utils.h> 44#include <media/stagefright/foundation/hexdump.h> 45#include <media/stagefright/MediaBufferBase.h> 46#include <media/stagefright/MediaBufferGroup.h> 47#include <media/stagefright/MediaDefs.h> 48#include <media/stagefright/MetaData.h> 49#include <utils/String8.h> 50 51#include <byteswap.h> 52#include "include/ID3.h" 53 54#ifndef UINT32_MAX 55#define UINT32_MAX (4294967295U) 56#endif 57 58namespace android { 59 60enum { 61 // max track header chunk to return 62 kMaxTrackHeaderSize = 32, 63 64 // maximum size of an atom. Some atoms can be bigger according to the spec, 65 // but we only allow up to this size. 66 kMaxAtomSize = 64 * 1024 * 1024, 67}; 68 69class MPEG4Source : public MediaTrack { 70public: 71 // Caller retains ownership of both "dataSource" and "sampleTable". 72 MPEG4Source(MetaDataBase &format, 73 DataSourceBase *dataSource, 74 int32_t timeScale, 75 const sp<SampleTable> &sampleTable, 76 Vector<SidxEntry> &sidx, 77 const Trex *trex, 78 off64_t firstMoofOffset, 79 const sp<ItemTable> &itemTable); 80 virtual status_t init(); 81 82 virtual status_t start(MetaDataBase *params = NULL); 83 virtual status_t stop(); 84 85 virtual status_t getFormat(MetaDataBase &); 86 87 virtual status_t read(MediaBufferBase **buffer, const ReadOptions *options = NULL); 88 virtual bool supportNonblockingRead() { return true; } 89 virtual status_t fragmentedRead(MediaBufferBase **buffer, const ReadOptions *options = NULL); 90 91 virtual ~MPEG4Source(); 92 93private: 94 Mutex mLock; 95 96 MetaDataBase &mFormat; 97 DataSourceBase *mDataSource; 98 int32_t mTimescale; 99 sp<SampleTable> mSampleTable; 100 uint32_t mCurrentSampleIndex; 101 uint32_t mCurrentFragmentIndex; 102 Vector<SidxEntry> &mSegments; 103 const Trex *mTrex; 104 off64_t mFirstMoofOffset; 105 off64_t mCurrentMoofOffset; 106 off64_t mNextMoofOffset; 107 uint32_t mCurrentTime; 108 int32_t mLastParsedTrackId; 109 int32_t mTrackId; 110 111 int32_t mCryptoMode; // passed in from extractor 112 int32_t mDefaultIVSize; // passed in from extractor 113 uint8_t mCryptoKey[16]; // passed in from extractor 114 uint32_t mCurrentAuxInfoType; 115 uint32_t mCurrentAuxInfoTypeParameter; 116 int32_t mCurrentDefaultSampleInfoSize; 117 uint32_t mCurrentSampleInfoCount; 118 uint32_t mCurrentSampleInfoAllocSize; 119 uint8_t* mCurrentSampleInfoSizes; 120 uint32_t mCurrentSampleInfoOffsetCount; 121 uint32_t mCurrentSampleInfoOffsetsAllocSize; 122 uint64_t* mCurrentSampleInfoOffsets; 123 124 bool mIsAVC; 125 bool mIsHEVC; 126 size_t mNALLengthSize; 127 128 bool mStarted; 129 130 MediaBufferGroup *mGroup; 131 132 MediaBufferBase *mBuffer; 133 134 bool mWantsNALFragments; 135 136 uint8_t *mSrcBuffer; 137 138 bool mIsHeif; 139 sp<ItemTable> mItemTable; 140 141 size_t parseNALSize(const uint8_t *data) const; 142 status_t parseChunk(off64_t *offset); 143 status_t parseTrackFragmentHeader(off64_t offset, off64_t size); 144 status_t parseTrackFragmentRun(off64_t offset, off64_t size); 145 status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size); 146 status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size); 147 148 struct TrackFragmentHeaderInfo { 149 enum Flags { 150 kBaseDataOffsetPresent = 0x01, 151 kSampleDescriptionIndexPresent = 0x02, 152 kDefaultSampleDurationPresent = 0x08, 153 kDefaultSampleSizePresent = 0x10, 154 kDefaultSampleFlagsPresent = 0x20, 155 kDurationIsEmpty = 0x10000, 156 }; 157 158 uint32_t mTrackID; 159 uint32_t mFlags; 160 uint64_t mBaseDataOffset; 161 uint32_t mSampleDescriptionIndex; 162 uint32_t mDefaultSampleDuration; 163 uint32_t mDefaultSampleSize; 164 uint32_t mDefaultSampleFlags; 165 166 uint64_t mDataOffset; 167 }; 168 TrackFragmentHeaderInfo mTrackFragmentHeaderInfo; 169 170 struct Sample { 171 off64_t offset; 172 size_t size; 173 uint32_t duration; 174 int32_t compositionOffset; 175 uint8_t iv[16]; 176 Vector<size_t> clearsizes; 177 Vector<size_t> encryptedsizes; 178 }; 179 Vector<Sample> mCurrentSamples; 180 181 MPEG4Source(const MPEG4Source &); 182 MPEG4Source &operator=(const MPEG4Source &); 183}; 184 185// This custom data source wraps an existing one and satisfies requests 186// falling entirely within a cached range from the cache while forwarding 187// all remaining requests to the wrapped datasource. 188// This is used to cache the full sampletable metadata for a single track, 189// possibly wrapping multiple times to cover all tracks, i.e. 190// Each CachedRangedDataSource caches the sampletable metadata for a single track. 191 192struct CachedRangedDataSource : public DataSourceBase { 193 explicit CachedRangedDataSource(DataSourceBase *source); 194 virtual ~CachedRangedDataSource(); 195 196 virtual status_t initCheck() const; 197 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 198 virtual status_t getSize(off64_t *size); 199 virtual uint32_t flags(); 200 201 status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess); 202 203 204private: 205 Mutex mLock; 206 207 DataSourceBase *mSource; 208 bool mOwnsDataSource; 209 off64_t mCachedOffset; 210 size_t mCachedSize; 211 uint8_t *mCache; 212 213 void clearCache(); 214 215 CachedRangedDataSource(const CachedRangedDataSource &); 216 CachedRangedDataSource &operator=(const CachedRangedDataSource &); 217}; 218 219CachedRangedDataSource::CachedRangedDataSource(DataSourceBase *source) 220 : mSource(source), 221 mOwnsDataSource(false), 222 mCachedOffset(0), 223 mCachedSize(0), 224 mCache(NULL) { 225} 226 227CachedRangedDataSource::~CachedRangedDataSource() { 228 clearCache(); 229 if (mOwnsDataSource) { 230 delete (CachedRangedDataSource*)mSource; 231 } 232} 233 234void CachedRangedDataSource::clearCache() { 235 if (mCache) { 236 free(mCache); 237 mCache = NULL; 238 } 239 240 mCachedOffset = 0; 241 mCachedSize = 0; 242} 243 244status_t CachedRangedDataSource::initCheck() const { 245 return mSource->initCheck(); 246} 247 248ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) { 249 Mutex::Autolock autoLock(mLock); 250 251 if (isInRange(mCachedOffset, mCachedSize, offset, size)) { 252 memcpy(data, &mCache[offset - mCachedOffset], size); 253 return size; 254 } 255 256 return mSource->readAt(offset, data, size); 257} 258 259status_t CachedRangedDataSource::getSize(off64_t *size) { 260 return mSource->getSize(size); 261} 262 263uint32_t CachedRangedDataSource::flags() { 264 return mSource->flags(); 265} 266 267status_t CachedRangedDataSource::setCachedRange(off64_t offset, 268 size_t size, 269 bool assumeSourceOwnershipOnSuccess) { 270 Mutex::Autolock autoLock(mLock); 271 272 clearCache(); 273 274 mCache = (uint8_t *)malloc(size); 275 276 if (mCache == NULL) { 277 return -ENOMEM; 278 } 279 280 mCachedOffset = offset; 281 mCachedSize = size; 282 283 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize); 284 285 if (err < (ssize_t)size) { 286 clearCache(); 287 288 return ERROR_IO; 289 } 290 mOwnsDataSource = assumeSourceOwnershipOnSuccess; 291 return OK; 292} 293 294//////////////////////////////////////////////////////////////////////////////// 295 296static const bool kUseHexDump = false; 297 298static const char *FourCC2MIME(uint32_t fourcc) { 299 switch (fourcc) { 300 case FOURCC('m', 'p', '4', 'a'): 301 return MEDIA_MIMETYPE_AUDIO_AAC; 302 303 case FOURCC('s', 'a', 'm', 'r'): 304 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 305 306 case FOURCC('s', 'a', 'w', 'b'): 307 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 308 309 case FOURCC('m', 'p', '4', 'v'): 310 return MEDIA_MIMETYPE_VIDEO_MPEG4; 311 312 case FOURCC('s', '2', '6', '3'): 313 case FOURCC('h', '2', '6', '3'): 314 case FOURCC('H', '2', '6', '3'): 315 return MEDIA_MIMETYPE_VIDEO_H263; 316 317 case FOURCC('a', 'v', 'c', '1'): 318 return MEDIA_MIMETYPE_VIDEO_AVC; 319 320 case FOURCC('h', 'v', 'c', '1'): 321 case FOURCC('h', 'e', 'v', '1'): 322 return MEDIA_MIMETYPE_VIDEO_HEVC; 323 default: 324 ALOGW("Unknown fourcc: %c%c%c%c", 325 (fourcc >> 24) & 0xff, 326 (fourcc >> 16) & 0xff, 327 (fourcc >> 8) & 0xff, 328 fourcc & 0xff 329 ); 330 return "application/octet-stream"; 331 } 332} 333 334static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 335 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 336 // AMR NB audio is always mono, 8kHz 337 *channels = 1; 338 *rate = 8000; 339 return true; 340 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 341 // AMR WB audio is always mono, 16kHz 342 *channels = 1; 343 *rate = 16000; 344 return true; 345 } 346 return false; 347} 348 349MPEG4Extractor::MPEG4Extractor(DataSourceBase *source, const char *mime) 350 : mMoofOffset(0), 351 mMoofFound(false), 352 mMdatFound(false), 353 mDataSource(source), 354 mCachedSource(NULL), 355 mInitCheck(NO_INIT), 356 mHeaderTimescale(0), 357 mIsQT(false), 358 mIsHeif(false), 359 mHasMoovBox(false), 360 mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)), 361 mFirstTrack(NULL), 362 mLastTrack(NULL) { 363 ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif); 364} 365 366MPEG4Extractor::~MPEG4Extractor() { 367 Track *track = mFirstTrack; 368 while (track) { 369 Track *next = track->next; 370 371 delete track; 372 track = next; 373 } 374 mFirstTrack = mLastTrack = NULL; 375 376 for (size_t i = 0; i < mPssh.size(); i++) { 377 delete [] mPssh[i].data; 378 } 379 mPssh.clear(); 380 381 delete mCachedSource; 382} 383 384uint32_t MPEG4Extractor::flags() const { 385 return CAN_PAUSE | 386 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 387 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 388} 389 390status_t MPEG4Extractor::getMetaData(MetaDataBase &meta) { 391 status_t err; 392 if ((err = readMetaData()) != OK) { 393 return UNKNOWN_ERROR; 394 } 395 meta = mFileMetaData; 396 return OK; 397} 398 399size_t MPEG4Extractor::countTracks() { 400 status_t err; 401 if ((err = readMetaData()) != OK) { 402 ALOGV("MPEG4Extractor::countTracks: no tracks"); 403 return 0; 404 } 405 406 size_t n = 0; 407 Track *track = mFirstTrack; 408 while (track) { 409 ++n; 410 track = track->next; 411 } 412 413 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n); 414 return n; 415} 416 417status_t MPEG4Extractor::getTrackMetaData( 418 MetaDataBase &meta, 419 size_t index, uint32_t flags) { 420 status_t err; 421 if ((err = readMetaData()) != OK) { 422 return UNKNOWN_ERROR; 423 } 424 425 Track *track = mFirstTrack; 426 while (index > 0) { 427 if (track == NULL) { 428 return UNKNOWN_ERROR; 429 } 430 431 track = track->next; 432 --index; 433 } 434 435 if (track == NULL) { 436 return UNKNOWN_ERROR; 437 } 438 439 [=] { 440 int64_t duration; 441 int32_t samplerate; 442 if (track->has_elst && mHeaderTimescale != 0 && 443 track->meta.findInt64(kKeyDuration, &duration) && 444 track->meta.findInt32(kKeySampleRate, &samplerate)) { 445 446 track->has_elst = false; 447 448 if (track->elst_segment_duration > INT64_MAX) { 449 return; 450 } 451 int64_t segment_duration = track->elst_segment_duration; 452 int64_t media_time = track->elst_media_time; 453 int64_t halfscale = mHeaderTimescale / 2; 454 ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64 455 ", halfscale = %" PRId64 ", timescale = %d", 456 segment_duration, 457 media_time, 458 halfscale, 459 mHeaderTimescale); 460 461 int64_t delay; 462 // delay = ((media_time * samplerate) + halfscale) / mHeaderTimescale; 463 if (__builtin_mul_overflow(media_time, samplerate, &delay) || 464 __builtin_add_overflow(delay, halfscale, &delay) || 465 (delay /= mHeaderTimescale, false) || 466 delay > INT32_MAX || 467 delay < INT32_MIN) { 468 return; 469 } 470 ALOGV("delay = %" PRId64, delay); 471 track->meta.setInt32(kKeyEncoderDelay, delay); 472 473 int64_t scaled_duration; 474 // scaled_duration = duration * mHeaderTimescale; 475 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) { 476 return; 477 } 478 ALOGV("scaled_duration = %" PRId64, scaled_duration); 479 480 int64_t segment_end; 481 int64_t padding; 482 // padding = scaled_duration - ((segment_duration + media_time) * 1000000); 483 if (__builtin_add_overflow(segment_duration, media_time, &segment_end) || 484 __builtin_mul_overflow(segment_end, 1000000, &segment_end) || 485 __builtin_sub_overflow(scaled_duration, segment_end, &padding)) { 486 return; 487 } 488 ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding); 489 490 if (padding < 0) { 491 // track duration from media header (which is what kKeyDuration is) might 492 // be slightly shorter than the segment duration, which would make the 493 // padding negative. Clamp to zero. 494 padding = 0; 495 } 496 497 int64_t paddingsamples; 498 int64_t halfscale_e6; 499 int64_t timescale_e6; 500 // paddingsamples = ((padding * samplerate) + (halfscale * 1000000)) 501 // / (mHeaderTimescale * 1000000); 502 if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) || 503 __builtin_mul_overflow(halfscale, 1000000, &halfscale_e6) || 504 __builtin_mul_overflow(mHeaderTimescale, 1000000, ×cale_e6) || 505 __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) || 506 (paddingsamples /= timescale_e6, false) || 507 paddingsamples > INT32_MAX) { 508 return; 509 } 510 ALOGV("paddingsamples = %" PRId64, paddingsamples); 511 track->meta.setInt32(kKeyEncoderPadding, paddingsamples); 512 } 513 }(); 514 515 if ((flags & kIncludeExtensiveMetaData) 516 && !track->includes_expensive_metadata) { 517 track->includes_expensive_metadata = true; 518 519 const char *mime; 520 CHECK(track->meta.findCString(kKeyMIMEType, &mime)); 521 if (!strncasecmp("video/", mime, 6)) { 522 // MPEG2 tracks do not provide CSD, so read the stream header 523 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) { 524 off64_t offset; 525 size_t size; 526 if (track->sampleTable->getMetaDataForSample( 527 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) { 528 if (size > kMaxTrackHeaderSize) { 529 size = kMaxTrackHeaderSize; 530 } 531 uint8_t header[kMaxTrackHeaderSize]; 532 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) { 533 track->meta.setData(kKeyStreamHeader, 'mdat', header, size); 534 } 535 } 536 } 537 538 if (mMoofOffset > 0) { 539 int64_t duration; 540 if (track->meta.findInt64(kKeyDuration, &duration)) { 541 // nothing fancy, just pick a frame near 1/4th of the duration 542 track->meta.setInt64( 543 kKeyThumbnailTime, duration / 4); 544 } 545 } else { 546 uint32_t sampleIndex; 547 uint32_t sampleTime; 548 if (track->timescale != 0 && 549 track->sampleTable->findThumbnailSample(&sampleIndex) == OK 550 && track->sampleTable->getMetaDataForSample( 551 sampleIndex, NULL /* offset */, NULL /* size */, 552 &sampleTime) == OK) { 553 track->meta.setInt64( 554 kKeyThumbnailTime, 555 ((int64_t)sampleTime * 1000000) / track->timescale); 556 } 557 } 558 } 559 } 560 561 meta = track->meta; 562 return OK; 563} 564 565status_t MPEG4Extractor::readMetaData() { 566 if (mInitCheck != NO_INIT) { 567 return mInitCheck; 568 } 569 570 off64_t offset = 0; 571 status_t err; 572 bool sawMoovOrSidx = false; 573 574 while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) || 575 (mIsHeif && (mPreferHeif || !mHasMoovBox) && 576 (mItemTable != NULL) && mItemTable->isValid()))) { 577 off64_t orig_offset = offset; 578 err = parseChunk(&offset, 0); 579 580 if (err != OK && err != UNKNOWN_ERROR) { 581 break; 582 } else if (offset <= orig_offset) { 583 // only continue parsing if the offset was advanced, 584 // otherwise we might end up in an infinite loop 585 ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset); 586 err = ERROR_MALFORMED; 587 break; 588 } else if (err == UNKNOWN_ERROR) { 589 sawMoovOrSidx = true; 590 } 591 } 592 593 if (mIsHeif && (mItemTable != NULL) && (mItemTable->countImages() > 0)) { 594 off64_t exifOffset; 595 size_t exifSize; 596 if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) { 597 mFileMetaData.setInt64(kKeyExifOffset, (int64_t)exifOffset); 598 mFileMetaData.setInt64(kKeyExifSize, (int64_t)exifSize); 599 } 600 for (uint32_t imageIndex = 0; 601 imageIndex < mItemTable->countImages(); imageIndex++) { 602 sp<MetaData> meta = mItemTable->getImageMeta(imageIndex); 603 if (meta == NULL) { 604 ALOGE("heif image %u has no meta!", imageIndex); 605 continue; 606 } 607 // Some heif files advertise image sequence brands (eg. 'hevc') in 608 // ftyp box, but don't have any valid tracks in them. Instead of 609 // reporting the entire file as malformed, we override the error 610 // to allow still images to be extracted. 611 if (err != OK) { 612 ALOGW("Extracting still images only"); 613 err = OK; 614 } 615 mInitCheck = OK; 616 617 ALOGV("adding HEIF image track %u", imageIndex); 618 Track *track = new Track; 619 track->next = NULL; 620 if (mLastTrack != NULL) { 621 mLastTrack->next = track; 622 } else { 623 mFirstTrack = track; 624 } 625 mLastTrack = track; 626 627 track->meta = *(meta.get()); 628 track->meta.setInt32(kKeyTrackID, imageIndex); 629 track->includes_expensive_metadata = false; 630 track->skipTrack = false; 631 track->timescale = 1000000; 632 } 633 } 634 635 if (mInitCheck == OK) { 636 if (findTrackByMimePrefix("video/") != NULL) { 637 mFileMetaData.setCString( 638 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 639 } else if (findTrackByMimePrefix("audio/") != NULL) { 640 mFileMetaData.setCString(kKeyMIMEType, "audio/mp4"); 641 } else if (findTrackByMimePrefix( 642 MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) { 643 mFileMetaData.setCString( 644 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_HEIF); 645 } else { 646 mFileMetaData.setCString(kKeyMIMEType, "application/octet-stream"); 647 } 648 } else { 649 mInitCheck = err; 650 } 651 652 CHECK_NE(err, (status_t)NO_INIT); 653 654 // copy pssh data into file metadata 655 uint64_t psshsize = 0; 656 for (size_t i = 0; i < mPssh.size(); i++) { 657 psshsize += 20 + mPssh[i].datalen; 658 } 659 if (psshsize > 0 && psshsize <= UINT32_MAX) { 660 char *buf = (char*)malloc(psshsize); 661 if (!buf) { 662 ALOGE("b/28471206"); 663 return NO_MEMORY; 664 } 665 char *ptr = buf; 666 for (size_t i = 0; i < mPssh.size(); i++) { 667 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 668 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 669 ptr += (20 + mPssh[i].datalen); 670 } 671 mFileMetaData.setData(kKeyPssh, 'pssh', buf, psshsize); 672 free(buf); 673 } 674 675 return mInitCheck; 676} 677 678struct PathAdder { 679 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 680 : mPath(path) { 681 mPath->push(chunkType); 682 } 683 684 ~PathAdder() { 685 mPath->pop(); 686 } 687 688private: 689 Vector<uint32_t> *mPath; 690 691 PathAdder(const PathAdder &); 692 PathAdder &operator=(const PathAdder &); 693}; 694 695static bool underMetaDataPath(const Vector<uint32_t> &path) { 696 return path.size() >= 5 697 && path[0] == FOURCC('m', 'o', 'o', 'v') 698 && path[1] == FOURCC('u', 'd', 't', 'a') 699 && path[2] == FOURCC('m', 'e', 't', 'a') 700 && path[3] == FOURCC('i', 'l', 's', 't'); 701} 702 703static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) { 704 return path.size() >= 2 705 && path[0] == FOURCC('m', 'o', 'o', 'v') 706 && path[1] == FOURCC('m', 'e', 't', 'a') 707 && (depth == 2 708 || (depth == 3 709 && (path[2] == FOURCC('h', 'd', 'l', 'r') 710 || path[2] == FOURCC('i', 'l', 's', 't') 711 || path[2] == FOURCC('k', 'e', 'y', 's')))); 712} 713 714// Given a time in seconds since Jan 1 1904, produce a human-readable string. 715static bool convertTimeToDate(int64_t time_1904, String8 *s) { 716 // delta between mpeg4 time and unix epoch time 717 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600); 718 if (time_1904 < INT64_MIN + delta) { 719 return false; 720 } 721 time_t time_1970 = time_1904 - delta; 722 723 char tmp[32]; 724 struct tm* tm = gmtime(&time_1970); 725 if (tm != NULL && 726 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) { 727 s->setTo(tmp); 728 return true; 729 } 730 return false; 731} 732 733status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 734 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth); 735 736 if (*offset < 0) { 737 ALOGE("b/23540914"); 738 return ERROR_MALFORMED; 739 } 740 if (depth > 100) { 741 ALOGE("b/27456299"); 742 return ERROR_MALFORMED; 743 } 744 uint32_t hdr[2]; 745 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 746 return ERROR_IO; 747 } 748 uint64_t chunk_size = ntohl(hdr[0]); 749 int32_t chunk_type = ntohl(hdr[1]); 750 off64_t data_offset = *offset + 8; 751 752 if (chunk_size == 1) { 753 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 754 return ERROR_IO; 755 } 756 chunk_size = ntoh64(chunk_size); 757 data_offset += 8; 758 759 if (chunk_size < 16) { 760 // The smallest valid chunk is 16 bytes long in this case. 761 return ERROR_MALFORMED; 762 } 763 } else if (chunk_size == 0) { 764 if (depth == 0) { 765 // atom extends to end of file 766 off64_t sourceSize; 767 if (mDataSource->getSize(&sourceSize) == OK) { 768 chunk_size = (sourceSize - *offset); 769 } else { 770 // XXX could we just pick a "sufficiently large" value here? 771 ALOGE("atom size is 0, and data source has no size"); 772 return ERROR_MALFORMED; 773 } 774 } else { 775 // not allowed for non-toplevel atoms, skip it 776 *offset += 4; 777 return OK; 778 } 779 } else if (chunk_size < 8) { 780 // The smallest valid chunk is 8 bytes long. 781 ALOGE("invalid chunk size: %" PRIu64, chunk_size); 782 return ERROR_MALFORMED; 783 } 784 785 char chunk[5]; 786 MakeFourCCString(chunk_type, chunk); 787 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth); 788 789 if (kUseHexDump) { 790 static const char kWhitespace[] = " "; 791 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 792 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size); 793 794 char buffer[256]; 795 size_t n = chunk_size; 796 if (n > sizeof(buffer)) { 797 n = sizeof(buffer); 798 } 799 if (mDataSource->readAt(*offset, buffer, n) 800 < (ssize_t)n) { 801 return ERROR_IO; 802 } 803 804 hexdump(buffer, n); 805 } 806 807 PathAdder autoAdder(&mPath, chunk_type); 808 809 // (data_offset - *offset) is either 8 or 16 810 off64_t chunk_data_size = chunk_size - (data_offset - *offset); 811 if (chunk_data_size < 0) { 812 ALOGE("b/23540914"); 813 return ERROR_MALFORMED; 814 } 815 if (chunk_type != FOURCC('m', 'd', 'a', 't') && chunk_data_size > kMaxAtomSize) { 816 char errMsg[100]; 817 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size); 818 ALOGE("%s (b/28615448)", errMsg); 819 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg)); 820 return ERROR_MALFORMED; 821 } 822 823 if (chunk_type != FOURCC('c', 'p', 'r', 't') 824 && chunk_type != FOURCC('c', 'o', 'v', 'r') 825 && mPath.size() == 5 && underMetaDataPath(mPath)) { 826 off64_t stop_offset = *offset + chunk_size; 827 *offset = data_offset; 828 while (*offset < stop_offset) { 829 status_t err = parseChunk(offset, depth + 1); 830 if (err != OK) { 831 return err; 832 } 833 } 834 835 if (*offset != stop_offset) { 836 return ERROR_MALFORMED; 837 } 838 839 return OK; 840 } 841 842 switch(chunk_type) { 843 case FOURCC('m', 'o', 'o', 'v'): 844 case FOURCC('t', 'r', 'a', 'k'): 845 case FOURCC('m', 'd', 'i', 'a'): 846 case FOURCC('m', 'i', 'n', 'f'): 847 case FOURCC('d', 'i', 'n', 'f'): 848 case FOURCC('s', 't', 'b', 'l'): 849 case FOURCC('m', 'v', 'e', 'x'): 850 case FOURCC('m', 'o', 'o', 'f'): 851 case FOURCC('t', 'r', 'a', 'f'): 852 case FOURCC('m', 'f', 'r', 'a'): 853 case FOURCC('u', 'd', 't', 'a'): 854 case FOURCC('i', 'l', 's', 't'): 855 case FOURCC('s', 'i', 'n', 'f'): 856 case FOURCC('s', 'c', 'h', 'i'): 857 case FOURCC('e', 'd', 't', 's'): 858 case FOURCC('w', 'a', 'v', 'e'): 859 { 860 if (chunk_type == FOURCC('m', 'o', 'o', 'v') && depth != 0) { 861 ALOGE("moov: depth %d", depth); 862 return ERROR_MALFORMED; 863 } 864 865 if (chunk_type == FOURCC('m', 'o', 'o', 'v') && mInitCheck == OK) { 866 ALOGE("duplicate moov"); 867 return ERROR_MALFORMED; 868 } 869 870 if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) { 871 // store the offset of the first segment 872 mMoofFound = true; 873 mMoofOffset = *offset; 874 } 875 876 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 877 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size); 878 879 if (mDataSource->flags() 880 & (DataSourceBase::kWantsPrefetching 881 | DataSourceBase::kIsCachingDataSource)) { 882 CachedRangedDataSource *cachedSource = 883 new CachedRangedDataSource(mDataSource); 884 885 if (cachedSource->setCachedRange( 886 *offset, chunk_size, 887 mCachedSource != NULL /* assume ownership on success */) == OK) { 888 mDataSource = mCachedSource = cachedSource; 889 } else { 890 delete cachedSource; 891 } 892 } 893 894 if (mLastTrack == NULL) { 895 return ERROR_MALFORMED; 896 } 897 898 mLastTrack->sampleTable = new SampleTable(mDataSource); 899 } 900 901 bool isTrack = false; 902 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 903 if (depth != 1) { 904 ALOGE("trak: depth %d", depth); 905 return ERROR_MALFORMED; 906 } 907 isTrack = true; 908 909 ALOGV("adding new track"); 910 Track *track = new Track; 911 track->next = NULL; 912 if (mLastTrack) { 913 mLastTrack->next = track; 914 } else { 915 mFirstTrack = track; 916 } 917 mLastTrack = track; 918 919 track->includes_expensive_metadata = false; 920 track->skipTrack = false; 921 track->timescale = 0; 922 track->meta.setCString(kKeyMIMEType, "application/octet-stream"); 923 track->has_elst = false; 924 } 925 926 off64_t stop_offset = *offset + chunk_size; 927 *offset = data_offset; 928 while (*offset < stop_offset) { 929 status_t err = parseChunk(offset, depth + 1); 930 if (err != OK) { 931 if (isTrack) { 932 mLastTrack->skipTrack = true; 933 break; 934 } 935 return err; 936 } 937 } 938 939 if (*offset != stop_offset) { 940 return ERROR_MALFORMED; 941 } 942 943 if (isTrack) { 944 int32_t trackId; 945 // There must be exact one track header per track. 946 if (!mLastTrack->meta.findInt32(kKeyTrackID, &trackId)) { 947 mLastTrack->skipTrack = true; 948 } 949 950 status_t err = verifyTrack(mLastTrack); 951 if (err != OK) { 952 mLastTrack->skipTrack = true; 953 } 954 955 if (mLastTrack->skipTrack) { 956 ALOGV("skipping this track..."); 957 Track *cur = mFirstTrack; 958 959 if (cur == mLastTrack) { 960 delete cur; 961 mFirstTrack = mLastTrack = NULL; 962 } else { 963 while (cur && cur->next != mLastTrack) { 964 cur = cur->next; 965 } 966 if (cur) { 967 cur->next = NULL; 968 } 969 delete mLastTrack; 970 mLastTrack = cur; 971 } 972 973 return OK; 974 } 975 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 976 mInitCheck = OK; 977 978 return UNKNOWN_ERROR; // Return a dummy error. 979 } 980 break; 981 } 982 983 case FOURCC('e', 'l', 's', 't'): 984 { 985 *offset += chunk_size; 986 987 if (!mLastTrack) { 988 return ERROR_MALFORMED; 989 } 990 991 // See 14496-12 8.6.6 992 uint8_t version; 993 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 994 return ERROR_IO; 995 } 996 997 uint32_t entry_count; 998 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 999 return ERROR_IO; 1000 } 1001 1002 if (entry_count != 1) { 1003 // we only support a single entry at the moment, for gapless playback 1004 ALOGW("ignoring edit list with %d entries", entry_count); 1005 } else { 1006 off64_t entriesoffset = data_offset + 8; 1007 uint64_t segment_duration; 1008 int64_t media_time; 1009 1010 if (version == 1) { 1011 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 1012 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 1013 return ERROR_IO; 1014 } 1015 } else if (version == 0) { 1016 uint32_t sd; 1017 int32_t mt; 1018 if (!mDataSource->getUInt32(entriesoffset, &sd) || 1019 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 1020 return ERROR_IO; 1021 } 1022 segment_duration = sd; 1023 media_time = mt; 1024 } else { 1025 return ERROR_IO; 1026 } 1027 1028 // save these for later, because the elst atom might precede 1029 // the atoms that actually gives us the duration and sample rate 1030 // needed to calculate the padding and delay values 1031 mLastTrack->has_elst = true; 1032 mLastTrack->elst_media_time = media_time; 1033 mLastTrack->elst_segment_duration = segment_duration; 1034 } 1035 break; 1036 } 1037 1038 case FOURCC('f', 'r', 'm', 'a'): 1039 { 1040 *offset += chunk_size; 1041 1042 uint32_t original_fourcc; 1043 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 1044 return ERROR_IO; 1045 } 1046 original_fourcc = ntohl(original_fourcc); 1047 ALOGV("read original format: %d", original_fourcc); 1048 1049 if (mLastTrack == NULL) { 1050 return ERROR_MALFORMED; 1051 } 1052 1053 mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 1054 uint32_t num_channels = 0; 1055 uint32_t sample_rate = 0; 1056 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 1057 mLastTrack->meta.setInt32(kKeyChannelCount, num_channels); 1058 mLastTrack->meta.setInt32(kKeySampleRate, sample_rate); 1059 } 1060 break; 1061 } 1062 1063 case FOURCC('t', 'e', 'n', 'c'): 1064 { 1065 *offset += chunk_size; 1066 1067 if (chunk_size < 32) { 1068 return ERROR_MALFORMED; 1069 } 1070 1071 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1072 // default IV size, 16 bytes default KeyID 1073 // (ISO 23001-7) 1074 char buf[4]; 1075 memset(buf, 0, 4); 1076 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1077 return ERROR_IO; 1078 } 1079 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1080 if (defaultAlgorithmId > 1) { 1081 // only 0 (clear) and 1 (AES-128) are valid 1082 return ERROR_MALFORMED; 1083 } 1084 1085 memset(buf, 0, 4); 1086 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1087 return ERROR_IO; 1088 } 1089 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 1090 1091 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 1092 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 1093 // only unencrypted data must have 0 IV size 1094 return ERROR_MALFORMED; 1095 } else if (defaultIVSize != 0 && 1096 defaultIVSize != 8 && 1097 defaultIVSize != 16) { 1098 // only supported sizes are 0, 8 and 16 1099 return ERROR_MALFORMED; 1100 } 1101 1102 uint8_t defaultKeyId[16]; 1103 1104 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1105 return ERROR_IO; 1106 } 1107 1108 if (mLastTrack == NULL) 1109 return ERROR_MALFORMED; 1110 1111 mLastTrack->meta.setInt32(kKeyCryptoMode, defaultAlgorithmId); 1112 mLastTrack->meta.setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1113 mLastTrack->meta.setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1114 break; 1115 } 1116 1117 case FOURCC('t', 'k', 'h', 'd'): 1118 { 1119 *offset += chunk_size; 1120 1121 status_t err; 1122 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1123 return err; 1124 } 1125 1126 break; 1127 } 1128 1129 case FOURCC('t', 'r', 'e', 'f'): 1130 { 1131 off64_t stop_offset = *offset + chunk_size; 1132 *offset = data_offset; 1133 while (*offset < stop_offset) { 1134 status_t err = parseChunk(offset, depth + 1); 1135 if (err != OK) { 1136 return err; 1137 } 1138 } 1139 if (*offset != stop_offset) { 1140 return ERROR_MALFORMED; 1141 } 1142 break; 1143 } 1144 1145 case FOURCC('t', 'h', 'm', 'b'): 1146 { 1147 *offset += chunk_size; 1148 1149 if (mLastTrack != NULL) { 1150 // Skip thumbnail track for now since we don't have an 1151 // API to retrieve it yet. 1152 // The thumbnail track can't be accessed by negative index or time, 1153 // because each timed sample has its own corresponding thumbnail 1154 // in the thumbnail track. We'll need a dedicated API to retrieve 1155 // thumbnail at time instead. 1156 mLastTrack->skipTrack = true; 1157 } 1158 1159 break; 1160 } 1161 1162 case FOURCC('p', 's', 's', 'h'): 1163 { 1164 *offset += chunk_size; 1165 1166 PsshInfo pssh; 1167 1168 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1169 return ERROR_IO; 1170 } 1171 1172 uint32_t psshdatalen = 0; 1173 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1174 return ERROR_IO; 1175 } 1176 pssh.datalen = ntohl(psshdatalen); 1177 ALOGV("pssh data size: %d", pssh.datalen); 1178 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) { 1179 // pssh data length exceeds size of containing box 1180 return ERROR_MALFORMED; 1181 } 1182 1183 pssh.data = new (std::nothrow) uint8_t[pssh.datalen]; 1184 if (pssh.data == NULL) { 1185 return ERROR_MALFORMED; 1186 } 1187 ALOGV("allocated pssh @ %p", pssh.data); 1188 ssize_t requested = (ssize_t) pssh.datalen; 1189 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1190 delete[] pssh.data; 1191 return ERROR_IO; 1192 } 1193 mPssh.push_back(pssh); 1194 1195 break; 1196 } 1197 1198 case FOURCC('m', 'd', 'h', 'd'): 1199 { 1200 *offset += chunk_size; 1201 1202 if (chunk_data_size < 4 || mLastTrack == NULL) { 1203 return ERROR_MALFORMED; 1204 } 1205 1206 uint8_t version; 1207 if (mDataSource->readAt( 1208 data_offset, &version, sizeof(version)) 1209 < (ssize_t)sizeof(version)) { 1210 return ERROR_IO; 1211 } 1212 1213 off64_t timescale_offset; 1214 1215 if (version == 1) { 1216 timescale_offset = data_offset + 4 + 16; 1217 } else if (version == 0) { 1218 timescale_offset = data_offset + 4 + 8; 1219 } else { 1220 return ERROR_IO; 1221 } 1222 1223 uint32_t timescale; 1224 if (mDataSource->readAt( 1225 timescale_offset, ×cale, sizeof(timescale)) 1226 < (ssize_t)sizeof(timescale)) { 1227 return ERROR_IO; 1228 } 1229 1230 if (!timescale) { 1231 ALOGE("timescale should not be ZERO."); 1232 return ERROR_MALFORMED; 1233 } 1234 1235 mLastTrack->timescale = ntohl(timescale); 1236 1237 // 14496-12 says all ones means indeterminate, but some files seem to use 1238 // 0 instead. We treat both the same. 1239 int64_t duration = 0; 1240 if (version == 1) { 1241 if (mDataSource->readAt( 1242 timescale_offset + 4, &duration, sizeof(duration)) 1243 < (ssize_t)sizeof(duration)) { 1244 return ERROR_IO; 1245 } 1246 if (duration != -1) { 1247 duration = ntoh64(duration); 1248 } 1249 } else { 1250 uint32_t duration32; 1251 if (mDataSource->readAt( 1252 timescale_offset + 4, &duration32, sizeof(duration32)) 1253 < (ssize_t)sizeof(duration32)) { 1254 return ERROR_IO; 1255 } 1256 if (duration32 != 0xffffffff) { 1257 duration = ntohl(duration32); 1258 } 1259 } 1260 if (duration != 0 && mLastTrack->timescale != 0) { 1261 mLastTrack->meta.setInt64( 1262 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1263 } 1264 1265 uint8_t lang[2]; 1266 off64_t lang_offset; 1267 if (version == 1) { 1268 lang_offset = timescale_offset + 4 + 8; 1269 } else if (version == 0) { 1270 lang_offset = timescale_offset + 4 + 4; 1271 } else { 1272 return ERROR_IO; 1273 } 1274 1275 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1276 < (ssize_t)sizeof(lang)) { 1277 return ERROR_IO; 1278 } 1279 1280 // To get the ISO-639-2/T three character language code 1281 // 1 bit pad followed by 3 5-bits characters. Each character 1282 // is packed as the difference between its ASCII value and 0x60. 1283 char lang_code[4]; 1284 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1285 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1286 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1287 lang_code[3] = '\0'; 1288 1289 mLastTrack->meta.setCString( 1290 kKeyMediaLanguage, lang_code); 1291 1292 break; 1293 } 1294 1295 case FOURCC('s', 't', 's', 'd'): 1296 { 1297 uint8_t buffer[8]; 1298 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1299 return ERROR_MALFORMED; 1300 } 1301 1302 if (mDataSource->readAt( 1303 data_offset, buffer, 8) < 8) { 1304 return ERROR_IO; 1305 } 1306 1307 if (U32_AT(buffer) != 0) { 1308 // Should be version 0, flags 0. 1309 return ERROR_MALFORMED; 1310 } 1311 1312 uint32_t entry_count = U32_AT(&buffer[4]); 1313 1314 if (entry_count > 1) { 1315 // For 3GPP timed text, there could be multiple tx3g boxes contain 1316 // multiple text display formats. These formats will be used to 1317 // display the timed text. 1318 // For encrypted files, there may also be more than one entry. 1319 const char *mime; 1320 1321 if (mLastTrack == NULL) 1322 return ERROR_MALFORMED; 1323 1324 CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime)); 1325 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1326 strcasecmp(mime, "application/octet-stream")) { 1327 // For now we only support a single type of media per track. 1328 mLastTrack->skipTrack = true; 1329 *offset += chunk_size; 1330 break; 1331 } 1332 } 1333 off64_t stop_offset = *offset + chunk_size; 1334 *offset = data_offset + 8; 1335 for (uint32_t i = 0; i < entry_count; ++i) { 1336 status_t err = parseChunk(offset, depth + 1); 1337 if (err != OK) { 1338 return err; 1339 } 1340 } 1341 1342 if (*offset != stop_offset) { 1343 return ERROR_MALFORMED; 1344 } 1345 break; 1346 } 1347 case FOURCC('m', 'e', 't', 't'): 1348 { 1349 *offset += chunk_size; 1350 1351 if (mLastTrack == NULL) 1352 return ERROR_MALFORMED; 1353 1354 auto buffer = heapbuffer<uint8_t>(chunk_data_size); 1355 if (buffer.get() == NULL) { 1356 return NO_MEMORY; 1357 } 1358 1359 if (mDataSource->readAt( 1360 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) { 1361 return ERROR_IO; 1362 } 1363 1364 String8 mimeFormat((const char *)(buffer.get()), chunk_data_size); 1365 mLastTrack->meta.setCString(kKeyMIMEType, mimeFormat.string()); 1366 1367 break; 1368 } 1369 1370 case FOURCC('m', 'p', '4', 'a'): 1371 case FOURCC('e', 'n', 'c', 'a'): 1372 case FOURCC('s', 'a', 'm', 'r'): 1373 case FOURCC('s', 'a', 'w', 'b'): 1374 { 1375 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a') 1376 && depth >= 1 && mPath[depth - 1] == FOURCC('w', 'a', 'v', 'e')) { 1377 // Ignore mp4a embedded in QT wave atom 1378 *offset += chunk_size; 1379 break; 1380 } 1381 1382 uint8_t buffer[8 + 20]; 1383 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1384 // Basic AudioSampleEntry size. 1385 return ERROR_MALFORMED; 1386 } 1387 1388 if (mDataSource->readAt( 1389 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1390 return ERROR_IO; 1391 } 1392 1393 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1394 uint16_t version = U16_AT(&buffer[8]); 1395 uint32_t num_channels = U16_AT(&buffer[16]); 1396 1397 uint16_t sample_size = U16_AT(&buffer[18]); 1398 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1399 1400 if (mLastTrack == NULL) 1401 return ERROR_MALFORMED; 1402 1403 off64_t stop_offset = *offset + chunk_size; 1404 *offset = data_offset + sizeof(buffer); 1405 1406 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')) { 1407 if (version == 1) { 1408 if (mDataSource->readAt(*offset, buffer, 16) < 16) { 1409 return ERROR_IO; 1410 } 1411 1412#if 0 1413 U32_AT(buffer); // samples per packet 1414 U32_AT(&buffer[4]); // bytes per packet 1415 U32_AT(&buffer[8]); // bytes per frame 1416 U32_AT(&buffer[12]); // bytes per sample 1417#endif 1418 *offset += 16; 1419 } else if (version == 2) { 1420 uint8_t v2buffer[36]; 1421 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) { 1422 return ERROR_IO; 1423 } 1424 1425#if 0 1426 U32_AT(v2buffer); // size of struct only 1427 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate 1428 num_channels = U32_AT(&v2buffer[12]); // num audio channels 1429 U32_AT(&v2buffer[16]); // always 0x7f000000 1430 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel 1431 U32_AT(&v2buffer[24]); // format specifc flags 1432 U32_AT(&v2buffer[28]); // const bytes per audio packet 1433 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet 1434#endif 1435 *offset += 36; 1436 } 1437 } 1438 1439 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1440 // if the chunk type is enca, we'll get the type from the frma box later 1441 mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1442 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1443 } 1444 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1445 chunk, num_channels, sample_size, sample_rate); 1446 mLastTrack->meta.setInt32(kKeyChannelCount, num_channels); 1447 mLastTrack->meta.setInt32(kKeySampleRate, sample_rate); 1448 1449 while (*offset < stop_offset) { 1450 status_t err = parseChunk(offset, depth + 1); 1451 if (err != OK) { 1452 return err; 1453 } 1454 } 1455 1456 if (*offset != stop_offset) { 1457 return ERROR_MALFORMED; 1458 } 1459 break; 1460 } 1461 1462 case FOURCC('m', 'p', '4', 'v'): 1463 case FOURCC('e', 'n', 'c', 'v'): 1464 case FOURCC('s', '2', '6', '3'): 1465 case FOURCC('H', '2', '6', '3'): 1466 case FOURCC('h', '2', '6', '3'): 1467 case FOURCC('a', 'v', 'c', '1'): 1468 case FOURCC('h', 'v', 'c', '1'): 1469 case FOURCC('h', 'e', 'v', '1'): 1470 { 1471 uint8_t buffer[78]; 1472 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1473 // Basic VideoSampleEntry size. 1474 return ERROR_MALFORMED; 1475 } 1476 1477 if (mDataSource->readAt( 1478 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1479 return ERROR_IO; 1480 } 1481 1482 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1483 uint16_t width = U16_AT(&buffer[6 + 18]); 1484 uint16_t height = U16_AT(&buffer[6 + 20]); 1485 1486 // The video sample is not standard-compliant if it has invalid dimension. 1487 // Use some default width and height value, and 1488 // let the decoder figure out the actual width and height (and thus 1489 // be prepared for INFO_FOMRAT_CHANGED event). 1490 if (width == 0) width = 352; 1491 if (height == 0) height = 288; 1492 1493 // printf("*** coding='%s' width=%d height=%d\n", 1494 // chunk, width, height); 1495 1496 if (mLastTrack == NULL) 1497 return ERROR_MALFORMED; 1498 1499 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1500 // if the chunk type is encv, we'll get the type from the frma box later 1501 mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1502 } 1503 mLastTrack->meta.setInt32(kKeyWidth, width); 1504 mLastTrack->meta.setInt32(kKeyHeight, height); 1505 1506 off64_t stop_offset = *offset + chunk_size; 1507 *offset = data_offset + sizeof(buffer); 1508 while (*offset < stop_offset) { 1509 status_t err = parseChunk(offset, depth + 1); 1510 if (err != OK) { 1511 return err; 1512 } 1513 } 1514 1515 if (*offset != stop_offset) { 1516 return ERROR_MALFORMED; 1517 } 1518 break; 1519 } 1520 1521 case FOURCC('s', 't', 'c', 'o'): 1522 case FOURCC('c', 'o', '6', '4'): 1523 { 1524 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) { 1525 return ERROR_MALFORMED; 1526 } 1527 1528 status_t err = 1529 mLastTrack->sampleTable->setChunkOffsetParams( 1530 chunk_type, data_offset, chunk_data_size); 1531 1532 *offset += chunk_size; 1533 1534 if (err != OK) { 1535 return err; 1536 } 1537 1538 break; 1539 } 1540 1541 case FOURCC('s', 't', 's', 'c'): 1542 { 1543 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1544 return ERROR_MALFORMED; 1545 1546 status_t err = 1547 mLastTrack->sampleTable->setSampleToChunkParams( 1548 data_offset, chunk_data_size); 1549 1550 *offset += chunk_size; 1551 1552 if (err != OK) { 1553 return err; 1554 } 1555 1556 break; 1557 } 1558 1559 case FOURCC('s', 't', 's', 'z'): 1560 case FOURCC('s', 't', 'z', '2'): 1561 { 1562 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) { 1563 return ERROR_MALFORMED; 1564 } 1565 1566 status_t err = 1567 mLastTrack->sampleTable->setSampleSizeParams( 1568 chunk_type, data_offset, chunk_data_size); 1569 1570 *offset += chunk_size; 1571 1572 if (err != OK) { 1573 return err; 1574 } 1575 1576 size_t max_size; 1577 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1578 1579 if (err != OK) { 1580 return err; 1581 } 1582 1583 if (max_size != 0) { 1584 // Assume that a given buffer only contains at most 10 chunks, 1585 // each chunk originally prefixed with a 2 byte length will 1586 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1587 // and thus will grow by 2 bytes per chunk. 1588 if (max_size > SIZE_MAX - 10 * 2) { 1589 ALOGE("max sample size too big: %zu", max_size); 1590 return ERROR_MALFORMED; 1591 } 1592 mLastTrack->meta.setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1593 } else { 1594 // No size was specified. Pick a conservatively large size. 1595 uint32_t width, height; 1596 if (!mLastTrack->meta.findInt32(kKeyWidth, (int32_t*)&width) || 1597 !mLastTrack->meta.findInt32(kKeyHeight,(int32_t*) &height)) { 1598 ALOGE("No width or height, assuming worst case 1080p"); 1599 width = 1920; 1600 height = 1080; 1601 } else { 1602 // A resolution was specified, check that it's not too big. The values below 1603 // were chosen so that the calculations below don't cause overflows, they're 1604 // not indicating that resolutions up to 32kx32k are actually supported. 1605 if (width > 32768 || height > 32768) { 1606 ALOGE("can't support %u x %u video", width, height); 1607 return ERROR_MALFORMED; 1608 } 1609 } 1610 1611 const char *mime; 1612 CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime)); 1613 if (!strncmp(mime, "audio/", 6)) { 1614 // for audio, use 128KB 1615 max_size = 1024 * 128; 1616 } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC) 1617 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 1618 // AVC & HEVC requires compression ratio of at least 2, and uses 1619 // macroblocks 1620 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1621 } else { 1622 // For all other formats there is no minimum compression 1623 // ratio. Use compression ratio of 1. 1624 max_size = width * height * 3 / 2; 1625 } 1626 // HACK: allow 10% overhead 1627 // TODO: read sample size from traf atom for fragmented MPEG4. 1628 max_size += max_size / 10; 1629 mLastTrack->meta.setInt32(kKeyMaxInputSize, max_size); 1630 } 1631 1632 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1633 // mimetype) previously obtained, so don't cache them. 1634 const char *mime; 1635 CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime)); 1636 // Calculate average frame rate. 1637 if (!strncasecmp("video/", mime, 6)) { 1638 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1639 if (nSamples == 0) { 1640 int32_t trackId; 1641 if (mLastTrack->meta.findInt32(kKeyTrackID, &trackId)) { 1642 for (size_t i = 0; i < mTrex.size(); i++) { 1643 Trex *t = &mTrex.editItemAt(i); 1644 if (t->track_ID == (uint32_t) trackId) { 1645 if (t->default_sample_duration > 0) { 1646 int32_t frameRate = 1647 mLastTrack->timescale / t->default_sample_duration; 1648 mLastTrack->meta.setInt32(kKeyFrameRate, frameRate); 1649 } 1650 break; 1651 } 1652 } 1653 } 1654 } else { 1655 int64_t durationUs; 1656 if (mLastTrack->meta.findInt64(kKeyDuration, &durationUs)) { 1657 if (durationUs > 0) { 1658 int32_t frameRate = (nSamples * 1000000LL + 1659 (durationUs >> 1)) / durationUs; 1660 mLastTrack->meta.setInt32(kKeyFrameRate, frameRate); 1661 } 1662 } 1663 ALOGV("setting frame count %zu", nSamples); 1664 mLastTrack->meta.setInt32(kKeyFrameCount, nSamples); 1665 } 1666 } 1667 1668 break; 1669 } 1670 1671 case FOURCC('s', 't', 't', 's'): 1672 { 1673 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1674 return ERROR_MALFORMED; 1675 1676 *offset += chunk_size; 1677 1678 status_t err = 1679 mLastTrack->sampleTable->setTimeToSampleParams( 1680 data_offset, chunk_data_size); 1681 1682 if (err != OK) { 1683 return err; 1684 } 1685 1686 break; 1687 } 1688 1689 case FOURCC('c', 't', 't', 's'): 1690 { 1691 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1692 return ERROR_MALFORMED; 1693 1694 *offset += chunk_size; 1695 1696 status_t err = 1697 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1698 data_offset, chunk_data_size); 1699 1700 if (err != OK) { 1701 return err; 1702 } 1703 1704 break; 1705 } 1706 1707 case FOURCC('s', 't', 's', 's'): 1708 { 1709 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1710 return ERROR_MALFORMED; 1711 1712 *offset += chunk_size; 1713 1714 status_t err = 1715 mLastTrack->sampleTable->setSyncSampleParams( 1716 data_offset, chunk_data_size); 1717 1718 if (err != OK) { 1719 return err; 1720 } 1721 1722 break; 1723 } 1724 1725 // \xA9xyz 1726 case FOURCC(0xA9, 'x', 'y', 'z'): 1727 { 1728 *offset += chunk_size; 1729 1730 // Best case the total data length inside "\xA9xyz" box would 1731 // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/", 1732 // where "\x00\x05" is the text string length with value = 5, 1733 // "\0x15\xc7" is the language code = en, and "+0+0/" is a 1734 // location (string) value with longitude = 0 and latitude = 0. 1735 // Since some devices encountered in the wild omit the trailing 1736 // slash, we'll allow that. 1737 if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing / 1738 return ERROR_MALFORMED; 1739 } 1740 1741 uint16_t len; 1742 if (!mDataSource->getUInt16(data_offset, &len)) { 1743 return ERROR_IO; 1744 } 1745 1746 // allow "+0+0" without trailing slash 1747 if (len < 4 || len > chunk_data_size - 4) { 1748 return ERROR_MALFORMED; 1749 } 1750 // The location string following the language code is formatted 1751 // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709). 1752 // Allocate 2 extra bytes, in case we need to add a trailing slash, 1753 // and to add a terminating 0. 1754 std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]()); 1755 if (!buffer) { 1756 return NO_MEMORY; 1757 } 1758 1759 if (mDataSource->readAt( 1760 data_offset + 4, &buffer[0], len) < len) { 1761 return ERROR_IO; 1762 } 1763 1764 len = strlen(&buffer[0]); 1765 if (len < 4) { 1766 return ERROR_MALFORMED; 1767 } 1768 // Add a trailing slash if there wasn't one. 1769 if (buffer[len - 1] != '/') { 1770 buffer[len] = '/'; 1771 } 1772 mFileMetaData.setCString(kKeyLocation, &buffer[0]); 1773 break; 1774 } 1775 1776 case FOURCC('e', 's', 'd', 's'): 1777 { 1778 *offset += chunk_size; 1779 1780 if (chunk_data_size < 4) { 1781 return ERROR_MALFORMED; 1782 } 1783 1784 uint8_t buffer[256]; 1785 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1786 return ERROR_BUFFER_TOO_SMALL; 1787 } 1788 1789 if (mDataSource->readAt( 1790 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1791 return ERROR_IO; 1792 } 1793 1794 if (U32_AT(buffer) != 0) { 1795 // Should be version 0, flags 0. 1796 return ERROR_MALFORMED; 1797 } 1798 1799 if (mLastTrack == NULL) 1800 return ERROR_MALFORMED; 1801 1802 mLastTrack->meta.setData( 1803 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1804 1805 if (mPath.size() >= 2 1806 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1807 // Information from the ESDS must be relied on for proper 1808 // setup of sample rate and channel count for MPEG4 Audio. 1809 // The generic header appears to only contain generic 1810 // information... 1811 1812 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1813 &buffer[4], chunk_data_size - 4); 1814 1815 if (err != OK) { 1816 return err; 1817 } 1818 } 1819 if (mPath.size() >= 2 1820 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) { 1821 // Check if the video is MPEG2 1822 ESDS esds(&buffer[4], chunk_data_size - 4); 1823 1824 uint8_t objectTypeIndication; 1825 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) { 1826 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) { 1827 mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2); 1828 } 1829 } 1830 } 1831 break; 1832 } 1833 1834 case FOURCC('b', 't', 'r', 't'): 1835 { 1836 *offset += chunk_size; 1837 if (mLastTrack == NULL) { 1838 return ERROR_MALFORMED; 1839 } 1840 1841 uint8_t buffer[12]; 1842 if (chunk_data_size != sizeof(buffer)) { 1843 return ERROR_MALFORMED; 1844 } 1845 1846 if (mDataSource->readAt( 1847 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1848 return ERROR_IO; 1849 } 1850 1851 uint32_t maxBitrate = U32_AT(&buffer[4]); 1852 uint32_t avgBitrate = U32_AT(&buffer[8]); 1853 if (maxBitrate > 0 && maxBitrate < INT32_MAX) { 1854 mLastTrack->meta.setInt32(kKeyMaxBitRate, (int32_t)maxBitrate); 1855 } 1856 if (avgBitrate > 0 && avgBitrate < INT32_MAX) { 1857 mLastTrack->meta.setInt32(kKeyBitRate, (int32_t)avgBitrate); 1858 } 1859 break; 1860 } 1861 1862 case FOURCC('a', 'v', 'c', 'C'): 1863 { 1864 *offset += chunk_size; 1865 1866 auto buffer = heapbuffer<uint8_t>(chunk_data_size); 1867 1868 if (buffer.get() == NULL) { 1869 ALOGE("b/28471206"); 1870 return NO_MEMORY; 1871 } 1872 1873 if (mDataSource->readAt( 1874 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) { 1875 return ERROR_IO; 1876 } 1877 1878 if (mLastTrack == NULL) 1879 return ERROR_MALFORMED; 1880 1881 mLastTrack->meta.setData( 1882 kKeyAVCC, kTypeAVCC, buffer.get(), chunk_data_size); 1883 1884 break; 1885 } 1886 case FOURCC('h', 'v', 'c', 'C'): 1887 { 1888 auto buffer = heapbuffer<uint8_t>(chunk_data_size); 1889 1890 if (buffer.get() == NULL) { 1891 ALOGE("b/28471206"); 1892 return NO_MEMORY; 1893 } 1894 1895 if (mDataSource->readAt( 1896 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) { 1897 return ERROR_IO; 1898 } 1899 1900 if (mLastTrack == NULL) 1901 return ERROR_MALFORMED; 1902 1903 mLastTrack->meta.setData( 1904 kKeyHVCC, kTypeHVCC, buffer.get(), chunk_data_size); 1905 1906 *offset += chunk_size; 1907 break; 1908 } 1909 1910 case FOURCC('d', '2', '6', '3'): 1911 { 1912 *offset += chunk_size; 1913 /* 1914 * d263 contains a fixed 7 bytes part: 1915 * vendor - 4 bytes 1916 * version - 1 byte 1917 * level - 1 byte 1918 * profile - 1 byte 1919 * optionally, "d263" box itself may contain a 16-byte 1920 * bit rate box (bitr) 1921 * average bit rate - 4 bytes 1922 * max bit rate - 4 bytes 1923 */ 1924 char buffer[23]; 1925 if (chunk_data_size != 7 && 1926 chunk_data_size != 23) { 1927 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size); 1928 return ERROR_MALFORMED; 1929 } 1930 1931 if (mDataSource->readAt( 1932 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1933 return ERROR_IO; 1934 } 1935 1936 if (mLastTrack == NULL) 1937 return ERROR_MALFORMED; 1938 1939 mLastTrack->meta.setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 1940 1941 break; 1942 } 1943 1944 case FOURCC('m', 'e', 't', 'a'): 1945 { 1946 off64_t stop_offset = *offset + chunk_size; 1947 *offset = data_offset; 1948 bool isParsingMetaKeys = underQTMetaPath(mPath, 2); 1949 if (!isParsingMetaKeys) { 1950 uint8_t buffer[4]; 1951 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1952 *offset = stop_offset; 1953 return ERROR_MALFORMED; 1954 } 1955 1956 if (mDataSource->readAt( 1957 data_offset, buffer, 4) < 4) { 1958 *offset = stop_offset; 1959 return ERROR_IO; 1960 } 1961 1962 if (U32_AT(buffer) != 0) { 1963 // Should be version 0, flags 0. 1964 1965 // If it's not, let's assume this is one of those 1966 // apparently malformed chunks that don't have flags 1967 // and completely different semantics than what's 1968 // in the MPEG4 specs and skip it. 1969 *offset = stop_offset; 1970 return OK; 1971 } 1972 *offset += sizeof(buffer); 1973 } 1974 1975 while (*offset < stop_offset) { 1976 status_t err = parseChunk(offset, depth + 1); 1977 if (err != OK) { 1978 return err; 1979 } 1980 } 1981 1982 if (*offset != stop_offset) { 1983 return ERROR_MALFORMED; 1984 } 1985 break; 1986 } 1987 1988 case FOURCC('i', 'l', 'o', 'c'): 1989 case FOURCC('i', 'i', 'n', 'f'): 1990 case FOURCC('i', 'p', 'r', 'p'): 1991 case FOURCC('p', 'i', 't', 'm'): 1992 case FOURCC('i', 'd', 'a', 't'): 1993 case FOURCC('i', 'r', 'e', 'f'): 1994 case FOURCC('i', 'p', 'r', 'o'): 1995 { 1996 if (mIsHeif) { 1997 if (mItemTable == NULL) { 1998 mItemTable = new ItemTable(mDataSource); 1999 } 2000 status_t err = mItemTable->parse( 2001 chunk_type, data_offset, chunk_data_size); 2002 if (err != OK) { 2003 return err; 2004 } 2005 } 2006 *offset += chunk_size; 2007 break; 2008 } 2009 2010 case FOURCC('m', 'e', 'a', 'n'): 2011 case FOURCC('n', 'a', 'm', 'e'): 2012 case FOURCC('d', 'a', 't', 'a'): 2013 { 2014 *offset += chunk_size; 2015 2016 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 2017 status_t err = parseITunesMetaData(data_offset, chunk_data_size); 2018 2019 if (err != OK) { 2020 return err; 2021 } 2022 } 2023 2024 break; 2025 } 2026 2027 case FOURCC('m', 'v', 'h', 'd'): 2028 { 2029 *offset += chunk_size; 2030 2031 if (depth != 1) { 2032 ALOGE("mvhd: depth %d", depth); 2033 return ERROR_MALFORMED; 2034 } 2035 if (chunk_data_size < 32) { 2036 return ERROR_MALFORMED; 2037 } 2038 2039 uint8_t header[32]; 2040 if (mDataSource->readAt( 2041 data_offset, header, sizeof(header)) 2042 < (ssize_t)sizeof(header)) { 2043 return ERROR_IO; 2044 } 2045 2046 uint64_t creationTime; 2047 uint64_t duration = 0; 2048 if (header[0] == 1) { 2049 creationTime = U64_AT(&header[4]); 2050 mHeaderTimescale = U32_AT(&header[20]); 2051 duration = U64_AT(&header[24]); 2052 if (duration == 0xffffffffffffffff) { 2053 duration = 0; 2054 } 2055 } else if (header[0] != 0) { 2056 return ERROR_MALFORMED; 2057 } else { 2058 creationTime = U32_AT(&header[4]); 2059 mHeaderTimescale = U32_AT(&header[12]); 2060 uint32_t d32 = U32_AT(&header[16]); 2061 if (d32 == 0xffffffff) { 2062 d32 = 0; 2063 } 2064 duration = d32; 2065 } 2066 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) { 2067 mFileMetaData.setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 2068 } 2069 2070 String8 s; 2071 if (convertTimeToDate(creationTime, &s)) { 2072 mFileMetaData.setCString(kKeyDate, s.string()); 2073 } 2074 2075 2076 break; 2077 } 2078 2079 case FOURCC('m', 'e', 'h', 'd'): 2080 { 2081 *offset += chunk_size; 2082 2083 if (chunk_data_size < 8) { 2084 return ERROR_MALFORMED; 2085 } 2086 2087 uint8_t flags[4]; 2088 if (mDataSource->readAt( 2089 data_offset, flags, sizeof(flags)) 2090 < (ssize_t)sizeof(flags)) { 2091 return ERROR_IO; 2092 } 2093 2094 uint64_t duration = 0; 2095 if (flags[0] == 1) { 2096 // 64 bit 2097 if (chunk_data_size < 12) { 2098 return ERROR_MALFORMED; 2099 } 2100 mDataSource->getUInt64(data_offset + 4, &duration); 2101 if (duration == 0xffffffffffffffff) { 2102 duration = 0; 2103 } 2104 } else if (flags[0] == 0) { 2105 // 32 bit 2106 uint32_t d32; 2107 mDataSource->getUInt32(data_offset + 4, &d32); 2108 if (d32 == 0xffffffff) { 2109 d32 = 0; 2110 } 2111 duration = d32; 2112 } else { 2113 return ERROR_MALFORMED; 2114 } 2115 2116 if (duration != 0 && mHeaderTimescale != 0) { 2117 mFileMetaData.setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 2118 } 2119 2120 break; 2121 } 2122 2123 case FOURCC('m', 'd', 'a', 't'): 2124 { 2125 mMdatFound = true; 2126 2127 *offset += chunk_size; 2128 break; 2129 } 2130 2131 case FOURCC('h', 'd', 'l', 'r'): 2132 { 2133 *offset += chunk_size; 2134 2135 if (underQTMetaPath(mPath, 3)) { 2136 break; 2137 } 2138 2139 uint32_t buffer; 2140 if (mDataSource->readAt( 2141 data_offset + 8, &buffer, 4) < 4) { 2142 return ERROR_IO; 2143 } 2144 2145 uint32_t type = ntohl(buffer); 2146 // For the 3GPP file format, the handler-type within the 'hdlr' box 2147 // shall be 'text'. We also want to support 'sbtl' handler type 2148 // for a practical reason as various MPEG4 containers use it. 2149 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 2150 if (mLastTrack != NULL) { 2151 mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 2152 } 2153 } 2154 2155 break; 2156 } 2157 2158 case FOURCC('k', 'e', 'y', 's'): 2159 { 2160 *offset += chunk_size; 2161 2162 if (underQTMetaPath(mPath, 3)) { 2163 status_t err = parseQTMetaKey(data_offset, chunk_data_size); 2164 if (err != OK) { 2165 return err; 2166 } 2167 } 2168 break; 2169 } 2170 2171 case FOURCC('t', 'r', 'e', 'x'): 2172 { 2173 *offset += chunk_size; 2174 2175 if (chunk_data_size < 24) { 2176 return ERROR_IO; 2177 } 2178 Trex trex; 2179 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) || 2180 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) || 2181 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) || 2182 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) || 2183 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) { 2184 return ERROR_IO; 2185 } 2186 mTrex.add(trex); 2187 break; 2188 } 2189 2190 case FOURCC('t', 'x', '3', 'g'): 2191 { 2192 if (mLastTrack == NULL) 2193 return ERROR_MALFORMED; 2194 2195 uint32_t type; 2196 const void *data; 2197 size_t size = 0; 2198 if (!mLastTrack->meta.findData( 2199 kKeyTextFormatData, &type, &data, &size)) { 2200 size = 0; 2201 } 2202 2203 if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) { 2204 return ERROR_MALFORMED; 2205 } 2206 2207 uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size]; 2208 if (buffer == NULL) { 2209 return ERROR_MALFORMED; 2210 } 2211 2212 if (size > 0) { 2213 memcpy(buffer, data, size); 2214 } 2215 2216 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 2217 < chunk_size) { 2218 delete[] buffer; 2219 buffer = NULL; 2220 2221 // advance read pointer so we don't end up reading this again 2222 *offset += chunk_size; 2223 return ERROR_IO; 2224 } 2225 2226 mLastTrack->meta.setData( 2227 kKeyTextFormatData, 0, buffer, size + chunk_size); 2228 2229 delete[] buffer; 2230 2231 *offset += chunk_size; 2232 break; 2233 } 2234 2235 case FOURCC('c', 'o', 'v', 'r'): 2236 { 2237 *offset += chunk_size; 2238 2239 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64, 2240 chunk_data_size, data_offset); 2241 2242 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) { 2243 return ERROR_MALFORMED; 2244 } 2245 auto buffer = heapbuffer<uint8_t>(chunk_data_size); 2246 if (buffer.get() == NULL) { 2247 ALOGE("b/28471206"); 2248 return NO_MEMORY; 2249 } 2250 if (mDataSource->readAt( 2251 data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) { 2252 return ERROR_IO; 2253 } 2254 const int kSkipBytesOfDataBox = 16; 2255 if (chunk_data_size <= kSkipBytesOfDataBox) { 2256 return ERROR_MALFORMED; 2257 } 2258 2259 mFileMetaData.setData( 2260 kKeyAlbumArt, MetaData::TYPE_NONE, 2261 buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 2262 2263 break; 2264 } 2265 2266 case FOURCC('c', 'o', 'l', 'r'): 2267 { 2268 *offset += chunk_size; 2269 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd') 2270 // ignore otherwise 2271 if (depth >= 2 && mPath[depth - 2] == FOURCC('s', 't', 's', 'd')) { 2272 status_t err = parseColorInfo(data_offset, chunk_data_size); 2273 if (err != OK) { 2274 return err; 2275 } 2276 } 2277 2278 break; 2279 } 2280 2281 case FOURCC('t', 'i', 't', 'l'): 2282 case FOURCC('p', 'e', 'r', 'f'): 2283 case FOURCC('a', 'u', 't', 'h'): 2284 case FOURCC('g', 'n', 'r', 'e'): 2285 case FOURCC('a', 'l', 'b', 'm'): 2286 case FOURCC('y', 'r', 'r', 'c'): 2287 { 2288 *offset += chunk_size; 2289 2290 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth); 2291 2292 if (err != OK) { 2293 return err; 2294 } 2295 2296 break; 2297 } 2298 2299 case FOURCC('I', 'D', '3', '2'): 2300 { 2301 *offset += chunk_size; 2302 2303 if (chunk_data_size < 6) { 2304 return ERROR_MALFORMED; 2305 } 2306 2307 parseID3v2MetaData(data_offset + 6); 2308 2309 break; 2310 } 2311 2312 case FOURCC('-', '-', '-', '-'): 2313 { 2314 mLastCommentMean.clear(); 2315 mLastCommentName.clear(); 2316 mLastCommentData.clear(); 2317 *offset += chunk_size; 2318 break; 2319 } 2320 2321 case FOURCC('s', 'i', 'd', 'x'): 2322 { 2323 status_t err = parseSegmentIndex(data_offset, chunk_data_size); 2324 if (err != OK) { 2325 return err; 2326 } 2327 *offset += chunk_size; 2328 return UNKNOWN_ERROR; // stop parsing after sidx 2329 } 2330 2331 case FOURCC('a', 'c', '-', '3'): 2332 { 2333 *offset += chunk_size; 2334 return parseAC3SampleEntry(data_offset); 2335 } 2336 2337 case FOURCC('f', 't', 'y', 'p'): 2338 { 2339 if (chunk_data_size < 8 || depth != 0) { 2340 return ERROR_MALFORMED; 2341 } 2342 2343 off64_t stop_offset = *offset + chunk_size; 2344 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4; 2345 std::set<uint32_t> brandSet; 2346 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 2347 if (i == 1) { 2348 // Skip this index, it refers to the minorVersion, 2349 // not a brand. 2350 continue; 2351 } 2352 2353 uint32_t brand; 2354 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) { 2355 return ERROR_MALFORMED; 2356 } 2357 2358 brand = ntohl(brand); 2359 brandSet.insert(brand); 2360 } 2361 2362 if (brandSet.count(FOURCC('q', 't', ' ', ' ')) > 0) { 2363 mIsQT = true; 2364 } else { 2365 if (brandSet.count(FOURCC('m', 'i', 'f', '1')) > 0 2366 && brandSet.count(FOURCC('h', 'e', 'i', 'c')) > 0) { 2367 ALOGV("identified HEIF image"); 2368 2369 mIsHeif = true; 2370 brandSet.erase(FOURCC('m', 'i', 'f', '1')); 2371 brandSet.erase(FOURCC('h', 'e', 'i', 'c')); 2372 } 2373 2374 if (!brandSet.empty()) { 2375 // This means that the file should have moov box. 2376 // It could be any iso files (mp4, heifs, etc.) 2377 mHasMoovBox = true; 2378 if (mIsHeif) { 2379 ALOGV("identified HEIF image with other tracks"); 2380 } 2381 } 2382 } 2383 2384 *offset = stop_offset; 2385 2386 break; 2387 } 2388 2389 default: 2390 { 2391 // check if we're parsing 'ilst' for meta keys 2392 // if so, treat type as a number (key-id). 2393 if (underQTMetaPath(mPath, 3)) { 2394 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size); 2395 if (err != OK) { 2396 return err; 2397 } 2398 } 2399 2400 *offset += chunk_size; 2401 break; 2402 } 2403 } 2404 2405 return OK; 2406} 2407 2408status_t MPEG4Extractor::parseAC3SampleEntry(off64_t offset) { 2409 // skip 16 bytes: 2410 // + 6-byte reserved, 2411 // + 2-byte data reference index, 2412 // + 8-byte reserved 2413 offset += 16; 2414 uint16_t channelCount; 2415 if (!mDataSource->getUInt16(offset, &channelCount)) { 2416 return ERROR_MALFORMED; 2417 } 2418 // skip 8 bytes: 2419 // + 2-byte channelCount, 2420 // + 2-byte sample size, 2421 // + 4-byte reserved 2422 offset += 8; 2423 uint16_t sampleRate; 2424 if (!mDataSource->getUInt16(offset, &sampleRate)) { 2425 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read sample rate"); 2426 return ERROR_MALFORMED; 2427 } 2428 2429 // skip 4 bytes: 2430 // + 2-byte sampleRate, 2431 // + 2-byte reserved 2432 offset += 4; 2433 return parseAC3SpecificBox(offset, sampleRate); 2434} 2435 2436status_t MPEG4Extractor::parseAC3SpecificBox( 2437 off64_t offset, uint16_t sampleRate) { 2438 uint32_t size; 2439 // + 4-byte size 2440 // + 4-byte type 2441 // + 3-byte payload 2442 const uint32_t kAC3SpecificBoxSize = 11; 2443 if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) { 2444 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size"); 2445 return ERROR_MALFORMED; 2446 } 2447 2448 offset += 4; 2449 uint32_t type; 2450 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC('d', 'a', 'c', '3')) { 2451 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3"); 2452 return ERROR_MALFORMED; 2453 } 2454 2455 offset += 4; 2456 const uint32_t kAC3SpecificBoxPayloadSize = 3; 2457 uint8_t chunk[kAC3SpecificBoxPayloadSize]; 2458 if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) { 2459 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields"); 2460 return ERROR_MALFORMED; 2461 } 2462 2463 ABitReader br(chunk, sizeof(chunk)); 2464 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5}; 2465 static const unsigned sampleRateTable[] = {48000, 44100, 32000}; 2466 2467 unsigned fscod = br.getBits(2); 2468 if (fscod == 3) { 2469 ALOGE("Incorrect fscod (3) in AC3 header"); 2470 return ERROR_MALFORMED; 2471 } 2472 unsigned boxSampleRate = sampleRateTable[fscod]; 2473 if (boxSampleRate != sampleRate) { 2474 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d", 2475 boxSampleRate, sampleRate); 2476 return ERROR_MALFORMED; 2477 } 2478 2479 unsigned bsid = br.getBits(5); 2480 if (bsid > 8) { 2481 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?"); 2482 return ERROR_MALFORMED; 2483 } 2484 2485 // skip 2486 unsigned bsmod __unused = br.getBits(3); 2487 2488 unsigned acmod = br.getBits(3); 2489 unsigned lfeon = br.getBits(1); 2490 unsigned channelCount = channelCountTable[acmod] + lfeon; 2491 2492 if (mLastTrack == NULL) { 2493 return ERROR_MALFORMED; 2494 } 2495 mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3); 2496 mLastTrack->meta.setInt32(kKeyChannelCount, channelCount); 2497 mLastTrack->meta.setInt32(kKeySampleRate, sampleRate); 2498 return OK; 2499} 2500 2501status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 2502 ALOGV("MPEG4Extractor::parseSegmentIndex"); 2503 2504 if (size < 12) { 2505 return -EINVAL; 2506 } 2507 2508 uint32_t flags; 2509 if (!mDataSource->getUInt32(offset, &flags)) { 2510 return ERROR_MALFORMED; 2511 } 2512 2513 uint32_t version = flags >> 24; 2514 flags &= 0xffffff; 2515 2516 ALOGV("sidx version %d", version); 2517 2518 uint32_t referenceId; 2519 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 2520 return ERROR_MALFORMED; 2521 } 2522 2523 uint32_t timeScale; 2524 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 2525 return ERROR_MALFORMED; 2526 } 2527 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 2528 if (timeScale == 0) 2529 return ERROR_MALFORMED; 2530 2531 uint64_t earliestPresentationTime; 2532 uint64_t firstOffset; 2533 2534 offset += 12; 2535 size -= 12; 2536 2537 if (version == 0) { 2538 if (size < 8) { 2539 return -EINVAL; 2540 } 2541 uint32_t tmp; 2542 if (!mDataSource->getUInt32(offset, &tmp)) { 2543 return ERROR_MALFORMED; 2544 } 2545 earliestPresentationTime = tmp; 2546 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 2547 return ERROR_MALFORMED; 2548 } 2549 firstOffset = tmp; 2550 offset += 8; 2551 size -= 8; 2552 } else { 2553 if (size < 16) { 2554 return -EINVAL; 2555 } 2556 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 2557 return ERROR_MALFORMED; 2558 } 2559 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 2560 return ERROR_MALFORMED; 2561 } 2562 offset += 16; 2563 size -= 16; 2564 } 2565 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset); 2566 2567 if (size < 4) { 2568 return -EINVAL; 2569 } 2570 2571 uint16_t referenceCount; 2572 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 2573 return ERROR_MALFORMED; 2574 } 2575 offset += 4; 2576 size -= 4; 2577 ALOGV("refcount: %d", referenceCount); 2578 2579 if (size < referenceCount * 12) { 2580 return -EINVAL; 2581 } 2582 2583 uint64_t total_duration = 0; 2584 for (unsigned int i = 0; i < referenceCount; i++) { 2585 uint32_t d1, d2, d3; 2586 2587 if (!mDataSource->getUInt32(offset, &d1) || // size 2588 !mDataSource->getUInt32(offset + 4, &d2) || // duration 2589 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 2590 return ERROR_MALFORMED; 2591 } 2592 2593 if (d1 & 0x80000000) { 2594 ALOGW("sub-sidx boxes not supported yet"); 2595 } 2596 bool sap = d3 & 0x80000000; 2597 uint32_t saptype = (d3 >> 28) & 7; 2598 if (!sap || (saptype != 1 && saptype != 2)) { 2599 // type 1 and 2 are sync samples 2600 ALOGW("not a stream access point, or unsupported type: %08x", d3); 2601 } 2602 total_duration += d2; 2603 offset += 12; 2604 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 2605 SidxEntry se; 2606 se.mSize = d1 & 0x7fffffff; 2607 se.mDurationUs = 1000000LL * d2 / timeScale; 2608 mSidxEntries.add(se); 2609 } 2610 2611 uint64_t sidxDuration = total_duration * 1000000 / timeScale; 2612 2613 if (mLastTrack == NULL) 2614 return ERROR_MALFORMED; 2615 2616 int64_t metaDuration; 2617 if (!mLastTrack->meta.findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 2618 mLastTrack->meta.setInt64(kKeyDuration, sidxDuration); 2619 } 2620 return OK; 2621} 2622 2623status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) { 2624 if (size < 8) { 2625 return ERROR_MALFORMED; 2626 } 2627 2628 uint32_t count; 2629 if (!mDataSource->getUInt32(offset + 4, &count)) { 2630 return ERROR_MALFORMED; 2631 } 2632 2633 if (mMetaKeyMap.size() > 0) { 2634 ALOGW("'keys' atom seen again, discarding existing entries"); 2635 mMetaKeyMap.clear(); 2636 } 2637 2638 off64_t keyOffset = offset + 8; 2639 off64_t stopOffset = offset + size; 2640 for (size_t i = 1; i <= count; i++) { 2641 if (keyOffset + 8 > stopOffset) { 2642 return ERROR_MALFORMED; 2643 } 2644 2645 uint32_t keySize; 2646 if (!mDataSource->getUInt32(keyOffset, &keySize) 2647 || keySize < 8 2648 || keyOffset + keySize > stopOffset) { 2649 return ERROR_MALFORMED; 2650 } 2651 2652 uint32_t type; 2653 if (!mDataSource->getUInt32(keyOffset + 4, &type) 2654 || type != FOURCC('m', 'd', 't', 'a')) { 2655 return ERROR_MALFORMED; 2656 } 2657 2658 keySize -= 8; 2659 keyOffset += 8; 2660 2661 auto keyData = heapbuffer<uint8_t>(keySize); 2662 if (keyData.get() == NULL) { 2663 return ERROR_MALFORMED; 2664 } 2665 if (mDataSource->readAt( 2666 keyOffset, keyData.get(), keySize) < (ssize_t) keySize) { 2667 return ERROR_MALFORMED; 2668 } 2669 2670 AString key((const char *)keyData.get(), keySize); 2671 mMetaKeyMap.add(i, key); 2672 2673 keyOffset += keySize; 2674 } 2675 return OK; 2676} 2677 2678status_t MPEG4Extractor::parseQTMetaVal( 2679 int32_t keyId, off64_t offset, size_t size) { 2680 ssize_t index = mMetaKeyMap.indexOfKey(keyId); 2681 if (index < 0) { 2682 // corresponding key is not present, ignore 2683 return ERROR_MALFORMED; 2684 } 2685 2686 if (size <= 16) { 2687 return ERROR_MALFORMED; 2688 } 2689 uint32_t dataSize; 2690 if (!mDataSource->getUInt32(offset, &dataSize) 2691 || dataSize > size || dataSize <= 16) { 2692 return ERROR_MALFORMED; 2693 } 2694 uint32_t atomFourCC; 2695 if (!mDataSource->getUInt32(offset + 4, &atomFourCC) 2696 || atomFourCC != FOURCC('d', 'a', 't', 'a')) { 2697 return ERROR_MALFORMED; 2698 } 2699 uint32_t dataType; 2700 if (!mDataSource->getUInt32(offset + 8, &dataType) 2701 || ((dataType & 0xff000000) != 0)) { 2702 // not well-known type 2703 return ERROR_MALFORMED; 2704 } 2705 2706 dataSize -= 16; 2707 offset += 16; 2708 2709 if (dataType == 23 && dataSize >= 4) { 2710 // BE Float32 2711 uint32_t val; 2712 if (!mDataSource->getUInt32(offset, &val)) { 2713 return ERROR_MALFORMED; 2714 } 2715 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) { 2716 mFileMetaData.setFloat(kKeyCaptureFramerate, *(float *)&val); 2717 } 2718 } else if (dataType == 67 && dataSize >= 4) { 2719 // BE signed int32 2720 uint32_t val; 2721 if (!mDataSource->getUInt32(offset, &val)) { 2722 return ERROR_MALFORMED; 2723 } 2724 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) { 2725 mFileMetaData.setInt32(kKeyTemporalLayerCount, val); 2726 } 2727 } else { 2728 // add more keys if needed 2729 ALOGV("ignoring key: type %d, size %d", dataType, dataSize); 2730 } 2731 2732 return OK; 2733} 2734 2735status_t MPEG4Extractor::parseTrackHeader( 2736 off64_t data_offset, off64_t data_size) { 2737 if (data_size < 4) { 2738 return ERROR_MALFORMED; 2739 } 2740 2741 uint8_t version; 2742 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 2743 return ERROR_IO; 2744 } 2745 2746 size_t dynSize = (version == 1) ? 36 : 24; 2747 2748 uint8_t buffer[36 + 60]; 2749 2750 if (data_size != (off64_t)dynSize + 60) { 2751 return ERROR_MALFORMED; 2752 } 2753 2754 if (mDataSource->readAt( 2755 data_offset, buffer, data_size) < (ssize_t)data_size) { 2756 return ERROR_IO; 2757 } 2758 2759 uint64_t ctime __unused, mtime __unused, duration __unused; 2760 int32_t id; 2761 2762 if (version == 1) { 2763 ctime = U64_AT(&buffer[4]); 2764 mtime = U64_AT(&buffer[12]); 2765 id = U32_AT(&buffer[20]); 2766 duration = U64_AT(&buffer[28]); 2767 } else if (version == 0) { 2768 ctime = U32_AT(&buffer[4]); 2769 mtime = U32_AT(&buffer[8]); 2770 id = U32_AT(&buffer[12]); 2771 duration = U32_AT(&buffer[20]); 2772 } else { 2773 return ERROR_UNSUPPORTED; 2774 } 2775 2776 if (mLastTrack == NULL) 2777 return ERROR_MALFORMED; 2778 2779 mLastTrack->meta.setInt32(kKeyTrackID, id); 2780 2781 size_t matrixOffset = dynSize + 16; 2782 int32_t a00 = U32_AT(&buffer[matrixOffset]); 2783 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 2784 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 2785 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 2786 2787#if 0 2788 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 2789 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 2790 2791 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 2792 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 2793 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 2794 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 2795#endif 2796 2797 uint32_t rotationDegrees; 2798 2799 static const int32_t kFixedOne = 0x10000; 2800 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 2801 // Identity, no rotation 2802 rotationDegrees = 0; 2803 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 2804 rotationDegrees = 90; 2805 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 2806 rotationDegrees = 270; 2807 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { 2808 rotationDegrees = 180; 2809 } else { 2810 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 2811 rotationDegrees = 0; 2812 } 2813 2814 if (rotationDegrees != 0) { 2815 mLastTrack->meta.setInt32(kKeyRotation, rotationDegrees); 2816 } 2817 2818 // Handle presentation display size, which could be different 2819 // from the image size indicated by kKeyWidth and kKeyHeight. 2820 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2821 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2822 mLastTrack->meta.setInt32(kKeyDisplayWidth, width >> 16); 2823 mLastTrack->meta.setInt32(kKeyDisplayHeight, height >> 16); 2824 2825 return OK; 2826} 2827 2828status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) { 2829 if (size == 0) { 2830 return OK; 2831 } 2832 2833 if (size < 4 || size == SIZE_MAX) { 2834 return ERROR_MALFORMED; 2835 } 2836 2837 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2838 if (buffer == NULL) { 2839 return ERROR_MALFORMED; 2840 } 2841 if (mDataSource->readAt( 2842 offset, buffer, size) != (ssize_t)size) { 2843 delete[] buffer; 2844 buffer = NULL; 2845 2846 return ERROR_IO; 2847 } 2848 2849 uint32_t flags = U32_AT(buffer); 2850 2851 uint32_t metadataKey = 0; 2852 char chunk[5]; 2853 MakeFourCCString(mPath[4], chunk); 2854 ALOGV("meta: %s @ %lld", chunk, (long long)offset); 2855 switch ((int32_t)mPath[4]) { 2856 case FOURCC(0xa9, 'a', 'l', 'b'): 2857 { 2858 metadataKey = kKeyAlbum; 2859 break; 2860 } 2861 case FOURCC(0xa9, 'A', 'R', 'T'): 2862 { 2863 metadataKey = kKeyArtist; 2864 break; 2865 } 2866 case FOURCC('a', 'A', 'R', 'T'): 2867 { 2868 metadataKey = kKeyAlbumArtist; 2869 break; 2870 } 2871 case FOURCC(0xa9, 'd', 'a', 'y'): 2872 { 2873 metadataKey = kKeyYear; 2874 break; 2875 } 2876 case FOURCC(0xa9, 'n', 'a', 'm'): 2877 { 2878 metadataKey = kKeyTitle; 2879 break; 2880 } 2881 case FOURCC(0xa9, 'w', 'r', 't'): 2882 { 2883 metadataKey = kKeyWriter; 2884 break; 2885 } 2886 case FOURCC('c', 'o', 'v', 'r'): 2887 { 2888 metadataKey = kKeyAlbumArt; 2889 break; 2890 } 2891 case FOURCC('g', 'n', 'r', 'e'): 2892 { 2893 metadataKey = kKeyGenre; 2894 break; 2895 } 2896 case FOURCC(0xa9, 'g', 'e', 'n'): 2897 { 2898 metadataKey = kKeyGenre; 2899 break; 2900 } 2901 case FOURCC('c', 'p', 'i', 'l'): 2902 { 2903 if (size == 9 && flags == 21) { 2904 char tmp[16]; 2905 sprintf(tmp, "%d", 2906 (int)buffer[size - 1]); 2907 2908 mFileMetaData.setCString(kKeyCompilation, tmp); 2909 } 2910 break; 2911 } 2912 case FOURCC('t', 'r', 'k', 'n'): 2913 { 2914 if (size == 16 && flags == 0) { 2915 char tmp[16]; 2916 uint16_t* pTrack = (uint16_t*)&buffer[10]; 2917 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 2918 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 2919 2920 mFileMetaData.setCString(kKeyCDTrackNumber, tmp); 2921 } 2922 break; 2923 } 2924 case FOURCC('d', 'i', 's', 'k'): 2925 { 2926 if ((size == 14 || size == 16) && flags == 0) { 2927 char tmp[16]; 2928 uint16_t* pDisc = (uint16_t*)&buffer[10]; 2929 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 2930 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 2931 2932 mFileMetaData.setCString(kKeyDiscNumber, tmp); 2933 } 2934 break; 2935 } 2936 case FOURCC('-', '-', '-', '-'): 2937 { 2938 buffer[size] = '\0'; 2939 switch (mPath[5]) { 2940 case FOURCC('m', 'e', 'a', 'n'): 2941 mLastCommentMean.setTo((const char *)buffer + 4); 2942 break; 2943 case FOURCC('n', 'a', 'm', 'e'): 2944 mLastCommentName.setTo((const char *)buffer + 4); 2945 break; 2946 case FOURCC('d', 'a', 't', 'a'): 2947 if (size < 8) { 2948 delete[] buffer; 2949 buffer = NULL; 2950 ALOGE("b/24346430"); 2951 return ERROR_MALFORMED; 2952 } 2953 mLastCommentData.setTo((const char *)buffer + 8); 2954 break; 2955 } 2956 2957 // Once we have a set of mean/name/data info, go ahead and process 2958 // it to see if its something we are interested in. Whether or not 2959 // were are interested in the specific tag, make sure to clear out 2960 // the set so we can be ready to process another tuple should one 2961 // show up later in the file. 2962 if ((mLastCommentMean.length() != 0) && 2963 (mLastCommentName.length() != 0) && 2964 (mLastCommentData.length() != 0)) { 2965 2966 if (mLastCommentMean == "com.apple.iTunes" 2967 && mLastCommentName == "iTunSMPB") { 2968 int32_t delay, padding; 2969 if (sscanf(mLastCommentData, 2970 " %*x %x %x %*x", &delay, &padding) == 2) { 2971 if (mLastTrack == NULL) { 2972 delete[] buffer; 2973 return ERROR_MALFORMED; 2974 } 2975 2976 mLastTrack->meta.setInt32(kKeyEncoderDelay, delay); 2977 mLastTrack->meta.setInt32(kKeyEncoderPadding, padding); 2978 } 2979 } 2980 2981 mLastCommentMean.clear(); 2982 mLastCommentName.clear(); 2983 mLastCommentData.clear(); 2984 } 2985 break; 2986 } 2987 2988 default: 2989 break; 2990 } 2991 2992 if (size >= 8 && metadataKey && !mFileMetaData.hasData(metadataKey)) { 2993 if (metadataKey == kKeyAlbumArt) { 2994 mFileMetaData.setData( 2995 kKeyAlbumArt, MetaData::TYPE_NONE, 2996 buffer + 8, size - 8); 2997 } else if (metadataKey == kKeyGenre) { 2998 if (flags == 0) { 2999 // uint8_t genre code, iTunes genre codes are 3000 // the standard id3 codes, except they start 3001 // at 1 instead of 0 (e.g. Pop is 14, not 13) 3002 // We use standard id3 numbering, so subtract 1. 3003 int genrecode = (int)buffer[size - 1]; 3004 genrecode--; 3005 if (genrecode < 0) { 3006 genrecode = 255; // reserved for 'unknown genre' 3007 } 3008 char genre[10]; 3009 sprintf(genre, "%d", genrecode); 3010 3011 mFileMetaData.setCString(metadataKey, genre); 3012 } else if (flags == 1) { 3013 // custom genre string 3014 buffer[size] = '\0'; 3015 3016 mFileMetaData.setCString( 3017 metadataKey, (const char *)buffer + 8); 3018 } 3019 } else { 3020 buffer[size] = '\0'; 3021 3022 mFileMetaData.setCString( 3023 metadataKey, (const char *)buffer + 8); 3024 } 3025 } 3026 3027 delete[] buffer; 3028 buffer = NULL; 3029 3030 return OK; 3031} 3032 3033status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) { 3034 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) { 3035 return ERROR_MALFORMED; 3036 } 3037 3038 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 3039 if (buffer == NULL) { 3040 return ERROR_MALFORMED; 3041 } 3042 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) { 3043 delete[] buffer; 3044 buffer = NULL; 3045 3046 return ERROR_IO; 3047 } 3048 3049 int32_t type = U32_AT(&buffer[0]); 3050 if ((type == FOURCC('n', 'c', 'l', 'x') && size >= 11) 3051 || (type == FOURCC('n', 'c', 'l', 'c') && size >= 10)) { 3052 int32_t primaries = U16_AT(&buffer[4]); 3053 int32_t transfer = U16_AT(&buffer[6]); 3054 int32_t coeffs = U16_AT(&buffer[8]); 3055 bool fullRange = (type == FOURCC('n', 'c', 'l', 'x')) && (buffer[10] & 128); 3056 3057 ColorAspects aspects; 3058 ColorUtils::convertIsoColorAspectsToCodecAspects( 3059 primaries, transfer, coeffs, fullRange, aspects); 3060 3061 // only store the first color specification 3062 if (!mLastTrack->meta.hasData(kKeyColorPrimaries)) { 3063 mLastTrack->meta.setInt32(kKeyColorPrimaries, aspects.mPrimaries); 3064 mLastTrack->meta.setInt32(kKeyTransferFunction, aspects.mTransfer); 3065 mLastTrack->meta.setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs); 3066 mLastTrack->meta.setInt32(kKeyColorRange, aspects.mRange); 3067 } 3068 } 3069 3070 delete[] buffer; 3071 buffer = NULL; 3072 3073 return OK; 3074} 3075 3076status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) { 3077 if (size < 4 || size == SIZE_MAX) { 3078 return ERROR_MALFORMED; 3079 } 3080 3081 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 3082 if (buffer == NULL) { 3083 return ERROR_MALFORMED; 3084 } 3085 if (mDataSource->readAt( 3086 offset, buffer, size) != (ssize_t)size) { 3087 delete[] buffer; 3088 buffer = NULL; 3089 3090 return ERROR_IO; 3091 } 3092 3093 uint32_t metadataKey = 0; 3094 switch (mPath[depth]) { 3095 case FOURCC('t', 'i', 't', 'l'): 3096 { 3097 metadataKey = kKeyTitle; 3098 break; 3099 } 3100 case FOURCC('p', 'e', 'r', 'f'): 3101 { 3102 metadataKey = kKeyArtist; 3103 break; 3104 } 3105 case FOURCC('a', 'u', 't', 'h'): 3106 { 3107 metadataKey = kKeyWriter; 3108 break; 3109 } 3110 case FOURCC('g', 'n', 'r', 'e'): 3111 { 3112 metadataKey = kKeyGenre; 3113 break; 3114 } 3115 case FOURCC('a', 'l', 'b', 'm'): 3116 { 3117 if (buffer[size - 1] != '\0') { 3118 char tmp[4]; 3119 sprintf(tmp, "%u", buffer[size - 1]); 3120 3121 mFileMetaData.setCString(kKeyCDTrackNumber, tmp); 3122 } 3123 3124 metadataKey = kKeyAlbum; 3125 break; 3126 } 3127 case FOURCC('y', 'r', 'r', 'c'): 3128 { 3129 if (size < 6) { 3130 delete[] buffer; 3131 buffer = NULL; 3132 ALOGE("b/62133227"); 3133 android_errorWriteLog(0x534e4554, "62133227"); 3134 return ERROR_MALFORMED; 3135 } 3136 char tmp[5]; 3137 uint16_t year = U16_AT(&buffer[4]); 3138 3139 if (year < 10000) { 3140 sprintf(tmp, "%u", year); 3141 3142 mFileMetaData.setCString(kKeyYear, tmp); 3143 } 3144 break; 3145 } 3146 3147 default: 3148 break; 3149 } 3150 3151 if (metadataKey > 0) { 3152 bool isUTF8 = true; // Common case 3153 char16_t *framedata = NULL; 3154 int len16 = 0; // Number of UTF-16 characters 3155 3156 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00 3157 if (size < 6) { 3158 delete[] buffer; 3159 buffer = NULL; 3160 return ERROR_MALFORMED; 3161 } 3162 3163 if (size - 6 >= 4) { 3164 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator 3165 framedata = (char16_t *)(buffer + 6); 3166 if (0xfffe == *framedata) { 3167 // endianness marker (BOM) doesn't match host endianness 3168 for (int i = 0; i < len16; i++) { 3169 framedata[i] = bswap_16(framedata[i]); 3170 } 3171 // BOM is now swapped to 0xfeff, we will execute next block too 3172 } 3173 3174 if (0xfeff == *framedata) { 3175 // Remove the BOM 3176 framedata++; 3177 len16--; 3178 isUTF8 = false; 3179 } 3180 // else normal non-zero-length UTF-8 string 3181 // we can't handle UTF-16 without BOM as there is no other 3182 // indication of encoding. 3183 } 3184 3185 if (isUTF8) { 3186 buffer[size] = 0; 3187 mFileMetaData.setCString(metadataKey, (const char *)buffer + 6); 3188 } else { 3189 // Convert from UTF-16 string to UTF-8 string. 3190 String8 tmpUTF8str(framedata, len16); 3191 mFileMetaData.setCString(metadataKey, tmpUTF8str.string()); 3192 } 3193 } 3194 3195 delete[] buffer; 3196 buffer = NULL; 3197 3198 return OK; 3199} 3200 3201void MPEG4Extractor::parseID3v2MetaData(off64_t offset) { 3202 ID3 id3(mDataSource, true /* ignorev1 */, offset); 3203 3204 if (id3.isValid()) { 3205 struct Map { 3206 int key; 3207 const char *tag1; 3208 const char *tag2; 3209 }; 3210 static const Map kMap[] = { 3211 { kKeyAlbum, "TALB", "TAL" }, 3212 { kKeyArtist, "TPE1", "TP1" }, 3213 { kKeyAlbumArtist, "TPE2", "TP2" }, 3214 { kKeyComposer, "TCOM", "TCM" }, 3215 { kKeyGenre, "TCON", "TCO" }, 3216 { kKeyTitle, "TIT2", "TT2" }, 3217 { kKeyYear, "TYE", "TYER" }, 3218 { kKeyAuthor, "TXT", "TEXT" }, 3219 { kKeyCDTrackNumber, "TRK", "TRCK" }, 3220 { kKeyDiscNumber, "TPA", "TPOS" }, 3221 { kKeyCompilation, "TCP", "TCMP" }, 3222 }; 3223 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 3224 3225 for (size_t i = 0; i < kNumMapEntries; ++i) { 3226 if (!mFileMetaData.hasData(kMap[i].key)) { 3227 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 3228 if (it->done()) { 3229 delete it; 3230 it = new ID3::Iterator(id3, kMap[i].tag2); 3231 } 3232 3233 if (it->done()) { 3234 delete it; 3235 continue; 3236 } 3237 3238 String8 s; 3239 it->getString(&s); 3240 delete it; 3241 3242 mFileMetaData.setCString(kMap[i].key, s); 3243 } 3244 } 3245 3246 size_t dataSize; 3247 String8 mime; 3248 const void *data = id3.getAlbumArt(&dataSize, &mime); 3249 3250 if (data) { 3251 mFileMetaData.setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 3252 mFileMetaData.setCString(kKeyAlbumArtMIME, mime.string()); 3253 } 3254 } 3255} 3256 3257MediaTrack *MPEG4Extractor::getTrack(size_t index) { 3258 status_t err; 3259 if ((err = readMetaData()) != OK) { 3260 return NULL; 3261 } 3262 3263 Track *track = mFirstTrack; 3264 while (index > 0) { 3265 if (track == NULL) { 3266 return NULL; 3267 } 3268 3269 track = track->next; 3270 --index; 3271 } 3272 3273 if (track == NULL) { 3274 return NULL; 3275 } 3276 3277 3278 Trex *trex = NULL; 3279 int32_t trackId; 3280 if (track->meta.findInt32(kKeyTrackID, &trackId)) { 3281 for (size_t i = 0; i < mTrex.size(); i++) { 3282 Trex *t = &mTrex.editItemAt(i); 3283 if (t->track_ID == (uint32_t) trackId) { 3284 trex = t; 3285 break; 3286 } 3287 } 3288 } else { 3289 ALOGE("b/21657957"); 3290 return NULL; 3291 } 3292 3293 ALOGV("getTrack called, pssh: %zu", mPssh.size()); 3294 3295 const char *mime; 3296 if (!track->meta.findCString(kKeyMIMEType, &mime)) { 3297 return NULL; 3298 } 3299 3300 sp<ItemTable> itemTable; 3301 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 3302 uint32_t type; 3303 const void *data; 3304 size_t size; 3305 if (!track->meta.findData(kKeyAVCC, &type, &data, &size)) { 3306 return NULL; 3307 } 3308 3309 const uint8_t *ptr = (const uint8_t *)data; 3310 3311 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1 3312 return NULL; 3313 } 3314 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) 3315 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) { 3316 uint32_t type; 3317 const void *data; 3318 size_t size; 3319 if (!track->meta.findData(kKeyHVCC, &type, &data, &size)) { 3320 return NULL; 3321 } 3322 3323 const uint8_t *ptr = (const uint8_t *)data; 3324 3325 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1 3326 return NULL; 3327 } 3328 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) { 3329 itemTable = mItemTable; 3330 } 3331 } 3332 3333 MPEG4Source *source = new MPEG4Source( 3334 track->meta, mDataSource, track->timescale, track->sampleTable, 3335 mSidxEntries, trex, mMoofOffset, itemTable); 3336 if (source->init() != OK) { 3337 delete source; 3338 return NULL; 3339 } 3340 return source; 3341} 3342 3343// static 3344status_t MPEG4Extractor::verifyTrack(Track *track) { 3345 const char *mime; 3346 CHECK(track->meta.findCString(kKeyMIMEType, &mime)); 3347 3348 uint32_t type; 3349 const void *data; 3350 size_t size; 3351 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 3352 if (!track->meta.findData(kKeyAVCC, &type, &data, &size) 3353 || type != kTypeAVCC) { 3354 return ERROR_MALFORMED; 3355 } 3356 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 3357 if (!track->meta.findData(kKeyHVCC, &type, &data, &size) 3358 || type != kTypeHVCC) { 3359 return ERROR_MALFORMED; 3360 } 3361 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 3362 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2) 3363 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 3364 if (!track->meta.findData(kKeyESDS, &type, &data, &size) 3365 || type != kTypeESDS) { 3366 return ERROR_MALFORMED; 3367 } 3368 } 3369 3370 if (track->sampleTable == NULL || !track->sampleTable->isValid()) { 3371 // Make sure we have all the metadata we need. 3372 ALOGE("stbl atom missing/invalid."); 3373 return ERROR_MALFORMED; 3374 } 3375 3376 if (track->timescale == 0) { 3377 ALOGE("timescale invalid."); 3378 return ERROR_MALFORMED; 3379 } 3380 3381 return OK; 3382} 3383 3384typedef enum { 3385 //AOT_NONE = -1, 3386 //AOT_NULL_OBJECT = 0, 3387 //AOT_AAC_MAIN = 1, /**< Main profile */ 3388 AOT_AAC_LC = 2, /**< Low Complexity object */ 3389 //AOT_AAC_SSR = 3, 3390 //AOT_AAC_LTP = 4, 3391 AOT_SBR = 5, 3392 //AOT_AAC_SCAL = 6, 3393 //AOT_TWIN_VQ = 7, 3394 //AOT_CELP = 8, 3395 //AOT_HVXC = 9, 3396 //AOT_RSVD_10 = 10, /**< (reserved) */ 3397 //AOT_RSVD_11 = 11, /**< (reserved) */ 3398 //AOT_TTSI = 12, /**< TTSI Object */ 3399 //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */ 3400 //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */ 3401 //AOT_GEN_MIDI = 15, /**< General MIDI object */ 3402 //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */ 3403 AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */ 3404 //AOT_RSVD_18 = 18, /**< (reserved) */ 3405 //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */ 3406 AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */ 3407 //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */ 3408 AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */ 3409 AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */ 3410 //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */ 3411 //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */ 3412 //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */ 3413 //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */ 3414 //AOT_RSVD_28 = 28, /**< might become SSC */ 3415 AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */ 3416 //AOT_MPEGS = 30, /**< MPEG Surround */ 3417 3418 AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */ 3419 3420 //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */ 3421 //AOT_MP3ONMP4_L2 = 33, /**< MPEG-Layer2 in mp4 */ 3422 //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */ 3423 //AOT_RSVD_35 = 35, /**< might become DST */ 3424 //AOT_RSVD_36 = 36, /**< might become ALS */ 3425 //AOT_AAC_SLS = 37, /**< AAC + SLS */ 3426 //AOT_SLS = 38, /**< SLS */ 3427 //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */ 3428 3429 //AOT_USAC = 42, /**< USAC */ 3430 //AOT_SAOC = 43, /**< SAOC */ 3431 //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */ 3432 3433 //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */ 3434} AUDIO_OBJECT_TYPE; 3435 3436status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 3437 const void *esds_data, size_t esds_size) { 3438 ESDS esds(esds_data, esds_size); 3439 3440 uint8_t objectTypeIndication; 3441 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 3442 return ERROR_MALFORMED; 3443 } 3444 3445 if (objectTypeIndication == 0xe1) { 3446 // This isn't MPEG4 audio at all, it's QCELP 14k... 3447 if (mLastTrack == NULL) 3448 return ERROR_MALFORMED; 3449 3450 mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 3451 return OK; 3452 } 3453 3454 if (objectTypeIndication == 0x6b) { 3455 // The media subtype is MP3 audio 3456 // Our software MP3 audio decoder may not be able to handle 3457 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 3458 ALOGE("MP3 track in MP4/3GPP file is not supported"); 3459 return ERROR_UNSUPPORTED; 3460 } 3461 3462 if (mLastTrack != NULL) { 3463 uint32_t maxBitrate = 0; 3464 uint32_t avgBitrate = 0; 3465 esds.getBitRate(&maxBitrate, &avgBitrate); 3466 if (maxBitrate > 0 && maxBitrate < INT32_MAX) { 3467 mLastTrack->meta.setInt32(kKeyMaxBitRate, (int32_t)maxBitrate); 3468 } 3469 if (avgBitrate > 0 && avgBitrate < INT32_MAX) { 3470 mLastTrack->meta.setInt32(kKeyBitRate, (int32_t)avgBitrate); 3471 } 3472 } 3473 3474 const uint8_t *csd; 3475 size_t csd_size; 3476 if (esds.getCodecSpecificInfo( 3477 (const void **)&csd, &csd_size) != OK) { 3478 return ERROR_MALFORMED; 3479 } 3480 3481 if (kUseHexDump) { 3482 printf("ESD of size %zu\n", csd_size); 3483 hexdump(csd, csd_size); 3484 } 3485 3486 if (csd_size == 0) { 3487 // There's no further information, i.e. no codec specific data 3488 // Let's assume that the information provided in the mpeg4 headers 3489 // is accurate and hope for the best. 3490 3491 return OK; 3492 } 3493 3494 if (csd_size < 2) { 3495 return ERROR_MALFORMED; 3496 } 3497 3498 static uint32_t kSamplingRate[] = { 3499 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 3500 16000, 12000, 11025, 8000, 7350 3501 }; 3502 3503 ABitReader br(csd, csd_size); 3504 uint32_t objectType = br.getBits(5); 3505 3506 if (objectType == 31) { // AAC-ELD => additional 6 bits 3507 objectType = 32 + br.getBits(6); 3508 } 3509 3510 if (mLastTrack == NULL) 3511 return ERROR_MALFORMED; 3512 3513 //keep AOT type 3514 mLastTrack->meta.setInt32(kKeyAACAOT, objectType); 3515 3516 uint32_t freqIndex = br.getBits(4); 3517 3518 int32_t sampleRate = 0; 3519 int32_t numChannels = 0; 3520 if (freqIndex == 15) { 3521 if (br.numBitsLeft() < 28) return ERROR_MALFORMED; 3522 sampleRate = br.getBits(24); 3523 numChannels = br.getBits(4); 3524 } else { 3525 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3526 numChannels = br.getBits(4); 3527 3528 if (freqIndex == 13 || freqIndex == 14) { 3529 return ERROR_MALFORMED; 3530 } 3531 3532 sampleRate = kSamplingRate[freqIndex]; 3533 } 3534 3535 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 3536 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3537 uint32_t extFreqIndex = br.getBits(4); 3538 int32_t extSampleRate __unused; 3539 if (extFreqIndex == 15) { 3540 if (csd_size < 8) { 3541 return ERROR_MALFORMED; 3542 } 3543 if (br.numBitsLeft() < 24) return ERROR_MALFORMED; 3544 extSampleRate = br.getBits(24); 3545 } else { 3546 if (extFreqIndex == 13 || extFreqIndex == 14) { 3547 return ERROR_MALFORMED; 3548 } 3549 extSampleRate = kSamplingRate[extFreqIndex]; 3550 } 3551 //TODO: save the extension sampling rate value in meta data => 3552 // mLastTrack->meta.setInt32(kKeyExtSampleRate, extSampleRate); 3553 } 3554 3555 switch (numChannels) { 3556 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration 3557 case 0: 3558 case 1:// FC 3559 case 2:// FL FR 3560 case 3:// FC, FL FR 3561 case 4:// FC, FL FR, RC 3562 case 5:// FC, FL FR, SL SR 3563 case 6:// FC, FL FR, SL SR, LFE 3564 //numChannels already contains the right value 3565 break; 3566 case 11:// FC, FL FR, SL SR, RC, LFE 3567 numChannels = 7; 3568 break; 3569 case 7: // FC, FCL FCR, FL FR, SL SR, LFE 3570 case 12:// FC, FL FR, SL SR, RL RR, LFE 3571 case 14:// FC, FL FR, SL SR, LFE, FHL FHR 3572 numChannels = 8; 3573 break; 3574 default: 3575 return ERROR_UNSUPPORTED; 3576 } 3577 3578 { 3579 if (objectType == AOT_SBR || objectType == AOT_PS) { 3580 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3581 objectType = br.getBits(5); 3582 3583 if (objectType == AOT_ESCAPE) { 3584 if (br.numBitsLeft() < 6) return ERROR_MALFORMED; 3585 objectType = 32 + br.getBits(6); 3586 } 3587 } 3588 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || 3589 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || 3590 objectType == AOT_ER_BSAC) { 3591 if (br.numBitsLeft() < 2) return ERROR_MALFORMED; 3592 const int32_t frameLengthFlag __unused = br.getBits(1); 3593 3594 const int32_t dependsOnCoreCoder = br.getBits(1); 3595 3596 if (dependsOnCoreCoder ) { 3597 if (br.numBitsLeft() < 14) return ERROR_MALFORMED; 3598 const int32_t coreCoderDelay __unused = br.getBits(14); 3599 } 3600 3601 int32_t extensionFlag = -1; 3602 if (br.numBitsLeft() > 0) { 3603 extensionFlag = br.getBits(1); 3604 } else { 3605 switch (objectType) { 3606 // 14496-3 4.5.1.1 extensionFlag 3607 case AOT_AAC_LC: 3608 extensionFlag = 0; 3609 break; 3610 case AOT_ER_AAC_LC: 3611 case AOT_ER_AAC_SCAL: 3612 case AOT_ER_BSAC: 3613 case AOT_ER_AAC_LD: 3614 extensionFlag = 1; 3615 break; 3616 default: 3617 return ERROR_MALFORMED; 3618 break; 3619 } 3620 ALOGW("csd missing extension flag; assuming %d for object type %u.", 3621 extensionFlag, objectType); 3622 } 3623 3624 if (numChannels == 0) { 3625 int32_t channelsEffectiveNum = 0; 3626 int32_t channelsNum = 0; 3627 if (br.numBitsLeft() < 32) { 3628 return ERROR_MALFORMED; 3629 } 3630 const int32_t ElementInstanceTag __unused = br.getBits(4); 3631 const int32_t Profile __unused = br.getBits(2); 3632 const int32_t SamplingFrequencyIndex __unused = br.getBits(4); 3633 const int32_t NumFrontChannelElements = br.getBits(4); 3634 const int32_t NumSideChannelElements = br.getBits(4); 3635 const int32_t NumBackChannelElements = br.getBits(4); 3636 const int32_t NumLfeChannelElements = br.getBits(2); 3637 const int32_t NumAssocDataElements __unused = br.getBits(3); 3638 const int32_t NumValidCcElements __unused = br.getBits(4); 3639 3640 const int32_t MonoMixdownPresent = br.getBits(1); 3641 3642 if (MonoMixdownPresent != 0) { 3643 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3644 const int32_t MonoMixdownElementNumber __unused = br.getBits(4); 3645 } 3646 3647 if (br.numBitsLeft() < 1) return ERROR_MALFORMED; 3648 const int32_t StereoMixdownPresent = br.getBits(1); 3649 if (StereoMixdownPresent != 0) { 3650 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3651 const int32_t StereoMixdownElementNumber __unused = br.getBits(4); 3652 } 3653 3654 if (br.numBitsLeft() < 1) return ERROR_MALFORMED; 3655 const int32_t MatrixMixdownIndexPresent = br.getBits(1); 3656 if (MatrixMixdownIndexPresent != 0) { 3657 if (br.numBitsLeft() < 3) return ERROR_MALFORMED; 3658 const int32_t MatrixMixdownIndex __unused = br.getBits(2); 3659 const int32_t PseudoSurroundEnable __unused = br.getBits(1); 3660 } 3661 3662 int i; 3663 for (i=0; i < NumFrontChannelElements; i++) { 3664 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3665 const int32_t FrontElementIsCpe = br.getBits(1); 3666 const int32_t FrontElementTagSelect __unused = br.getBits(4); 3667 channelsNum += FrontElementIsCpe ? 2 : 1; 3668 } 3669 3670 for (i=0; i < NumSideChannelElements; i++) { 3671 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3672 const int32_t SideElementIsCpe = br.getBits(1); 3673 const int32_t SideElementTagSelect __unused = br.getBits(4); 3674 channelsNum += SideElementIsCpe ? 2 : 1; 3675 } 3676 3677 for (i=0; i < NumBackChannelElements; i++) { 3678 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3679 const int32_t BackElementIsCpe = br.getBits(1); 3680 const int32_t BackElementTagSelect __unused = br.getBits(4); 3681 channelsNum += BackElementIsCpe ? 2 : 1; 3682 } 3683 channelsEffectiveNum = channelsNum; 3684 3685 for (i=0; i < NumLfeChannelElements; i++) { 3686 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3687 const int32_t LfeElementTagSelect __unused = br.getBits(4); 3688 channelsNum += 1; 3689 } 3690 ALOGV("mpeg4 audio channelsNum = %d", channelsNum); 3691 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); 3692 numChannels = channelsNum; 3693 } 3694 } 3695 } 3696 3697 if (numChannels == 0) { 3698 return ERROR_UNSUPPORTED; 3699 } 3700 3701 if (mLastTrack == NULL) 3702 return ERROR_MALFORMED; 3703 3704 int32_t prevSampleRate; 3705 CHECK(mLastTrack->meta.findInt32(kKeySampleRate, &prevSampleRate)); 3706 3707 if (prevSampleRate != sampleRate) { 3708 ALOGV("mpeg4 audio sample rate different from previous setting. " 3709 "was: %d, now: %d", prevSampleRate, sampleRate); 3710 } 3711 3712 mLastTrack->meta.setInt32(kKeySampleRate, sampleRate); 3713 3714 int32_t prevChannelCount; 3715 CHECK(mLastTrack->meta.findInt32(kKeyChannelCount, &prevChannelCount)); 3716 3717 if (prevChannelCount != numChannels) { 3718 ALOGV("mpeg4 audio channel count different from previous setting. " 3719 "was: %d, now: %d", prevChannelCount, numChannels); 3720 } 3721 3722 mLastTrack->meta.setInt32(kKeyChannelCount, numChannels); 3723 3724 return OK; 3725} 3726 3727//////////////////////////////////////////////////////////////////////////////// 3728 3729MPEG4Source::MPEG4Source( 3730 MetaDataBase &format, 3731 DataSourceBase *dataSource, 3732 int32_t timeScale, 3733 const sp<SampleTable> &sampleTable, 3734 Vector<SidxEntry> &sidx, 3735 const Trex *trex, 3736 off64_t firstMoofOffset, 3737 const sp<ItemTable> &itemTable) 3738 : mFormat(format), 3739 mDataSource(dataSource), 3740 mTimescale(timeScale), 3741 mSampleTable(sampleTable), 3742 mCurrentSampleIndex(0), 3743 mCurrentFragmentIndex(0), 3744 mSegments(sidx), 3745 mTrex(trex), 3746 mFirstMoofOffset(firstMoofOffset), 3747 mCurrentMoofOffset(firstMoofOffset), 3748 mNextMoofOffset(-1), 3749 mCurrentTime(0), 3750 mCurrentSampleInfoAllocSize(0), 3751 mCurrentSampleInfoSizes(NULL), 3752 mCurrentSampleInfoOffsetsAllocSize(0), 3753 mCurrentSampleInfoOffsets(NULL), 3754 mIsAVC(false), 3755 mIsHEVC(false), 3756 mNALLengthSize(0), 3757 mStarted(false), 3758 mGroup(NULL), 3759 mBuffer(NULL), 3760 mWantsNALFragments(false), 3761 mSrcBuffer(NULL), 3762 mIsHeif(itemTable != NULL), 3763 mItemTable(itemTable) { 3764 3765 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo)); 3766 3767 mFormat.findInt32(kKeyCryptoMode, &mCryptoMode); 3768 mDefaultIVSize = 0; 3769 mFormat.findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 3770 uint32_t keytype; 3771 const void *key; 3772 size_t keysize; 3773 if (mFormat.findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 3774 CHECK(keysize <= 16); 3775 memset(mCryptoKey, 0, 16); 3776 memcpy(mCryptoKey, key, keysize); 3777 } 3778 3779 const char *mime; 3780 bool success = mFormat.findCString(kKeyMIMEType, &mime); 3781 CHECK(success); 3782 3783 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 3784 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) || 3785 !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC); 3786 3787 if (mIsAVC) { 3788 uint32_t type; 3789 const void *data; 3790 size_t size; 3791 CHECK(format.findData(kKeyAVCC, &type, &data, &size)); 3792 3793 const uint8_t *ptr = (const uint8_t *)data; 3794 3795 CHECK(size >= 7); 3796 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3797 3798 // The number of bytes used to encode the length of a NAL unit. 3799 mNALLengthSize = 1 + (ptr[4] & 3); 3800 } else if (mIsHEVC) { 3801 uint32_t type; 3802 const void *data; 3803 size_t size; 3804 CHECK(format.findData(kKeyHVCC, &type, &data, &size)); 3805 3806 const uint8_t *ptr = (const uint8_t *)data; 3807 3808 CHECK(size >= 22); 3809 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3810 3811 mNALLengthSize = 1 + (ptr[14 + 7] & 3); 3812 } 3813 3814 CHECK(format.findInt32(kKeyTrackID, &mTrackId)); 3815 3816} 3817 3818status_t MPEG4Source::init() { 3819 if (mFirstMoofOffset != 0) { 3820 off64_t offset = mFirstMoofOffset; 3821 return parseChunk(&offset); 3822 } 3823 return OK; 3824} 3825 3826MPEG4Source::~MPEG4Source() { 3827 if (mStarted) { 3828 stop(); 3829 } 3830 free(mCurrentSampleInfoSizes); 3831 free(mCurrentSampleInfoOffsets); 3832} 3833 3834status_t MPEG4Source::start(MetaDataBase *params) { 3835 Mutex::Autolock autoLock(mLock); 3836 3837 CHECK(!mStarted); 3838 3839 int32_t val; 3840 if (params && params->findInt32(kKeyWantsNALFragments, &val) 3841 && val != 0) { 3842 mWantsNALFragments = true; 3843 } else { 3844 mWantsNALFragments = false; 3845 } 3846 3847 int32_t tmp; 3848 CHECK(mFormat.findInt32(kKeyMaxInputSize, &tmp)); 3849 size_t max_size = tmp; 3850 3851 // A somewhat arbitrary limit that should be sufficient for 8k video frames 3852 // If you see the message below for a valid input stream: increase the limit 3853 const size_t kMaxBufferSize = 64 * 1024 * 1024; 3854 if (max_size > kMaxBufferSize) { 3855 ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize); 3856 return ERROR_MALFORMED; 3857 } 3858 if (max_size == 0) { 3859 ALOGE("zero max input size"); 3860 return ERROR_MALFORMED; 3861 } 3862 3863 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize. 3864 const size_t kInitialBuffers = 2; 3865 const size_t kMaxBuffers = 8; 3866 const size_t realMaxBuffers = min(kMaxBufferSize / max_size, kMaxBuffers); 3867 mGroup = new MediaBufferGroup(kInitialBuffers, max_size, realMaxBuffers); 3868 mSrcBuffer = new (std::nothrow) uint8_t[max_size]; 3869 if (mSrcBuffer == NULL) { 3870 // file probably specified a bad max size 3871 delete mGroup; 3872 mGroup = NULL; 3873 return ERROR_MALFORMED; 3874 } 3875 3876 mStarted = true; 3877 3878 return OK; 3879} 3880 3881status_t MPEG4Source::stop() { 3882 Mutex::Autolock autoLock(mLock); 3883 3884 CHECK(mStarted); 3885 3886 if (mBuffer != NULL) { 3887 mBuffer->release(); 3888 mBuffer = NULL; 3889 } 3890 3891 delete[] mSrcBuffer; 3892 mSrcBuffer = NULL; 3893 3894 delete mGroup; 3895 mGroup = NULL; 3896 3897 mStarted = false; 3898 mCurrentSampleIndex = 0; 3899 3900 return OK; 3901} 3902 3903status_t MPEG4Source::parseChunk(off64_t *offset) { 3904 uint32_t hdr[2]; 3905 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3906 return ERROR_IO; 3907 } 3908 uint64_t chunk_size = ntohl(hdr[0]); 3909 uint32_t chunk_type = ntohl(hdr[1]); 3910 off64_t data_offset = *offset + 8; 3911 3912 if (chunk_size == 1) { 3913 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 3914 return ERROR_IO; 3915 } 3916 chunk_size = ntoh64(chunk_size); 3917 data_offset += 8; 3918 3919 if (chunk_size < 16) { 3920 // The smallest valid chunk is 16 bytes long in this case. 3921 return ERROR_MALFORMED; 3922 } 3923 } else if (chunk_size < 8) { 3924 // The smallest valid chunk is 8 bytes long. 3925 return ERROR_MALFORMED; 3926 } 3927 3928 char chunk[5]; 3929 MakeFourCCString(chunk_type, chunk); 3930 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset); 3931 3932 off64_t chunk_data_size = *offset + chunk_size - data_offset; 3933 3934 switch(chunk_type) { 3935 3936 case FOURCC('t', 'r', 'a', 'f'): 3937 case FOURCC('m', 'o', 'o', 'f'): { 3938 off64_t stop_offset = *offset + chunk_size; 3939 *offset = data_offset; 3940 while (*offset < stop_offset) { 3941 status_t err = parseChunk(offset); 3942 if (err != OK) { 3943 return err; 3944 } 3945 } 3946 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3947 // *offset points to the box following this moof. Find the next moof from there. 3948 3949 while (true) { 3950 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3951 // no more box to the end of file. 3952 break; 3953 } 3954 chunk_size = ntohl(hdr[0]); 3955 chunk_type = ntohl(hdr[1]); 3956 if (chunk_size == 1) { 3957 // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box 3958 // which is defined in 4.2 Object Structure. 3959 // When chunk_size==1, 8 bytes follows as "largesize". 3960 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 3961 return ERROR_IO; 3962 } 3963 chunk_size = ntoh64(chunk_size); 3964 if (chunk_size < 16) { 3965 // The smallest valid chunk is 16 bytes long in this case. 3966 return ERROR_MALFORMED; 3967 } 3968 } else if (chunk_size == 0) { 3969 // next box extends to end of file. 3970 } else if (chunk_size < 8) { 3971 // The smallest valid chunk is 8 bytes long in this case. 3972 return ERROR_MALFORMED; 3973 } 3974 3975 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3976 mNextMoofOffset = *offset; 3977 break; 3978 } else if (chunk_size == 0) { 3979 break; 3980 } 3981 *offset += chunk_size; 3982 } 3983 } 3984 break; 3985 } 3986 3987 case FOURCC('t', 'f', 'h', 'd'): { 3988 status_t err; 3989 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 3990 return err; 3991 } 3992 *offset += chunk_size; 3993 break; 3994 } 3995 3996 case FOURCC('t', 'r', 'u', 'n'): { 3997 status_t err; 3998 if (mLastParsedTrackId == mTrackId) { 3999 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 4000 return err; 4001 } 4002 } 4003 4004 *offset += chunk_size; 4005 break; 4006 } 4007 4008 case FOURCC('s', 'a', 'i', 'z'): { 4009 status_t err; 4010 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 4011 return err; 4012 } 4013 *offset += chunk_size; 4014 break; 4015 } 4016 case FOURCC('s', 'a', 'i', 'o'): { 4017 status_t err; 4018 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 4019 return err; 4020 } 4021 *offset += chunk_size; 4022 break; 4023 } 4024 4025 case FOURCC('m', 'd', 'a', 't'): { 4026 // parse DRM info if present 4027 ALOGV("MPEG4Source::parseChunk mdat"); 4028 // if saiz/saoi was previously observed, do something with the sampleinfos 4029 *offset += chunk_size; 4030 break; 4031 } 4032 4033 default: { 4034 *offset += chunk_size; 4035 break; 4036 } 4037 } 4038 return OK; 4039} 4040 4041status_t MPEG4Source::parseSampleAuxiliaryInformationSizes( 4042 off64_t offset, off64_t /* size */) { 4043 ALOGV("parseSampleAuxiliaryInformationSizes"); 4044 // 14496-12 8.7.12 4045 uint8_t version; 4046 if (mDataSource->readAt( 4047 offset, &version, sizeof(version)) 4048 < (ssize_t)sizeof(version)) { 4049 return ERROR_IO; 4050 } 4051 4052 if (version != 0) { 4053 return ERROR_UNSUPPORTED; 4054 } 4055 offset++; 4056 4057 uint32_t flags; 4058 if (!mDataSource->getUInt24(offset, &flags)) { 4059 return ERROR_IO; 4060 } 4061 offset += 3; 4062 4063 if (flags & 1) { 4064 uint32_t tmp; 4065 if (!mDataSource->getUInt32(offset, &tmp)) { 4066 return ERROR_MALFORMED; 4067 } 4068 mCurrentAuxInfoType = tmp; 4069 offset += 4; 4070 if (!mDataSource->getUInt32(offset, &tmp)) { 4071 return ERROR_MALFORMED; 4072 } 4073 mCurrentAuxInfoTypeParameter = tmp; 4074 offset += 4; 4075 } 4076 4077 uint8_t defsize; 4078 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 4079 return ERROR_MALFORMED; 4080 } 4081 mCurrentDefaultSampleInfoSize = defsize; 4082 offset++; 4083 4084 uint32_t smplcnt; 4085 if (!mDataSource->getUInt32(offset, &smplcnt)) { 4086 return ERROR_MALFORMED; 4087 } 4088 mCurrentSampleInfoCount = smplcnt; 4089 offset += 4; 4090 4091 if (mCurrentDefaultSampleInfoSize != 0) { 4092 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 4093 return OK; 4094 } 4095 if (smplcnt > mCurrentSampleInfoAllocSize) { 4096 uint8_t * newPtr = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 4097 if (newPtr == NULL) { 4098 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt); 4099 return NO_MEMORY; 4100 } 4101 mCurrentSampleInfoSizes = newPtr; 4102 mCurrentSampleInfoAllocSize = smplcnt; 4103 } 4104 4105 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 4106 return OK; 4107} 4108 4109status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets( 4110 off64_t offset, off64_t /* size */) { 4111 ALOGV("parseSampleAuxiliaryInformationOffsets"); 4112 // 14496-12 8.7.13 4113 uint8_t version; 4114 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 4115 return ERROR_IO; 4116 } 4117 offset++; 4118 4119 uint32_t flags; 4120 if (!mDataSource->getUInt24(offset, &flags)) { 4121 return ERROR_IO; 4122 } 4123 offset += 3; 4124 4125 uint32_t entrycount; 4126 if (!mDataSource->getUInt32(offset, &entrycount)) { 4127 return ERROR_IO; 4128 } 4129 offset += 4; 4130 if (entrycount == 0) { 4131 return OK; 4132 } 4133 if (entrycount > UINT32_MAX / 8) { 4134 return ERROR_MALFORMED; 4135 } 4136 4137 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 4138 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8); 4139 if (newPtr == NULL) { 4140 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoOffsetsAllocSize, entrycount * 8); 4141 return NO_MEMORY; 4142 } 4143 mCurrentSampleInfoOffsets = newPtr; 4144 mCurrentSampleInfoOffsetsAllocSize = entrycount; 4145 } 4146 mCurrentSampleInfoOffsetCount = entrycount; 4147 4148 if (mCurrentSampleInfoOffsets == NULL) { 4149 return OK; 4150 } 4151 4152 for (size_t i = 0; i < entrycount; i++) { 4153 if (version == 0) { 4154 uint32_t tmp; 4155 if (!mDataSource->getUInt32(offset, &tmp)) { 4156 return ERROR_IO; 4157 } 4158 mCurrentSampleInfoOffsets[i] = tmp; 4159 offset += 4; 4160 } else { 4161 uint64_t tmp; 4162 if (!mDataSource->getUInt64(offset, &tmp)) { 4163 return ERROR_IO; 4164 } 4165 mCurrentSampleInfoOffsets[i] = tmp; 4166 offset += 8; 4167 } 4168 } 4169 4170 // parse clear/encrypted data 4171 4172 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 4173 4174 drmoffset += mCurrentMoofOffset; 4175 int ivlength; 4176 CHECK(mFormat.findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 4177 4178 // only 0, 8 and 16 byte initialization vectors are supported 4179 if (ivlength != 0 && ivlength != 8 && ivlength != 16) { 4180 ALOGW("unsupported IV length: %d", ivlength); 4181 return ERROR_MALFORMED; 4182 } 4183 // read CencSampleAuxiliaryDataFormats 4184 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) { 4185 if (i >= mCurrentSamples.size()) { 4186 ALOGW("too few samples"); 4187 break; 4188 } 4189 Sample *smpl = &mCurrentSamples.editItemAt(i); 4190 4191 memset(smpl->iv, 0, 16); 4192 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) { 4193 return ERROR_IO; 4194 } 4195 4196 drmoffset += ivlength; 4197 4198 int32_t smplinfosize = mCurrentDefaultSampleInfoSize; 4199 if (smplinfosize == 0) { 4200 smplinfosize = mCurrentSampleInfoSizes[i]; 4201 } 4202 if (smplinfosize > ivlength) { 4203 uint16_t numsubsamples; 4204 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) { 4205 return ERROR_IO; 4206 } 4207 drmoffset += 2; 4208 for (size_t j = 0; j < numsubsamples; j++) { 4209 uint16_t numclear; 4210 uint32_t numencrypted; 4211 if (!mDataSource->getUInt16(drmoffset, &numclear)) { 4212 return ERROR_IO; 4213 } 4214 drmoffset += 2; 4215 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) { 4216 return ERROR_IO; 4217 } 4218 drmoffset += 4; 4219 smpl->clearsizes.add(numclear); 4220 smpl->encryptedsizes.add(numencrypted); 4221 } 4222 } else { 4223 smpl->clearsizes.add(0); 4224 smpl->encryptedsizes.add(smpl->size); 4225 } 4226 } 4227 4228 4229 return OK; 4230} 4231 4232status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 4233 4234 if (size < 8) { 4235 return -EINVAL; 4236 } 4237 4238 uint32_t flags; 4239 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 4240 return ERROR_MALFORMED; 4241 } 4242 4243 if (flags & 0xff000000) { 4244 return -EINVAL; 4245 } 4246 4247 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 4248 return ERROR_MALFORMED; 4249 } 4250 4251 if (mLastParsedTrackId != mTrackId) { 4252 // this is not the right track, skip it 4253 return OK; 4254 } 4255 4256 mTrackFragmentHeaderInfo.mFlags = flags; 4257 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 4258 offset += 8; 4259 size -= 8; 4260 4261 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 4262 4263 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 4264 if (size < 8) { 4265 return -EINVAL; 4266 } 4267 4268 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) { 4269 return ERROR_MALFORMED; 4270 } 4271 offset += 8; 4272 size -= 8; 4273 } 4274 4275 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 4276 if (size < 4) { 4277 return -EINVAL; 4278 } 4279 4280 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 4281 return ERROR_MALFORMED; 4282 } 4283 offset += 4; 4284 size -= 4; 4285 } 4286 4287 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 4288 if (size < 4) { 4289 return -EINVAL; 4290 } 4291 4292 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 4293 return ERROR_MALFORMED; 4294 } 4295 offset += 4; 4296 size -= 4; 4297 } 4298 4299 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 4300 if (size < 4) { 4301 return -EINVAL; 4302 } 4303 4304 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 4305 return ERROR_MALFORMED; 4306 } 4307 offset += 4; 4308 size -= 4; 4309 } 4310 4311 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 4312 if (size < 4) { 4313 return -EINVAL; 4314 } 4315 4316 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 4317 return ERROR_MALFORMED; 4318 } 4319 offset += 4; 4320 size -= 4; 4321 } 4322 4323 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 4324 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 4325 } 4326 4327 mTrackFragmentHeaderInfo.mDataOffset = 0; 4328 return OK; 4329} 4330 4331status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 4332 4333 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 4334 if (size < 8) { 4335 return -EINVAL; 4336 } 4337 4338 enum { 4339 kDataOffsetPresent = 0x01, 4340 kFirstSampleFlagsPresent = 0x04, 4341 kSampleDurationPresent = 0x100, 4342 kSampleSizePresent = 0x200, 4343 kSampleFlagsPresent = 0x400, 4344 kSampleCompositionTimeOffsetPresent = 0x800, 4345 }; 4346 4347 uint32_t flags; 4348 if (!mDataSource->getUInt32(offset, &flags)) { 4349 return ERROR_MALFORMED; 4350 } 4351 // |version| only affects SampleCompositionTimeOffset field. 4352 // If version == 0, SampleCompositionTimeOffset is uint32_t; 4353 // Otherwise, SampleCompositionTimeOffset is int32_t. 4354 // Sample.compositionOffset is defined as int32_t. 4355 uint8_t version = flags >> 24; 4356 flags &= 0xffffff; 4357 ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags); 4358 4359 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 4360 // These two shall not be used together. 4361 return -EINVAL; 4362 } 4363 4364 uint32_t sampleCount; 4365 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 4366 return ERROR_MALFORMED; 4367 } 4368 offset += 8; 4369 size -= 8; 4370 4371 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 4372 4373 uint32_t firstSampleFlags = 0; 4374 4375 if (flags & kDataOffsetPresent) { 4376 if (size < 4) { 4377 return -EINVAL; 4378 } 4379 4380 int32_t dataOffsetDelta; 4381 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 4382 return ERROR_MALFORMED; 4383 } 4384 4385 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 4386 4387 offset += 4; 4388 size -= 4; 4389 } 4390 4391 if (flags & kFirstSampleFlagsPresent) { 4392 if (size < 4) { 4393 return -EINVAL; 4394 } 4395 4396 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 4397 return ERROR_MALFORMED; 4398 } 4399 offset += 4; 4400 size -= 4; 4401 } 4402 4403 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 4404 sampleCtsOffset = 0; 4405 4406 size_t bytesPerSample = 0; 4407 if (flags & kSampleDurationPresent) { 4408 bytesPerSample += 4; 4409 } else if (mTrackFragmentHeaderInfo.mFlags 4410 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 4411 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 4412 } else if (mTrex) { 4413 sampleDuration = mTrex->default_sample_duration; 4414 } 4415 4416 if (flags & kSampleSizePresent) { 4417 bytesPerSample += 4; 4418 } else if (mTrackFragmentHeaderInfo.mFlags 4419 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 4420 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 4421 } else { 4422 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 4423 } 4424 4425 if (flags & kSampleFlagsPresent) { 4426 bytesPerSample += 4; 4427 } else if (mTrackFragmentHeaderInfo.mFlags 4428 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 4429 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 4430 } else { 4431 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 4432 } 4433 4434 if (flags & kSampleCompositionTimeOffsetPresent) { 4435 bytesPerSample += 4; 4436 } else { 4437 sampleCtsOffset = 0; 4438 } 4439 4440 if (size < (off64_t)(sampleCount * bytesPerSample)) { 4441 return -EINVAL; 4442 } 4443 4444 Sample tmp; 4445 for (uint32_t i = 0; i < sampleCount; ++i) { 4446 if (flags & kSampleDurationPresent) { 4447 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 4448 return ERROR_MALFORMED; 4449 } 4450 offset += 4; 4451 } 4452 4453 if (flags & kSampleSizePresent) { 4454 if (!mDataSource->getUInt32(offset, &sampleSize)) { 4455 return ERROR_MALFORMED; 4456 } 4457 offset += 4; 4458 } 4459 4460 if (flags & kSampleFlagsPresent) { 4461 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 4462 return ERROR_MALFORMED; 4463 } 4464 offset += 4; 4465 } 4466 4467 if (flags & kSampleCompositionTimeOffsetPresent) { 4468 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 4469 return ERROR_MALFORMED; 4470 } 4471 offset += 4; 4472 } 4473 4474 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, " 4475 " flags 0x%08x", i + 1, 4476 dataOffset, sampleSize, sampleDuration, 4477 (flags & kFirstSampleFlagsPresent) && i == 0 4478 ? firstSampleFlags : sampleFlags); 4479 tmp.offset = dataOffset; 4480 tmp.size = sampleSize; 4481 tmp.duration = sampleDuration; 4482 tmp.compositionOffset = sampleCtsOffset; 4483 mCurrentSamples.add(tmp); 4484 4485 dataOffset += sampleSize; 4486 } 4487 4488 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 4489 4490 return OK; 4491} 4492 4493status_t MPEG4Source::getFormat(MetaDataBase &meta) { 4494 Mutex::Autolock autoLock(mLock); 4495 meta = mFormat; 4496 return OK; 4497} 4498 4499size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 4500 switch (mNALLengthSize) { 4501 case 1: 4502 return *data; 4503 case 2: 4504 return U16_AT(data); 4505 case 3: 4506 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 4507 case 4: 4508 return U32_AT(data); 4509 } 4510 4511 // This cannot happen, mNALLengthSize springs to life by adding 1 to 4512 // a 2-bit integer. 4513 CHECK(!"Should not be here."); 4514 4515 return 0; 4516} 4517 4518status_t MPEG4Source::read( 4519 MediaBufferBase **out, const ReadOptions *options) { 4520 Mutex::Autolock autoLock(mLock); 4521 4522 CHECK(mStarted); 4523 4524 if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) { 4525 *out = nullptr; 4526 return WOULD_BLOCK; 4527 } 4528 4529 if (mFirstMoofOffset > 0) { 4530 return fragmentedRead(out, options); 4531 } 4532 4533 *out = NULL; 4534 4535 int64_t targetSampleTimeUs = -1; 4536 4537 int64_t seekTimeUs; 4538 ReadOptions::SeekMode mode; 4539 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 4540 if (mIsHeif) { 4541 CHECK(mSampleTable == NULL); 4542 CHECK(mItemTable != NULL); 4543 int32_t imageIndex; 4544 if (!mFormat.findInt32(kKeyTrackID, &imageIndex)) { 4545 return ERROR_MALFORMED; 4546 } 4547 4548 status_t err; 4549 if (seekTimeUs >= 0) { 4550 err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex); 4551 } else { 4552 err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex); 4553 } 4554 if (err != OK) { 4555 return err; 4556 } 4557 } else { 4558 uint32_t findFlags = 0; 4559 switch (mode) { 4560 case ReadOptions::SEEK_PREVIOUS_SYNC: 4561 findFlags = SampleTable::kFlagBefore; 4562 break; 4563 case ReadOptions::SEEK_NEXT_SYNC: 4564 findFlags = SampleTable::kFlagAfter; 4565 break; 4566 case ReadOptions::SEEK_CLOSEST_SYNC: 4567 case ReadOptions::SEEK_CLOSEST: 4568 findFlags = SampleTable::kFlagClosest; 4569 break; 4570 case ReadOptions::SEEK_FRAME_INDEX: 4571 findFlags = SampleTable::kFlagFrameIndex; 4572 break; 4573 default: 4574 CHECK(!"Should not be here."); 4575 break; 4576 } 4577 4578 uint32_t sampleIndex; 4579 status_t err = mSampleTable->findSampleAtTime( 4580 seekTimeUs, 1000000, mTimescale, 4581 &sampleIndex, findFlags); 4582 4583 if (mode == ReadOptions::SEEK_CLOSEST 4584 || mode == ReadOptions::SEEK_FRAME_INDEX) { 4585 // We found the closest sample already, now we want the sync 4586 // sample preceding it (or the sample itself of course), even 4587 // if the subsequent sync sample is closer. 4588 findFlags = SampleTable::kFlagBefore; 4589 } 4590 4591 uint32_t syncSampleIndex; 4592 if (err == OK) { 4593 err = mSampleTable->findSyncSampleNear( 4594 sampleIndex, &syncSampleIndex, findFlags); 4595 } 4596 4597 uint32_t sampleTime; 4598 if (err == OK) { 4599 err = mSampleTable->getMetaDataForSample( 4600 sampleIndex, NULL, NULL, &sampleTime); 4601 } 4602 4603 if (err != OK) { 4604 if (err == ERROR_OUT_OF_RANGE) { 4605 // An attempt to seek past the end of the stream would 4606 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 4607 // this all the way to the MediaPlayer would cause abnormal 4608 // termination. Legacy behaviour appears to be to behave as if 4609 // we had seeked to the end of stream, ending normally. 4610 err = ERROR_END_OF_STREAM; 4611 } 4612 ALOGV("end of stream"); 4613 return err; 4614 } 4615 4616 if (mode == ReadOptions::SEEK_CLOSEST 4617 || mode == ReadOptions::SEEK_FRAME_INDEX) { 4618 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 4619 } 4620 4621#if 0 4622 uint32_t syncSampleTime; 4623 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 4624 syncSampleIndex, NULL, NULL, &syncSampleTime)); 4625 4626 ALOGI("seek to time %lld us => sample at time %lld us, " 4627 "sync sample at time %lld us", 4628 seekTimeUs, 4629 sampleTime * 1000000ll / mTimescale, 4630 syncSampleTime * 1000000ll / mTimescale); 4631#endif 4632 4633 mCurrentSampleIndex = syncSampleIndex; 4634 } 4635 4636 if (mBuffer != NULL) { 4637 mBuffer->release(); 4638 mBuffer = NULL; 4639 } 4640 4641 // fall through 4642 } 4643 4644 off64_t offset = 0; 4645 size_t size = 0; 4646 uint32_t cts, stts; 4647 bool isSyncSample; 4648 bool newBuffer = false; 4649 if (mBuffer == NULL) { 4650 newBuffer = true; 4651 4652 status_t err; 4653 if (!mIsHeif) { 4654 err = mSampleTable->getMetaDataForSample( 4655 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts); 4656 } else { 4657 err = mItemTable->getImageOffsetAndSize( 4658 options && options->getSeekTo(&seekTimeUs, &mode) ? 4659 &mCurrentSampleIndex : NULL, &offset, &size); 4660 4661 cts = stts = 0; 4662 isSyncSample = 0; 4663 ALOGV("image offset %lld, size %zu", (long long)offset, size); 4664 } 4665 4666 if (err != OK) { 4667 return err; 4668 } 4669 4670 err = mGroup->acquire_buffer(&mBuffer); 4671 4672 if (err != OK) { 4673 CHECK(mBuffer == NULL); 4674 return err; 4675 } 4676 if (size > mBuffer->size()) { 4677 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 4678 mBuffer->release(); 4679 mBuffer = NULL; 4680 return ERROR_BUFFER_TOO_SMALL; 4681 } 4682 } 4683 4684 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) { 4685 if (newBuffer) { 4686 ssize_t num_bytes_read = 4687 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 4688 4689 if (num_bytes_read < (ssize_t)size) { 4690 mBuffer->release(); 4691 mBuffer = NULL; 4692 4693 return ERROR_IO; 4694 } 4695 4696 CHECK(mBuffer != NULL); 4697 mBuffer->set_range(0, size); 4698 mBuffer->meta_data().clear(); 4699 mBuffer->meta_data().setInt64( 4700 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4701 mBuffer->meta_data().setInt64( 4702 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 4703 4704 if (targetSampleTimeUs >= 0) { 4705 mBuffer->meta_data().setInt64( 4706 kKeyTargetTime, targetSampleTimeUs); 4707 } 4708 4709 if (isSyncSample) { 4710 mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1); 4711 } 4712 4713 ++mCurrentSampleIndex; 4714 } 4715 4716 if (!mIsAVC && !mIsHEVC) { 4717 *out = mBuffer; 4718 mBuffer = NULL; 4719 4720 return OK; 4721 } 4722 4723 // Each NAL unit is split up into its constituent fragments and 4724 // each one of them returned in its own buffer. 4725 4726 CHECK(mBuffer->range_length() >= mNALLengthSize); 4727 4728 const uint8_t *src = 4729 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 4730 4731 size_t nal_size = parseNALSize(src); 4732 if (mNALLengthSize > SIZE_MAX - nal_size) { 4733 ALOGE("b/24441553, b/24445122"); 4734 } 4735 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 4736 ALOGE("incomplete NAL unit."); 4737 4738 mBuffer->release(); 4739 mBuffer = NULL; 4740 4741 return ERROR_MALFORMED; 4742 } 4743 4744 MediaBufferBase *clone = mBuffer->clone(); 4745 CHECK(clone != NULL); 4746 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 4747 4748 CHECK(mBuffer != NULL); 4749 mBuffer->set_range( 4750 mBuffer->range_offset() + mNALLengthSize + nal_size, 4751 mBuffer->range_length() - mNALLengthSize - nal_size); 4752 4753 if (mBuffer->range_length() == 0) { 4754 mBuffer->release(); 4755 mBuffer = NULL; 4756 } 4757 4758 *out = clone; 4759 4760 return OK; 4761 } else { 4762 // Whole NAL units are returned but each fragment is prefixed by 4763 // the start code (0x00 00 00 01). 4764 ssize_t num_bytes_read = 0; 4765 int32_t drm = 0; 4766 bool usesDRM = (mFormat.findInt32(kKeyIsDRM, &drm) && drm != 0); 4767 if (usesDRM) { 4768 num_bytes_read = 4769 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 4770 } else { 4771 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 4772 } 4773 4774 if (num_bytes_read < (ssize_t)size) { 4775 mBuffer->release(); 4776 mBuffer = NULL; 4777 4778 return ERROR_IO; 4779 } 4780 4781 if (usesDRM) { 4782 CHECK(mBuffer != NULL); 4783 mBuffer->set_range(0, size); 4784 4785 } else { 4786 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4787 size_t srcOffset = 0; 4788 size_t dstOffset = 0; 4789 4790 while (srcOffset < size) { 4791 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 4792 size_t nalLength = 0; 4793 if (!isMalFormed) { 4794 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4795 srcOffset += mNALLengthSize; 4796 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength); 4797 } 4798 4799 if (isMalFormed) { 4800 ALOGE("Video is malformed"); 4801 mBuffer->release(); 4802 mBuffer = NULL; 4803 return ERROR_MALFORMED; 4804 } 4805 4806 if (nalLength == 0) { 4807 continue; 4808 } 4809 4810 if (dstOffset > SIZE_MAX - 4 || 4811 dstOffset + 4 > SIZE_MAX - nalLength || 4812 dstOffset + 4 + nalLength > mBuffer->size()) { 4813 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size()); 4814 android_errorWriteLog(0x534e4554, "27208621"); 4815 mBuffer->release(); 4816 mBuffer = NULL; 4817 return ERROR_MALFORMED; 4818 } 4819 4820 dstData[dstOffset++] = 0; 4821 dstData[dstOffset++] = 0; 4822 dstData[dstOffset++] = 0; 4823 dstData[dstOffset++] = 1; 4824 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4825 srcOffset += nalLength; 4826 dstOffset += nalLength; 4827 } 4828 CHECK_EQ(srcOffset, size); 4829 CHECK(mBuffer != NULL); 4830 mBuffer->set_range(0, dstOffset); 4831 } 4832 4833 mBuffer->meta_data().clear(); 4834 mBuffer->meta_data().setInt64( 4835 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4836 mBuffer->meta_data().setInt64( 4837 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 4838 4839 if (targetSampleTimeUs >= 0) { 4840 mBuffer->meta_data().setInt64( 4841 kKeyTargetTime, targetSampleTimeUs); 4842 } 4843 4844 if (mIsAVC) { 4845 uint32_t layerId = FindAVCLayerId( 4846 (const uint8_t *)mBuffer->data(), mBuffer->range_length()); 4847 mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId); 4848 } 4849 4850 if (isSyncSample) { 4851 mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1); 4852 } 4853 4854 ++mCurrentSampleIndex; 4855 4856 *out = mBuffer; 4857 mBuffer = NULL; 4858 4859 return OK; 4860 } 4861} 4862 4863status_t MPEG4Source::fragmentedRead( 4864 MediaBufferBase **out, const ReadOptions *options) { 4865 4866 ALOGV("MPEG4Source::fragmentedRead"); 4867 4868 CHECK(mStarted); 4869 4870 *out = NULL; 4871 4872 int64_t targetSampleTimeUs = -1; 4873 4874 int64_t seekTimeUs; 4875 ReadOptions::SeekMode mode; 4876 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 4877 4878 int numSidxEntries = mSegments.size(); 4879 if (numSidxEntries != 0) { 4880 int64_t totalTime = 0; 4881 off64_t totalOffset = mFirstMoofOffset; 4882 for (int i = 0; i < numSidxEntries; i++) { 4883 const SidxEntry *se = &mSegments[i]; 4884 if (totalTime + se->mDurationUs > seekTimeUs) { 4885 // The requested time is somewhere in this segment 4886 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) || 4887 (mode == ReadOptions::SEEK_CLOSEST_SYNC && 4888 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 4889 // requested next sync, or closest sync and it was closer to the end of 4890 // this segment 4891 totalTime += se->mDurationUs; 4892 totalOffset += se->mSize; 4893 } 4894 break; 4895 } 4896 totalTime += se->mDurationUs; 4897 totalOffset += se->mSize; 4898 } 4899 mCurrentMoofOffset = totalOffset; 4900 mNextMoofOffset = -1; 4901 mCurrentSamples.clear(); 4902 mCurrentSampleIndex = 0; 4903 status_t err = parseChunk(&totalOffset); 4904 if (err != OK) { 4905 return err; 4906 } 4907 mCurrentTime = totalTime * mTimescale / 1000000ll; 4908 } else { 4909 // without sidx boxes, we can only seek to 0 4910 mCurrentMoofOffset = mFirstMoofOffset; 4911 mNextMoofOffset = -1; 4912 mCurrentSamples.clear(); 4913 mCurrentSampleIndex = 0; 4914 off64_t tmp = mCurrentMoofOffset; 4915 status_t err = parseChunk(&tmp); 4916 if (err != OK) { 4917 return err; 4918 } 4919 mCurrentTime = 0; 4920 } 4921 4922 if (mBuffer != NULL) { 4923 mBuffer->release(); 4924 mBuffer = NULL; 4925 } 4926 4927 // fall through 4928 } 4929 4930 off64_t offset = 0; 4931 size_t size = 0; 4932 uint32_t cts = 0; 4933 bool isSyncSample = false; 4934 bool newBuffer = false; 4935 if (mBuffer == NULL) { 4936 newBuffer = true; 4937 4938 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4939 // move to next fragment if there is one 4940 if (mNextMoofOffset <= mCurrentMoofOffset) { 4941 return ERROR_END_OF_STREAM; 4942 } 4943 off64_t nextMoof = mNextMoofOffset; 4944 mCurrentMoofOffset = nextMoof; 4945 mCurrentSamples.clear(); 4946 mCurrentSampleIndex = 0; 4947 status_t err = parseChunk(&nextMoof); 4948 if (err != OK) { 4949 return err; 4950 } 4951 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4952 return ERROR_END_OF_STREAM; 4953 } 4954 } 4955 4956 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4957 offset = smpl->offset; 4958 size = smpl->size; 4959 cts = mCurrentTime + smpl->compositionOffset; 4960 mCurrentTime += smpl->duration; 4961 isSyncSample = (mCurrentSampleIndex == 0); // XXX 4962 4963 status_t err = mGroup->acquire_buffer(&mBuffer); 4964 4965 if (err != OK) { 4966 CHECK(mBuffer == NULL); 4967 ALOGV("acquire_buffer returned %d", err); 4968 return err; 4969 } 4970 if (size > mBuffer->size()) { 4971 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 4972 mBuffer->release(); 4973 mBuffer = NULL; 4974 return ERROR_BUFFER_TOO_SMALL; 4975 } 4976 } 4977 4978 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4979 MetaDataBase &bufmeta = mBuffer->meta_data(); 4980 bufmeta.clear(); 4981 if (smpl->encryptedsizes.size()) { 4982 // store clear/encrypted lengths in metadata 4983 bufmeta.setData(kKeyPlainSizes, 0, 4984 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 4985 bufmeta.setData(kKeyEncryptedSizes, 0, 4986 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 4987 bufmeta.setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size? 4988 bufmeta.setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 4989 bufmeta.setInt32(kKeyCryptoMode, mCryptoMode); 4990 bufmeta.setData(kKeyCryptoKey, 0, mCryptoKey, 16); 4991 } 4992 4993 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) { 4994 if (newBuffer) { 4995 if (!isInRange((size_t)0u, mBuffer->size(), size)) { 4996 mBuffer->release(); 4997 mBuffer = NULL; 4998 4999 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size); 5000 return ERROR_MALFORMED; 5001 } 5002 5003 ssize_t num_bytes_read = 5004 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 5005 5006 if (num_bytes_read < (ssize_t)size) { 5007 mBuffer->release(); 5008 mBuffer = NULL; 5009 5010 ALOGE("i/o error"); 5011 return ERROR_IO; 5012 } 5013 5014 CHECK(mBuffer != NULL); 5015 mBuffer->set_range(0, size); 5016 mBuffer->meta_data().setInt64( 5017 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 5018 mBuffer->meta_data().setInt64( 5019 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 5020 5021 if (targetSampleTimeUs >= 0) { 5022 mBuffer->meta_data().setInt64( 5023 kKeyTargetTime, targetSampleTimeUs); 5024 } 5025 5026 if (mIsAVC) { 5027 uint32_t layerId = FindAVCLayerId( 5028 (const uint8_t *)mBuffer->data(), mBuffer->range_length()); 5029 mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId); 5030 } 5031 5032 if (isSyncSample) { 5033 mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1); 5034 } 5035 5036 ++mCurrentSampleIndex; 5037 } 5038 5039 if (!mIsAVC && !mIsHEVC) { 5040 *out = mBuffer; 5041 mBuffer = NULL; 5042 5043 return OK; 5044 } 5045 5046 // Each NAL unit is split up into its constituent fragments and 5047 // each one of them returned in its own buffer. 5048 5049 CHECK(mBuffer->range_length() >= mNALLengthSize); 5050 5051 const uint8_t *src = 5052 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 5053 5054 size_t nal_size = parseNALSize(src); 5055 if (mNALLengthSize > SIZE_MAX - nal_size) { 5056 ALOGE("b/24441553, b/24445122"); 5057 } 5058 5059 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 5060 ALOGE("incomplete NAL unit."); 5061 5062 mBuffer->release(); 5063 mBuffer = NULL; 5064 5065 return ERROR_MALFORMED; 5066 } 5067 5068 MediaBufferBase *clone = mBuffer->clone(); 5069 CHECK(clone != NULL); 5070 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 5071 5072 CHECK(mBuffer != NULL); 5073 mBuffer->set_range( 5074 mBuffer->range_offset() + mNALLengthSize + nal_size, 5075 mBuffer->range_length() - mNALLengthSize - nal_size); 5076 5077 if (mBuffer->range_length() == 0) { 5078 mBuffer->release(); 5079 mBuffer = NULL; 5080 } 5081 5082 *out = clone; 5083 5084 return OK; 5085 } else { 5086 ALOGV("whole NAL"); 5087 // Whole NAL units are returned but each fragment is prefixed by 5088 // the start code (0x00 00 00 01). 5089 ssize_t num_bytes_read = 0; 5090 int32_t drm = 0; 5091 bool usesDRM = (mFormat.findInt32(kKeyIsDRM, &drm) && drm != 0); 5092 void *data = NULL; 5093 bool isMalFormed = false; 5094 if (usesDRM) { 5095 if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) { 5096 isMalFormed = true; 5097 } else { 5098 data = mBuffer->data(); 5099 } 5100 } else { 5101 int32_t max_size; 5102 if (!mFormat.findInt32(kKeyMaxInputSize, &max_size) 5103 || !isInRange((size_t)0u, (size_t)max_size, size)) { 5104 isMalFormed = true; 5105 } else { 5106 data = mSrcBuffer; 5107 } 5108 } 5109 5110 if (isMalFormed || data == NULL) { 5111 ALOGE("isMalFormed size %zu", size); 5112 if (mBuffer != NULL) { 5113 mBuffer->release(); 5114 mBuffer = NULL; 5115 } 5116 return ERROR_MALFORMED; 5117 } 5118 num_bytes_read = mDataSource->readAt(offset, data, size); 5119 5120 if (num_bytes_read < (ssize_t)size) { 5121 mBuffer->release(); 5122 mBuffer = NULL; 5123 5124 ALOGE("i/o error"); 5125 return ERROR_IO; 5126 } 5127 5128 if (usesDRM) { 5129 CHECK(mBuffer != NULL); 5130 mBuffer->set_range(0, size); 5131 5132 } else { 5133 uint8_t *dstData = (uint8_t *)mBuffer->data(); 5134 size_t srcOffset = 0; 5135 size_t dstOffset = 0; 5136 5137 while (srcOffset < size) { 5138 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 5139 size_t nalLength = 0; 5140 if (!isMalFormed) { 5141 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 5142 srcOffset += mNALLengthSize; 5143 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength) 5144 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u) 5145 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength); 5146 } 5147 5148 if (isMalFormed) { 5149 ALOGE("Video is malformed; nalLength %zu", nalLength); 5150 mBuffer->release(); 5151 mBuffer = NULL; 5152 return ERROR_MALFORMED; 5153 } 5154 5155 if (nalLength == 0) { 5156 continue; 5157 } 5158 5159 if (dstOffset > SIZE_MAX - 4 || 5160 dstOffset + 4 > SIZE_MAX - nalLength || 5161 dstOffset + 4 + nalLength > mBuffer->size()) { 5162 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size()); 5163 android_errorWriteLog(0x534e4554, "26365349"); 5164 mBuffer->release(); 5165 mBuffer = NULL; 5166 return ERROR_MALFORMED; 5167 } 5168 5169 dstData[dstOffset++] = 0; 5170 dstData[dstOffset++] = 0; 5171 dstData[dstOffset++] = 0; 5172 dstData[dstOffset++] = 1; 5173 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 5174 srcOffset += nalLength; 5175 dstOffset += nalLength; 5176 } 5177 CHECK_EQ(srcOffset, size); 5178 CHECK(mBuffer != NULL); 5179 mBuffer->set_range(0, dstOffset); 5180 } 5181 5182 mBuffer->meta_data().setInt64( 5183 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 5184 mBuffer->meta_data().setInt64( 5185 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 5186 5187 if (targetSampleTimeUs >= 0) { 5188 mBuffer->meta_data().setInt64( 5189 kKeyTargetTime, targetSampleTimeUs); 5190 } 5191 5192 if (isSyncSample) { 5193 mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1); 5194 } 5195 5196 ++mCurrentSampleIndex; 5197 5198 *out = mBuffer; 5199 mBuffer = NULL; 5200 5201 return OK; 5202 } 5203} 5204 5205MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 5206 const char *mimePrefix) { 5207 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 5208 const char *mime; 5209 if (track->meta.findCString(kKeyMIMEType, &mime) 5210 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 5211 return track; 5212 } 5213 } 5214 5215 return NULL; 5216} 5217 5218static bool LegacySniffMPEG4(DataSourceBase *source, float *confidence) { 5219 uint8_t header[8]; 5220 5221 ssize_t n = source->readAt(4, header, sizeof(header)); 5222 if (n < (ssize_t)sizeof(header)) { 5223 return false; 5224 } 5225 5226 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 5227 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) 5228 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 5229 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 5230 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 5231 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8) 5232 || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8) 5233 || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)) { 5234 *confidence = 0.4; 5235 5236 return true; 5237 } 5238 5239 return false; 5240} 5241 5242static bool isCompatibleBrand(uint32_t fourcc) { 5243 static const uint32_t kCompatibleBrands[] = { 5244 FOURCC('i', 's', 'o', 'm'), 5245 FOURCC('i', 's', 'o', '2'), 5246 FOURCC('a', 'v', 'c', '1'), 5247 FOURCC('h', 'v', 'c', '1'), 5248 FOURCC('h', 'e', 'v', '1'), 5249 FOURCC('3', 'g', 'p', '4'), 5250 FOURCC('m', 'p', '4', '1'), 5251 FOURCC('m', 'p', '4', '2'), 5252 FOURCC('d', 'a', 's', 'h'), 5253 5254 // Won't promise that the following file types can be played. 5255 // Just give these file types a chance. 5256 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 5257 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 5258 5259 FOURCC('3', 'g', '2', 'a'), // 3GPP2 5260 FOURCC('3', 'g', '2', 'b'), 5261 FOURCC('m', 'i', 'f', '1'), // HEIF image 5262 FOURCC('h', 'e', 'i', 'c'), // HEIF image 5263 FOURCC('m', 's', 'f', '1'), // HEIF image sequence 5264 FOURCC('h', 'e', 'v', 'c'), // HEIF image sequence 5265 }; 5266 5267 for (size_t i = 0; 5268 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 5269 ++i) { 5270 if (kCompatibleBrands[i] == fourcc) { 5271 return true; 5272 } 5273 } 5274 5275 return false; 5276} 5277 5278// Attempt to actually parse the 'ftyp' atom and determine if a suitable 5279// compatible brand is present. 5280// Also try to identify where this file's metadata ends 5281// (end of the 'moov' atom) and report it to the caller as part of 5282// the metadata. 5283static bool BetterSniffMPEG4(DataSourceBase *source, float *confidence) { 5284 // We scan up to 128 bytes to identify this file as an MP4. 5285 static const off64_t kMaxScanOffset = 128ll; 5286 5287 off64_t offset = 0ll; 5288 bool foundGoodFileType = false; 5289 off64_t moovAtomEndOffset = -1ll; 5290 bool done = false; 5291 5292 while (!done && offset < kMaxScanOffset) { 5293 uint32_t hdr[2]; 5294 if (source->readAt(offset, hdr, 8) < 8) { 5295 return false; 5296 } 5297 5298 uint64_t chunkSize = ntohl(hdr[0]); 5299 uint32_t chunkType = ntohl(hdr[1]); 5300 off64_t chunkDataOffset = offset + 8; 5301 5302 if (chunkSize == 1) { 5303 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 5304 return false; 5305 } 5306 5307 chunkSize = ntoh64(chunkSize); 5308 chunkDataOffset += 8; 5309 5310 if (chunkSize < 16) { 5311 // The smallest valid chunk is 16 bytes long in this case. 5312 return false; 5313 } 5314 5315 } else if (chunkSize < 8) { 5316 // The smallest valid chunk is 8 bytes long. 5317 return false; 5318 } 5319 5320 // (data_offset - offset) is either 8 or 16 5321 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset); 5322 if (chunkDataSize < 0) { 5323 ALOGE("b/23540914"); 5324 return false; 5325 } 5326 5327 char chunkstring[5]; 5328 MakeFourCCString(chunkType, chunkstring); 5329 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset); 5330 switch (chunkType) { 5331 case FOURCC('f', 't', 'y', 'p'): 5332 { 5333 if (chunkDataSize < 8) { 5334 return false; 5335 } 5336 5337 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 5338 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 5339 if (i == 1) { 5340 // Skip this index, it refers to the minorVersion, 5341 // not a brand. 5342 continue; 5343 } 5344 5345 uint32_t brand; 5346 if (source->readAt( 5347 chunkDataOffset + 4 * i, &brand, 4) < 4) { 5348 return false; 5349 } 5350 5351 brand = ntohl(brand); 5352 5353 if (isCompatibleBrand(brand)) { 5354 foundGoodFileType = true; 5355 break; 5356 } 5357 } 5358 5359 if (!foundGoodFileType) { 5360 return false; 5361 } 5362 5363 break; 5364 } 5365 5366 case FOURCC('m', 'o', 'o', 'v'): 5367 { 5368 moovAtomEndOffset = offset + chunkSize; 5369 5370 done = true; 5371 break; 5372 } 5373 5374 default: 5375 break; 5376 } 5377 5378 offset += chunkSize; 5379 } 5380 5381 if (!foundGoodFileType) { 5382 return false; 5383 } 5384 5385 *confidence = 0.4f; 5386 5387 return true; 5388} 5389 5390static MediaExtractor* CreateExtractor(DataSourceBase *source, void *) { 5391 return new MPEG4Extractor(source); 5392} 5393 5394static MediaExtractor::CreatorFunc Sniff( 5395 DataSourceBase *source, float *confidence, void **, 5396 MediaExtractor::FreeMetaFunc *) { 5397 if (BetterSniffMPEG4(source, confidence)) { 5398 return CreateExtractor; 5399 } 5400 5401 if (LegacySniffMPEG4(source, confidence)) { 5402 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 5403 return CreateExtractor; 5404 } 5405 5406 return NULL; 5407} 5408 5409extern "C" { 5410// This is the only symbol that needs to be exported 5411__attribute__ ((visibility ("default"))) 5412MediaExtractor::ExtractorDef GETEXTRACTORDEF() { 5413 return { 5414 MediaExtractor::EXTRACTORDEF_VERSION, 5415 UUID("27575c67-4417-4c54-8d3d-8e626985a164"), 5416 1, // version 5417 "MP4 Extractor", 5418 Sniff 5419 }; 5420} 5421 5422} // extern "C" 5423 5424} // namespace android 5425