MPEG4Extractor.cpp revision 17e172b4c3c87ecaa7c87eecc42b4dc47e3e9734
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//#define LOG_NDEBUG 0
#define LOG_TAG "MPEG4Extractor"

#include <ctype.h>
#include <inttypes.h>
#include <memory>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <utils/Log.h>

#include "MPEG4Extractor.h"
#include "SampleTable.h"
#include "ItemTable.h"
#include "include/ESDS.h"

#include <media/MediaSourceBase.h>
#include <media/stagefright/foundation/ABitReader.h>
#include <media/stagefright/foundation/ABuffer.h>
#include <media/stagefright/foundation/ADebug.h>
#include <media/stagefright/foundation/AMessage.h>
#include <media/stagefright/foundation/AUtils.h>
#include <media/stagefright/foundation/ByteUtils.h>
#include <media/stagefright/foundation/ColorUtils.h>
#include <media/stagefright/foundation/avc_utils.h>
#include <media/stagefright/foundation/hexdump.h>
#include <media/stagefright/MediaBuffer.h>
#include <media/stagefright/MediaBufferGroup.h>
#include <media/stagefright/MediaDefs.h>
#include <media/stagefright/MetaData.h>
#include <utils/String8.h>

#include <byteswap.h>
#include "include/ID3.h"

// Fallback for toolchains whose <stdint.h> does not provide UINT32_MAX.
#ifndef UINT32_MAX
#define UINT32_MAX (4294967295U)
#endif

namespace android {

// File-wide size limits used while parsing.
enum {
    // max track header chunk to return
    kMaxTrackHeaderSize = 32,

    // maximum size of an atom. Some atoms can be bigger according to the spec,
    // but we only allow up to this size.
    kMaxAtomSize = 64 * 1024 * 1024,
};

// Media source for a single track, constructed from the track format, the
// data source, a sample table, sidx entries, a Trex record, the offset of the
// first moof box and an item table.
//
// Only the declaration is visible in this part of the file; the notes on the
// members below describe what the declarations show and flag anything that
// still has to be confirmed against the member function definitions.
class MPEG4Source : public MediaSourceBase {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(const sp<MetaData> &format,
                DataSourceBase *dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset,
                const sp<ItemTable> &itemTable);
    virtual status_t init();

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    // This source always reports support for non-blocking reads.
    virtual bool supportNonblockingRead() { return true; }
    // NOTE(review): presumably the read path used for fragmented (moof-based)
    // files -- confirm against the definition.
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

    virtual ~MPEG4Source();

private:
    Mutex mLock;

    sp<MetaData> mFormat;
    DataSourceBase *mDataSource;   // not owned, see the constructor comment
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Reference member: the vector passed to the constructor must outlive
    // this object.
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    // Bookkeeping for sample auxiliary information.
    // NOTE(review): presumably filled in by
    // parseSampleAuxiliaryInformationSizes/Offsets -- confirm.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    bool mIsHeif;
    sp<ItemTable> mItemTable;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // Values taken from a track fragment header.
    struct TrackFragmentHeaderInfo {
        // Bit values tested against mFlags.
        // NOTE(review): presumably the tf_flags of the 'tfhd' box
        // (ISO/IEC 14496-12) -- confirm in parseTrackFragmentHeader.
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record: location, size, timing and encryption layout.
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Copy operations are declared private; no definition is visible here,
    // presumably to forbid copying.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};

// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache while forwarding
// all remaining requests to the wrapped datasource.
// This is used to cache the full sampletable metadata for a single track,
// possibly wrapping multiple times to cover all tracks, i.e.
// Each CachedRangedDataSource caches the sampletable metadata for a single track.
190 191struct CachedRangedDataSource : public DataSourceBase { 192 explicit CachedRangedDataSource(DataSourceBase *source); 193 virtual ~CachedRangedDataSource(); 194 195 virtual status_t initCheck() const; 196 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 197 virtual status_t getSize(off64_t *size); 198 virtual uint32_t flags(); 199 200 status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess); 201 202 203private: 204 Mutex mLock; 205 206 DataSourceBase *mSource; 207 bool mOwnsDataSource; 208 off64_t mCachedOffset; 209 size_t mCachedSize; 210 uint8_t *mCache; 211 212 void clearCache(); 213 214 CachedRangedDataSource(const CachedRangedDataSource &); 215 CachedRangedDataSource &operator=(const CachedRangedDataSource &); 216}; 217 218CachedRangedDataSource::CachedRangedDataSource(DataSourceBase *source) 219 : mSource(source), 220 mOwnsDataSource(false), 221 mCachedOffset(0), 222 mCachedSize(0), 223 mCache(NULL) { 224} 225 226CachedRangedDataSource::~CachedRangedDataSource() { 227 clearCache(); 228 if (mOwnsDataSource) { 229 delete (CachedRangedDataSource*)mSource; 230 } 231} 232 233void CachedRangedDataSource::clearCache() { 234 if (mCache) { 235 free(mCache); 236 mCache = NULL; 237 } 238 239 mCachedOffset = 0; 240 mCachedSize = 0; 241} 242 243status_t CachedRangedDataSource::initCheck() const { 244 return mSource->initCheck(); 245} 246 247ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) { 248 Mutex::Autolock autoLock(mLock); 249 250 if (isInRange(mCachedOffset, mCachedSize, offset, size)) { 251 memcpy(data, &mCache[offset - mCachedOffset], size); 252 return size; 253 } 254 255 return mSource->readAt(offset, data, size); 256} 257 258status_t CachedRangedDataSource::getSize(off64_t *size) { 259 return mSource->getSize(size); 260} 261 262uint32_t CachedRangedDataSource::flags() { 263 return mSource->flags(); 264} 265 266status_t CachedRangedDataSource::setCachedRange(off64_t offset, 267 
size_t size, 268 bool assumeSourceOwnershipOnSuccess) { 269 Mutex::Autolock autoLock(mLock); 270 271 clearCache(); 272 273 mCache = (uint8_t *)malloc(size); 274 275 if (mCache == NULL) { 276 return -ENOMEM; 277 } 278 279 mCachedOffset = offset; 280 mCachedSize = size; 281 282 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize); 283 284 if (err < (ssize_t)size) { 285 clearCache(); 286 287 return ERROR_IO; 288 } 289 mOwnsDataSource = assumeSourceOwnershipOnSuccess; 290 return OK; 291} 292 293//////////////////////////////////////////////////////////////////////////////// 294 295static const bool kUseHexDump = false; 296 297static const char *FourCC2MIME(uint32_t fourcc) { 298 switch (fourcc) { 299 case FOURCC('m', 'p', '4', 'a'): 300 return MEDIA_MIMETYPE_AUDIO_AAC; 301 302 case FOURCC('s', 'a', 'm', 'r'): 303 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 304 305 case FOURCC('s', 'a', 'w', 'b'): 306 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 307 308 case FOURCC('m', 'p', '4', 'v'): 309 return MEDIA_MIMETYPE_VIDEO_MPEG4; 310 311 case FOURCC('s', '2', '6', '3'): 312 case FOURCC('h', '2', '6', '3'): 313 case FOURCC('H', '2', '6', '3'): 314 return MEDIA_MIMETYPE_VIDEO_H263; 315 316 case FOURCC('a', 'v', 'c', '1'): 317 return MEDIA_MIMETYPE_VIDEO_AVC; 318 319 case FOURCC('h', 'v', 'c', '1'): 320 case FOURCC('h', 'e', 'v', '1'): 321 return MEDIA_MIMETYPE_VIDEO_HEVC; 322 default: 323 CHECK(!"should not be here."); 324 return NULL; 325 } 326} 327 328static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 329 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 330 // AMR NB audio is always mono, 8kHz 331 *channels = 1; 332 *rate = 8000; 333 return true; 334 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 335 // AMR WB audio is always mono, 16kHz 336 *channels = 1; 337 *rate = 16000; 338 return true; 339 } 340 return false; 341} 342 343MPEG4Extractor::MPEG4Extractor(DataSourceBase *source, const char 
*mime) 344 : mMoofOffset(0), 345 mMoofFound(false), 346 mMdatFound(false), 347 mDataSource(source), 348 mCachedSource(NULL), 349 mInitCheck(NO_INIT), 350 mHeaderTimescale(0), 351 mIsQT(false), 352 mIsHeif(false), 353 mHasMoovBox(false), 354 mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)), 355 mFirstTrack(NULL), 356 mLastTrack(NULL), 357 mFileMetaData(new MetaData), 358 mFirstSINF(NULL), 359 mIsDrm(false) { 360 ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif); 361} 362 363MPEG4Extractor::~MPEG4Extractor() { 364 Track *track = mFirstTrack; 365 while (track) { 366 Track *next = track->next; 367 368 delete track; 369 track = next; 370 } 371 mFirstTrack = mLastTrack = NULL; 372 373 SINF *sinf = mFirstSINF; 374 while (sinf) { 375 SINF *next = sinf->next; 376 delete[] sinf->IPMPData; 377 delete sinf; 378 sinf = next; 379 } 380 mFirstSINF = NULL; 381 382 for (size_t i = 0; i < mPssh.size(); i++) { 383 delete [] mPssh[i].data; 384 } 385 mPssh.clear(); 386 387 delete mCachedSource; 388} 389 390uint32_t MPEG4Extractor::flags() const { 391 return CAN_PAUSE | 392 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 
393 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 394} 395 396sp<MetaData> MPEG4Extractor::getMetaData() { 397 status_t err; 398 if ((err = readMetaData()) != OK) { 399 return new MetaData; 400 } 401 402 return mFileMetaData; 403} 404 405size_t MPEG4Extractor::countTracks() { 406 status_t err; 407 if ((err = readMetaData()) != OK) { 408 ALOGV("MPEG4Extractor::countTracks: no tracks"); 409 return 0; 410 } 411 412 size_t n = 0; 413 Track *track = mFirstTrack; 414 while (track) { 415 ++n; 416 track = track->next; 417 } 418 419 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n); 420 return n; 421} 422 423sp<MetaData> MPEG4Extractor::getTrackMetaData( 424 size_t index, uint32_t flags) { 425 status_t err; 426 if ((err = readMetaData()) != OK) { 427 return NULL; 428 } 429 430 Track *track = mFirstTrack; 431 while (index > 0) { 432 if (track == NULL) { 433 return NULL; 434 } 435 436 track = track->next; 437 --index; 438 } 439 440 if (track == NULL) { 441 return NULL; 442 } 443 444 [=] { 445 int64_t duration; 446 int32_t samplerate; 447 if (track->has_elst && mHeaderTimescale != 0 && 448 track->meta->findInt64(kKeyDuration, &duration) && 449 track->meta->findInt32(kKeySampleRate, &samplerate)) { 450 451 track->has_elst = false; 452 453 if (track->elst_segment_duration > INT64_MAX) { 454 return; 455 } 456 int64_t segment_duration = track->elst_segment_duration; 457 int64_t media_time = track->elst_media_time; 458 int64_t halfscale = mHeaderTimescale / 2; 459 ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64 460 ", halfscale = %" PRId64 ", timescale = %d", 461 segment_duration, 462 media_time, 463 halfscale, 464 mHeaderTimescale); 465 466 int64_t delay; 467 // delay = ((media_time * samplerate) + halfscale) / mHeaderTimescale; 468 if (__builtin_mul_overflow(media_time, samplerate, &delay) || 469 __builtin_add_overflow(delay, halfscale, &delay) || 470 (delay /= mHeaderTimescale, false) || 471 delay > INT32_MAX || 472 delay < INT32_MIN) { 473 return; 474 
} 475 ALOGV("delay = %" PRId64, delay); 476 track->meta->setInt32(kKeyEncoderDelay, delay); 477 478 int64_t scaled_duration; 479 // scaled_duration = duration * mHeaderTimescale; 480 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) { 481 return; 482 } 483 ALOGV("scaled_duration = %" PRId64, scaled_duration); 484 485 int64_t segment_end; 486 int64_t padding; 487 // padding = scaled_duration - ((segment_duration + media_time) * 1000000); 488 if (__builtin_add_overflow(segment_duration, media_time, &segment_end) || 489 __builtin_mul_overflow(segment_end, 1000000, &segment_end) || 490 __builtin_sub_overflow(scaled_duration, segment_end, &padding)) { 491 return; 492 } 493 ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding); 494 495 if (padding < 0) { 496 // track duration from media header (which is what kKeyDuration is) might 497 // be slightly shorter than the segment duration, which would make the 498 // padding negative. Clamp to zero. 499 padding = 0; 500 } 501 502 int64_t paddingsamples; 503 int64_t halfscale_e6; 504 int64_t timescale_e6; 505 // paddingsamples = ((padding * samplerate) + (halfscale * 1000000)) 506 // / (mHeaderTimescale * 1000000); 507 if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) || 508 __builtin_mul_overflow(halfscale, 1000000, &halfscale_e6) || 509 __builtin_mul_overflow(mHeaderTimescale, 1000000, ×cale_e6) || 510 __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) || 511 (paddingsamples /= timescale_e6, false) || 512 paddingsamples > INT32_MAX) { 513 return; 514 } 515 ALOGV("paddingsamples = %" PRId64, paddingsamples); 516 track->meta->setInt32(kKeyEncoderPadding, paddingsamples); 517 } 518 }(); 519 520 if ((flags & kIncludeExtensiveMetaData) 521 && !track->includes_expensive_metadata) { 522 track->includes_expensive_metadata = true; 523 524 const char *mime; 525 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 526 if (!strncasecmp("video/", mime, 6)) { 
527 // MPEG2 tracks do not provide CSD, so read the stream header 528 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) { 529 off64_t offset; 530 size_t size; 531 if (track->sampleTable->getMetaDataForSample( 532 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) { 533 if (size > kMaxTrackHeaderSize) { 534 size = kMaxTrackHeaderSize; 535 } 536 uint8_t header[kMaxTrackHeaderSize]; 537 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) { 538 track->meta->setData(kKeyStreamHeader, 'mdat', header, size); 539 } 540 } 541 } 542 543 if (mMoofOffset > 0) { 544 int64_t duration; 545 if (track->meta->findInt64(kKeyDuration, &duration)) { 546 // nothing fancy, just pick a frame near 1/4th of the duration 547 track->meta->setInt64( 548 kKeyThumbnailTime, duration / 4); 549 } 550 } else { 551 uint32_t sampleIndex; 552 uint32_t sampleTime; 553 if (track->timescale != 0 && 554 track->sampleTable->findThumbnailSample(&sampleIndex) == OK 555 && track->sampleTable->getMetaDataForSample( 556 sampleIndex, NULL /* offset */, NULL /* size */, 557 &sampleTime) == OK) { 558 track->meta->setInt64( 559 kKeyThumbnailTime, 560 ((int64_t)sampleTime * 1000000) / track->timescale); 561 } 562 } 563 } 564 } 565 566 return track->meta; 567} 568 569status_t MPEG4Extractor::readMetaData() { 570 if (mInitCheck != NO_INIT) { 571 return mInitCheck; 572 } 573 574 off64_t offset = 0; 575 status_t err; 576 bool sawMoovOrSidx = false; 577 578 while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) || 579 (mIsHeif && (mPreferHeif || !mHasMoovBox) && 580 (mItemTable != NULL) && mItemTable->isValid()))) { 581 off64_t orig_offset = offset; 582 err = parseChunk(&offset, 0); 583 584 if (err != OK && err != UNKNOWN_ERROR) { 585 break; 586 } else if (offset <= orig_offset) { 587 // only continue parsing if the offset was advanced, 588 // otherwise we might end up in an infinite loop 589 ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset); 590 err 
= ERROR_MALFORMED; 591 break; 592 } else if (err == UNKNOWN_ERROR) { 593 sawMoovOrSidx = true; 594 } 595 } 596 597 if (mIsHeif && (mItemTable != NULL) && (mItemTable->countImages() > 0)) { 598 for (uint32_t imageIndex = 0; 599 imageIndex < mItemTable->countImages(); imageIndex++) { 600 sp<MetaData> meta = mItemTable->getImageMeta(imageIndex); 601 if (meta == NULL) { 602 ALOGE("heif image %u has no meta!", imageIndex); 603 continue; 604 } 605 // Some heif files advertise image sequence brands (eg. 'hevc') in 606 // ftyp box, but don't have any valid tracks in them. Instead of 607 // reporting the entire file as malformed, we override the error 608 // to allow still images to be extracted. 609 if (err != OK) { 610 ALOGW("Extracting still images only"); 611 err = OK; 612 } 613 614 ALOGV("adding HEIF image track %u", imageIndex); 615 Track *track = new Track; 616 track->next = NULL; 617 if (mLastTrack != NULL) { 618 mLastTrack->next = track; 619 } else { 620 mFirstTrack = track; 621 } 622 mLastTrack = track; 623 624 track->meta = meta; 625 track->meta->setInt32(kKeyTrackID, imageIndex); 626 track->includes_expensive_metadata = false; 627 track->skipTrack = false; 628 track->timescale = 0; 629 } 630 } 631 632 if (mInitCheck == OK) { 633 if (findTrackByMimePrefix("video/") != NULL) { 634 mFileMetaData->setCString( 635 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 636 } else if (findTrackByMimePrefix("audio/") != NULL) { 637 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); 638 } else if (findTrackByMimePrefix( 639 MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) { 640 mFileMetaData->setCString( 641 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_HEIF); 642 } else { 643 mFileMetaData->setCString(kKeyMIMEType, "application/octet-stream"); 644 } 645 } else { 646 mInitCheck = err; 647 } 648 649 CHECK_NE(err, (status_t)NO_INIT); 650 651 // copy pssh data into file metadata 652 uint64_t psshsize = 0; 653 for (size_t i = 0; i < mPssh.size(); i++) { 654 psshsize += 20 + mPssh[i].datalen; 
655 } 656 if (psshsize > 0 && psshsize <= UINT32_MAX) { 657 char *buf = (char*)malloc(psshsize); 658 if (!buf) { 659 ALOGE("b/28471206"); 660 return NO_MEMORY; 661 } 662 char *ptr = buf; 663 for (size_t i = 0; i < mPssh.size(); i++) { 664 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 665 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 666 ptr += (20 + mPssh[i].datalen); 667 } 668 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize); 669 free(buf); 670 } 671 672 return mInitCheck; 673} 674 675char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { 676 if (mFirstSINF == NULL) { 677 return NULL; 678 } 679 680 SINF *sinf = mFirstSINF; 681 while (sinf && (trackID != sinf->trackID)) { 682 sinf = sinf->next; 683 } 684 685 if (sinf == NULL) { 686 return NULL; 687 } 688 689 *len = sinf->len; 690 return sinf->IPMPData; 691} 692 693// Reads an encoded integer 7 bits at a time until it encounters the high bit clear. 694static int32_t readSize(off64_t offset, 695 DataSourceBase *DataSourceBase, uint8_t *numOfBytes) { 696 uint32_t size = 0; 697 uint8_t data; 698 bool moreData = true; 699 *numOfBytes = 0; 700 701 while (moreData) { 702 if (DataSourceBase->readAt(offset, &data, 1) < 1) { 703 return -1; 704 } 705 offset ++; 706 moreData = (data >= 128) ? 
true : false; 707 size = (size << 7) | (data & 0x7f); // Take last 7 bits 708 (*numOfBytes) ++; 709 } 710 711 return size; 712} 713 714status_t MPEG4Extractor::parseDrmSINF( 715 off64_t * /* offset */, off64_t data_offset) { 716 uint8_t updateIdTag; 717 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 718 return ERROR_IO; 719 } 720 data_offset ++; 721 722 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 723 return ERROR_MALFORMED; 724 } 725 726 uint8_t numOfBytes; 727 int32_t size = readSize(data_offset, mDataSource, &numOfBytes); 728 if (size < 0) { 729 return ERROR_IO; 730 } 731 data_offset += numOfBytes; 732 733 while(size >= 11 ) { 734 uint8_t descriptorTag; 735 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { 736 return ERROR_IO; 737 } 738 data_offset ++; 739 740 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { 741 return ERROR_MALFORMED; 742 } 743 744 uint8_t buffer[8]; 745 //ObjectDescriptorID and ObjectDescriptor url flag 746 if (mDataSource->readAt(data_offset, buffer, 2) < 2) { 747 return ERROR_IO; 748 } 749 data_offset += 2; 750 751 if ((buffer[1] >> 5) & 0x0001) { //url flag is set 752 return ERROR_MALFORMED; 753 } 754 755 if (mDataSource->readAt(data_offset, buffer, 8) < 8) { 756 return ERROR_IO; 757 } 758 data_offset += 8; 759 760 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) 761 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { 762 return ERROR_MALFORMED; 763 } 764 765 SINF *sinf = new SINF; 766 sinf->trackID = U16_AT(&buffer[3]); 767 sinf->IPMPDescriptorID = buffer[7]; 768 sinf->next = mFirstSINF; 769 mFirstSINF = sinf; 770 771 size -= (8 + 2 + 1); 772 } 773 774 if (size != 0) { 775 return ERROR_MALFORMED; 776 } 777 778 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 779 return ERROR_IO; 780 } 781 data_offset ++; 782 783 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 784 return ERROR_MALFORMED; 785 } 786 787 size = readSize(data_offset, mDataSource, &numOfBytes); 788 if 
(size < 0) { 789 return ERROR_IO; 790 } 791 data_offset += numOfBytes; 792 793 while (size > 0) { 794 uint8_t tag; 795 int32_t dataLen; 796 if (mDataSource->readAt(data_offset, &tag, 1) < 1) { 797 return ERROR_IO; 798 } 799 data_offset ++; 800 801 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { 802 uint8_t id; 803 dataLen = readSize(data_offset, mDataSource, &numOfBytes); 804 if (dataLen < 0) { 805 return ERROR_IO; 806 } else if (dataLen < 4) { 807 return ERROR_MALFORMED; 808 } 809 data_offset += numOfBytes; 810 811 if (mDataSource->readAt(data_offset, &id, 1) < 1) { 812 return ERROR_IO; 813 } 814 data_offset ++; 815 816 SINF *sinf = mFirstSINF; 817 while (sinf && (sinf->IPMPDescriptorID != id)) { 818 sinf = sinf->next; 819 } 820 if (sinf == NULL) { 821 return ERROR_MALFORMED; 822 } 823 sinf->len = dataLen - 3; 824 sinf->IPMPData = new (std::nothrow) char[sinf->len]; 825 if (sinf->IPMPData == NULL) { 826 return ERROR_MALFORMED; 827 } 828 data_offset += 2; 829 830 if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) { 831 return ERROR_IO; 832 } 833 data_offset += sinf->len; 834 835 size -= (dataLen + numOfBytes + 1); 836 } 837 } 838 839 if (size != 0) { 840 return ERROR_MALFORMED; 841 } 842 843 return UNKNOWN_ERROR; // Return a dummy error. 
844} 845 846struct PathAdder { 847 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 848 : mPath(path) { 849 mPath->push(chunkType); 850 } 851 852 ~PathAdder() { 853 mPath->pop(); 854 } 855 856private: 857 Vector<uint32_t> *mPath; 858 859 PathAdder(const PathAdder &); 860 PathAdder &operator=(const PathAdder &); 861}; 862 863static bool underMetaDataPath(const Vector<uint32_t> &path) { 864 return path.size() >= 5 865 && path[0] == FOURCC('m', 'o', 'o', 'v') 866 && path[1] == FOURCC('u', 'd', 't', 'a') 867 && path[2] == FOURCC('m', 'e', 't', 'a') 868 && path[3] == FOURCC('i', 'l', 's', 't'); 869} 870 871static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) { 872 return path.size() >= 2 873 && path[0] == FOURCC('m', 'o', 'o', 'v') 874 && path[1] == FOURCC('m', 'e', 't', 'a') 875 && (depth == 2 876 || (depth == 3 877 && (path[2] == FOURCC('h', 'd', 'l', 'r') 878 || path[2] == FOURCC('i', 'l', 's', 't') 879 || path[2] == FOURCC('k', 'e', 'y', 's')))); 880} 881 882// Given a time in seconds since Jan 1 1904, produce a human-readable string. 
883static bool convertTimeToDate(int64_t time_1904, String8 *s) { 884 // delta between mpeg4 time and unix epoch time 885 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600); 886 if (time_1904 < INT64_MIN + delta) { 887 return false; 888 } 889 time_t time_1970 = time_1904 - delta; 890 891 char tmp[32]; 892 struct tm* tm = gmtime(&time_1970); 893 if (tm != NULL && 894 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) { 895 s->setTo(tmp); 896 return true; 897 } 898 return false; 899} 900 901status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 902 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth); 903 904 if (*offset < 0) { 905 ALOGE("b/23540914"); 906 return ERROR_MALFORMED; 907 } 908 if (depth > 100) { 909 ALOGE("b/27456299"); 910 return ERROR_MALFORMED; 911 } 912 uint32_t hdr[2]; 913 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 914 return ERROR_IO; 915 } 916 uint64_t chunk_size = ntohl(hdr[0]); 917 int32_t chunk_type = ntohl(hdr[1]); 918 off64_t data_offset = *offset + 8; 919 920 if (chunk_size == 1) { 921 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 922 return ERROR_IO; 923 } 924 chunk_size = ntoh64(chunk_size); 925 data_offset += 8; 926 927 if (chunk_size < 16) { 928 // The smallest valid chunk is 16 bytes long in this case. 929 return ERROR_MALFORMED; 930 } 931 } else if (chunk_size == 0) { 932 if (depth == 0) { 933 // atom extends to end of file 934 off64_t sourceSize; 935 if (mDataSource->getSize(&sourceSize) == OK) { 936 chunk_size = (sourceSize - *offset); 937 } else { 938 // XXX could we just pick a "sufficiently large" value here? 939 ALOGE("atom size is 0, and data source has no size"); 940 return ERROR_MALFORMED; 941 } 942 } else { 943 // not allowed for non-toplevel atoms, skip it 944 *offset += 4; 945 return OK; 946 } 947 } else if (chunk_size < 8) { 948 // The smallest valid chunk is 8 bytes long. 
949 ALOGE("invalid chunk size: %" PRIu64, chunk_size); 950 return ERROR_MALFORMED; 951 } 952 953 char chunk[5]; 954 MakeFourCCString(chunk_type, chunk); 955 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth); 956 957 if (kUseHexDump) { 958 static const char kWhitespace[] = " "; 959 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 960 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size); 961 962 char buffer[256]; 963 size_t n = chunk_size; 964 if (n > sizeof(buffer)) { 965 n = sizeof(buffer); 966 } 967 if (mDataSource->readAt(*offset, buffer, n) 968 < (ssize_t)n) { 969 return ERROR_IO; 970 } 971 972 hexdump(buffer, n); 973 } 974 975 PathAdder autoAdder(&mPath, chunk_type); 976 977 // (data_offset - *offset) is either 8 or 16 978 off64_t chunk_data_size = chunk_size - (data_offset - *offset); 979 if (chunk_data_size < 0) { 980 ALOGE("b/23540914"); 981 return ERROR_MALFORMED; 982 } 983 if (chunk_type != FOURCC('m', 'd', 'a', 't') && chunk_data_size > kMaxAtomSize) { 984 char errMsg[100]; 985 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size); 986 ALOGE("%s (b/28615448)", errMsg); 987 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg)); 988 return ERROR_MALFORMED; 989 } 990 991 if (chunk_type != FOURCC('c', 'p', 'r', 't') 992 && chunk_type != FOURCC('c', 'o', 'v', 'r') 993 && mPath.size() == 5 && underMetaDataPath(mPath)) { 994 off64_t stop_offset = *offset + chunk_size; 995 *offset = data_offset; 996 while (*offset < stop_offset) { 997 status_t err = parseChunk(offset, depth + 1); 998 if (err != OK) { 999 return err; 1000 } 1001 } 1002 1003 if (*offset != stop_offset) { 1004 return ERROR_MALFORMED; 1005 } 1006 1007 return OK; 1008 } 1009 1010 switch(chunk_type) { 1011 case FOURCC('m', 'o', 'o', 'v'): 1012 case FOURCC('t', 'r', 'a', 'k'): 1013 case FOURCC('m', 'd', 'i', 'a'): 1014 case FOURCC('m', 'i', 'n', 'f'): 1015 case FOURCC('d', 'i', 'n', 'f'): 1016 case 
FOURCC('s', 't', 'b', 'l'): 1017 case FOURCC('m', 'v', 'e', 'x'): 1018 case FOURCC('m', 'o', 'o', 'f'): 1019 case FOURCC('t', 'r', 'a', 'f'): 1020 case FOURCC('m', 'f', 'r', 'a'): 1021 case FOURCC('u', 'd', 't', 'a'): 1022 case FOURCC('i', 'l', 's', 't'): 1023 case FOURCC('s', 'i', 'n', 'f'): 1024 case FOURCC('s', 'c', 'h', 'i'): 1025 case FOURCC('e', 'd', 't', 's'): 1026 case FOURCC('w', 'a', 'v', 'e'): 1027 { 1028 if (chunk_type == FOURCC('m', 'o', 'o', 'v') && depth != 0) { 1029 ALOGE("moov: depth %d", depth); 1030 return ERROR_MALFORMED; 1031 } 1032 1033 if (chunk_type == FOURCC('m', 'o', 'o', 'v') && mInitCheck == OK) { 1034 ALOGE("duplicate moov"); 1035 return ERROR_MALFORMED; 1036 } 1037 1038 if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) { 1039 // store the offset of the first segment 1040 mMoofFound = true; 1041 mMoofOffset = *offset; 1042 } 1043 1044 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 1045 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size); 1046 1047 if (mDataSource->flags() 1048 & (DataSourceBase::kWantsPrefetching 1049 | DataSourceBase::kIsCachingDataSource)) { 1050 CachedRangedDataSource *cachedSource = 1051 new CachedRangedDataSource(mDataSource); 1052 1053 if (cachedSource->setCachedRange( 1054 *offset, chunk_size, 1055 mCachedSource != NULL /* assume ownership on success */) == OK) { 1056 mDataSource = mCachedSource = cachedSource; 1057 } else { 1058 delete cachedSource; 1059 } 1060 } 1061 1062 if (mLastTrack == NULL) { 1063 return ERROR_MALFORMED; 1064 } 1065 1066 mLastTrack->sampleTable = new SampleTable(mDataSource); 1067 } 1068 1069 bool isTrack = false; 1070 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 1071 if (depth != 1) { 1072 ALOGE("trak: depth %d", depth); 1073 return ERROR_MALFORMED; 1074 } 1075 isTrack = true; 1076 1077 ALOGV("adding new track"); 1078 Track *track = new Track; 1079 track->next = NULL; 1080 if (mLastTrack) { 1081 mLastTrack->next = track; 1082 } else { 1083 mFirstTrack = track; 
1084 } 1085 mLastTrack = track; 1086 1087 track->meta = new MetaData; 1088 track->includes_expensive_metadata = false; 1089 track->skipTrack = false; 1090 track->timescale = 0; 1091 track->meta->setCString(kKeyMIMEType, "application/octet-stream"); 1092 track->has_elst = false; 1093 } 1094 1095 off64_t stop_offset = *offset + chunk_size; 1096 *offset = data_offset; 1097 while (*offset < stop_offset) { 1098 status_t err = parseChunk(offset, depth + 1); 1099 if (err != OK) { 1100 if (isTrack) { 1101 mLastTrack->skipTrack = true; 1102 break; 1103 } 1104 return err; 1105 } 1106 } 1107 1108 if (*offset != stop_offset) { 1109 return ERROR_MALFORMED; 1110 } 1111 1112 if (isTrack) { 1113 int32_t trackId; 1114 // There must be exact one track header per track. 1115 if (!mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) { 1116 mLastTrack->skipTrack = true; 1117 } 1118 1119 status_t err = verifyTrack(mLastTrack); 1120 if (err != OK) { 1121 mLastTrack->skipTrack = true; 1122 } 1123 1124 if (mLastTrack->skipTrack) { 1125 ALOGV("skipping this track..."); 1126 Track *cur = mFirstTrack; 1127 1128 if (cur == mLastTrack) { 1129 delete cur; 1130 mFirstTrack = mLastTrack = NULL; 1131 } else { 1132 while (cur && cur->next != mLastTrack) { 1133 cur = cur->next; 1134 } 1135 if (cur) { 1136 cur->next = NULL; 1137 } 1138 delete mLastTrack; 1139 mLastTrack = cur; 1140 } 1141 1142 return OK; 1143 } 1144 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 1145 mInitCheck = OK; 1146 1147 if (!mIsDrm) { 1148 return UNKNOWN_ERROR; // Return a dummy error. 
1149 } else { 1150 return OK; 1151 } 1152 } 1153 break; 1154 } 1155 1156 case FOURCC('e', 'l', 's', 't'): 1157 { 1158 *offset += chunk_size; 1159 1160 if (!mLastTrack) { 1161 return ERROR_MALFORMED; 1162 } 1163 1164 // See 14496-12 8.6.6 1165 uint8_t version; 1166 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 1167 return ERROR_IO; 1168 } 1169 1170 uint32_t entry_count; 1171 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 1172 return ERROR_IO; 1173 } 1174 1175 if (entry_count != 1) { 1176 // we only support a single entry at the moment, for gapless playback 1177 ALOGW("ignoring edit list with %d entries", entry_count); 1178 } else { 1179 off64_t entriesoffset = data_offset + 8; 1180 uint64_t segment_duration; 1181 int64_t media_time; 1182 1183 if (version == 1) { 1184 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 1185 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 1186 return ERROR_IO; 1187 } 1188 } else if (version == 0) { 1189 uint32_t sd; 1190 int32_t mt; 1191 if (!mDataSource->getUInt32(entriesoffset, &sd) || 1192 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 1193 return ERROR_IO; 1194 } 1195 segment_duration = sd; 1196 media_time = mt; 1197 } else { 1198 return ERROR_IO; 1199 } 1200 1201 // save these for later, because the elst atom might precede 1202 // the atoms that actually gives us the duration and sample rate 1203 // needed to calculate the padding and delay values 1204 mLastTrack->has_elst = true; 1205 mLastTrack->elst_media_time = media_time; 1206 mLastTrack->elst_segment_duration = segment_duration; 1207 } 1208 break; 1209 } 1210 1211 case FOURCC('f', 'r', 'm', 'a'): 1212 { 1213 *offset += chunk_size; 1214 1215 uint32_t original_fourcc; 1216 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 1217 return ERROR_IO; 1218 } 1219 original_fourcc = ntohl(original_fourcc); 1220 ALOGV("read original format: %d", original_fourcc); 1221 1222 if (mLastTrack == NULL) 
{ 1223 return ERROR_MALFORMED; 1224 } 1225 1226 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 1227 uint32_t num_channels = 0; 1228 uint32_t sample_rate = 0; 1229 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 1230 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1231 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1232 } 1233 break; 1234 } 1235 1236 case FOURCC('t', 'e', 'n', 'c'): 1237 { 1238 *offset += chunk_size; 1239 1240 if (chunk_size < 32) { 1241 return ERROR_MALFORMED; 1242 } 1243 1244 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1245 // default IV size, 16 bytes default KeyID 1246 // (ISO 23001-7) 1247 char buf[4]; 1248 memset(buf, 0, 4); 1249 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1250 return ERROR_IO; 1251 } 1252 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1253 if (defaultAlgorithmId > 1) { 1254 // only 0 (clear) and 1 (AES-128) are valid 1255 return ERROR_MALFORMED; 1256 } 1257 1258 memset(buf, 0, 4); 1259 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1260 return ERROR_IO; 1261 } 1262 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 1263 1264 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 1265 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 1266 // only unencrypted data must have 0 IV size 1267 return ERROR_MALFORMED; 1268 } else if (defaultIVSize != 0 && 1269 defaultIVSize != 8 && 1270 defaultIVSize != 16) { 1271 // only supported sizes are 0, 8 and 16 1272 return ERROR_MALFORMED; 1273 } 1274 1275 uint8_t defaultKeyId[16]; 1276 1277 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1278 return ERROR_IO; 1279 } 1280 1281 if (mLastTrack == NULL) 1282 return ERROR_MALFORMED; 1283 1284 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId); 1285 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1286 
mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1287 break; 1288 } 1289 1290 case FOURCC('t', 'k', 'h', 'd'): 1291 { 1292 *offset += chunk_size; 1293 1294 status_t err; 1295 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1296 return err; 1297 } 1298 1299 break; 1300 } 1301 1302 case FOURCC('t', 'r', 'e', 'f'): 1303 { 1304 *offset += chunk_size; 1305 1306 if (mLastTrack == NULL) { 1307 return ERROR_MALFORMED; 1308 } 1309 1310 // Skip thumbnail track for now since we don't have an 1311 // API to retrieve it yet. 1312 // The thumbnail track can't be accessed by negative index or time, 1313 // because each timed sample has its own corresponding thumbnail 1314 // in the thumbnail track. We'll need a dedicated API to retrieve 1315 // thumbnail at time instead. 1316 mLastTrack->skipTrack = true; 1317 1318 break; 1319 } 1320 1321 case FOURCC('p', 's', 's', 'h'): 1322 { 1323 *offset += chunk_size; 1324 1325 PsshInfo pssh; 1326 1327 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1328 return ERROR_IO; 1329 } 1330 1331 uint32_t psshdatalen = 0; 1332 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1333 return ERROR_IO; 1334 } 1335 pssh.datalen = ntohl(psshdatalen); 1336 ALOGV("pssh data size: %d", pssh.datalen); 1337 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) { 1338 // pssh data length exceeds size of containing box 1339 return ERROR_MALFORMED; 1340 } 1341 1342 pssh.data = new (std::nothrow) uint8_t[pssh.datalen]; 1343 if (pssh.data == NULL) { 1344 return ERROR_MALFORMED; 1345 } 1346 ALOGV("allocated pssh @ %p", pssh.data); 1347 ssize_t requested = (ssize_t) pssh.datalen; 1348 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1349 delete[] pssh.data; 1350 return ERROR_IO; 1351 } 1352 mPssh.push_back(pssh); 1353 1354 break; 1355 } 1356 1357 case FOURCC('m', 'd', 'h', 'd'): 1358 { 1359 *offset += chunk_size; 1360 1361 if (chunk_data_size < 4 || mLastTrack == NULL) 
{ 1362 return ERROR_MALFORMED; 1363 } 1364 1365 uint8_t version; 1366 if (mDataSource->readAt( 1367 data_offset, &version, sizeof(version)) 1368 < (ssize_t)sizeof(version)) { 1369 return ERROR_IO; 1370 } 1371 1372 off64_t timescale_offset; 1373 1374 if (version == 1) { 1375 timescale_offset = data_offset + 4 + 16; 1376 } else if (version == 0) { 1377 timescale_offset = data_offset + 4 + 8; 1378 } else { 1379 return ERROR_IO; 1380 } 1381 1382 uint32_t timescale; 1383 if (mDataSource->readAt( 1384 timescale_offset, ×cale, sizeof(timescale)) 1385 < (ssize_t)sizeof(timescale)) { 1386 return ERROR_IO; 1387 } 1388 1389 if (!timescale) { 1390 ALOGE("timescale should not be ZERO."); 1391 return ERROR_MALFORMED; 1392 } 1393 1394 mLastTrack->timescale = ntohl(timescale); 1395 1396 // 14496-12 says all ones means indeterminate, but some files seem to use 1397 // 0 instead. We treat both the same. 1398 int64_t duration = 0; 1399 if (version == 1) { 1400 if (mDataSource->readAt( 1401 timescale_offset + 4, &duration, sizeof(duration)) 1402 < (ssize_t)sizeof(duration)) { 1403 return ERROR_IO; 1404 } 1405 if (duration != -1) { 1406 duration = ntoh64(duration); 1407 } 1408 } else { 1409 uint32_t duration32; 1410 if (mDataSource->readAt( 1411 timescale_offset + 4, &duration32, sizeof(duration32)) 1412 < (ssize_t)sizeof(duration32)) { 1413 return ERROR_IO; 1414 } 1415 if (duration32 != 0xffffffff) { 1416 duration = ntohl(duration32); 1417 } 1418 } 1419 if (duration != 0 && mLastTrack->timescale != 0) { 1420 mLastTrack->meta->setInt64( 1421 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1422 } 1423 1424 uint8_t lang[2]; 1425 off64_t lang_offset; 1426 if (version == 1) { 1427 lang_offset = timescale_offset + 4 + 8; 1428 } else if (version == 0) { 1429 lang_offset = timescale_offset + 4 + 4; 1430 } else { 1431 return ERROR_IO; 1432 } 1433 1434 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1435 < (ssize_t)sizeof(lang)) { 1436 return ERROR_IO; 1437 } 1438 1439 // 
To get the ISO-639-2/T three character language code 1440 // 1 bit pad followed by 3 5-bits characters. Each character 1441 // is packed as the difference between its ASCII value and 0x60. 1442 char lang_code[4]; 1443 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1444 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1445 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1446 lang_code[3] = '\0'; 1447 1448 mLastTrack->meta->setCString( 1449 kKeyMediaLanguage, lang_code); 1450 1451 break; 1452 } 1453 1454 case FOURCC('s', 't', 's', 'd'): 1455 { 1456 uint8_t buffer[8]; 1457 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1458 return ERROR_MALFORMED; 1459 } 1460 1461 if (mDataSource->readAt( 1462 data_offset, buffer, 8) < 8) { 1463 return ERROR_IO; 1464 } 1465 1466 if (U32_AT(buffer) != 0) { 1467 // Should be version 0, flags 0. 1468 return ERROR_MALFORMED; 1469 } 1470 1471 uint32_t entry_count = U32_AT(&buffer[4]); 1472 1473 if (entry_count > 1) { 1474 // For 3GPP timed text, there could be multiple tx3g boxes contain 1475 // multiple text display formats. These formats will be used to 1476 // display the timed text. 1477 // For encrypted files, there may also be more than one entry. 1478 const char *mime; 1479 1480 if (mLastTrack == NULL) 1481 return ERROR_MALFORMED; 1482 1483 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1484 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1485 strcasecmp(mime, "application/octet-stream")) { 1486 // For now we only support a single type of media per track. 
1487 mLastTrack->skipTrack = true; 1488 *offset += chunk_size; 1489 break; 1490 } 1491 } 1492 off64_t stop_offset = *offset + chunk_size; 1493 *offset = data_offset + 8; 1494 for (uint32_t i = 0; i < entry_count; ++i) { 1495 status_t err = parseChunk(offset, depth + 1); 1496 if (err != OK) { 1497 return err; 1498 } 1499 } 1500 1501 if (*offset != stop_offset) { 1502 return ERROR_MALFORMED; 1503 } 1504 break; 1505 } 1506 case FOURCC('m', 'e', 't', 't'): 1507 { 1508 *offset += chunk_size; 1509 1510 if (mLastTrack == NULL) 1511 return ERROR_MALFORMED; 1512 1513 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1514 if (buffer->data() == NULL) { 1515 return NO_MEMORY; 1516 } 1517 1518 if (mDataSource->readAt( 1519 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1520 return ERROR_IO; 1521 } 1522 1523 String8 mimeFormat((const char *)(buffer->data()), chunk_data_size); 1524 mLastTrack->meta->setCString(kKeyMIMEType, mimeFormat.string()); 1525 1526 break; 1527 } 1528 1529 case FOURCC('m', 'p', '4', 'a'): 1530 case FOURCC('e', 'n', 'c', 'a'): 1531 case FOURCC('s', 'a', 'm', 'r'): 1532 case FOURCC('s', 'a', 'w', 'b'): 1533 { 1534 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a') 1535 && depth >= 1 && mPath[depth - 1] == FOURCC('w', 'a', 'v', 'e')) { 1536 // Ignore mp4a embedded in QT wave atom 1537 *offset += chunk_size; 1538 break; 1539 } 1540 1541 uint8_t buffer[8 + 20]; 1542 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1543 // Basic AudioSampleEntry size. 
1544 return ERROR_MALFORMED; 1545 } 1546 1547 if (mDataSource->readAt( 1548 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1549 return ERROR_IO; 1550 } 1551 1552 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1553 uint16_t version = U16_AT(&buffer[8]); 1554 uint32_t num_channels = U16_AT(&buffer[16]); 1555 1556 uint16_t sample_size = U16_AT(&buffer[18]); 1557 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1558 1559 if (mLastTrack == NULL) 1560 return ERROR_MALFORMED; 1561 1562 off64_t stop_offset = *offset + chunk_size; 1563 *offset = data_offset + sizeof(buffer); 1564 1565 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')) { 1566 if (version == 1) { 1567 if (mDataSource->readAt(*offset, buffer, 16) < 16) { 1568 return ERROR_IO; 1569 } 1570 1571#if 0 1572 U32_AT(buffer); // samples per packet 1573 U32_AT(&buffer[4]); // bytes per packet 1574 U32_AT(&buffer[8]); // bytes per frame 1575 U32_AT(&buffer[12]); // bytes per sample 1576#endif 1577 *offset += 16; 1578 } else if (version == 2) { 1579 uint8_t v2buffer[36]; 1580 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) { 1581 return ERROR_IO; 1582 } 1583 1584#if 0 1585 U32_AT(v2buffer); // size of struct only 1586 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate 1587 num_channels = U32_AT(&v2buffer[12]); // num audio channels 1588 U32_AT(&v2buffer[16]); // always 0x7f000000 1589 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel 1590 U32_AT(&v2buffer[24]); // format specifc flags 1591 U32_AT(&v2buffer[28]); // const bytes per audio packet 1592 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet 1593#endif 1594 *offset += 36; 1595 } 1596 } 1597 1598 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1599 // if the chunk type is enca, we'll get the type from the sinf/frma box later 1600 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1601 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1602 } 1603 
ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1604 chunk, num_channels, sample_size, sample_rate); 1605 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1606 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1607 1608 while (*offset < stop_offset) { 1609 status_t err = parseChunk(offset, depth + 1); 1610 if (err != OK) { 1611 return err; 1612 } 1613 } 1614 1615 if (*offset != stop_offset) { 1616 return ERROR_MALFORMED; 1617 } 1618 break; 1619 } 1620 1621 case FOURCC('m', 'p', '4', 'v'): 1622 case FOURCC('e', 'n', 'c', 'v'): 1623 case FOURCC('s', '2', '6', '3'): 1624 case FOURCC('H', '2', '6', '3'): 1625 case FOURCC('h', '2', '6', '3'): 1626 case FOURCC('a', 'v', 'c', '1'): 1627 case FOURCC('h', 'v', 'c', '1'): 1628 case FOURCC('h', 'e', 'v', '1'): 1629 { 1630 uint8_t buffer[78]; 1631 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1632 // Basic VideoSampleEntry size. 1633 return ERROR_MALFORMED; 1634 } 1635 1636 if (mDataSource->readAt( 1637 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1638 return ERROR_IO; 1639 } 1640 1641 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1642 uint16_t width = U16_AT(&buffer[6 + 18]); 1643 uint16_t height = U16_AT(&buffer[6 + 20]); 1644 1645 // The video sample is not standard-compliant if it has invalid dimension. 1646 // Use some default width and height value, and 1647 // let the decoder figure out the actual width and height (and thus 1648 // be prepared for INFO_FOMRAT_CHANGED event). 
1649 if (width == 0) width = 352; 1650 if (height == 0) height = 288; 1651 1652 // printf("*** coding='%s' width=%d height=%d\n", 1653 // chunk, width, height); 1654 1655 if (mLastTrack == NULL) 1656 return ERROR_MALFORMED; 1657 1658 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1659 // if the chunk type is encv, we'll get the type from the sinf/frma box later 1660 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1661 } 1662 mLastTrack->meta->setInt32(kKeyWidth, width); 1663 mLastTrack->meta->setInt32(kKeyHeight, height); 1664 1665 off64_t stop_offset = *offset + chunk_size; 1666 *offset = data_offset + sizeof(buffer); 1667 while (*offset < stop_offset) { 1668 status_t err = parseChunk(offset, depth + 1); 1669 if (err != OK) { 1670 return err; 1671 } 1672 } 1673 1674 if (*offset != stop_offset) { 1675 return ERROR_MALFORMED; 1676 } 1677 break; 1678 } 1679 1680 case FOURCC('s', 't', 'c', 'o'): 1681 case FOURCC('c', 'o', '6', '4'): 1682 { 1683 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) { 1684 return ERROR_MALFORMED; 1685 } 1686 1687 status_t err = 1688 mLastTrack->sampleTable->setChunkOffsetParams( 1689 chunk_type, data_offset, chunk_data_size); 1690 1691 *offset += chunk_size; 1692 1693 if (err != OK) { 1694 return err; 1695 } 1696 1697 break; 1698 } 1699 1700 case FOURCC('s', 't', 's', 'c'): 1701 { 1702 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1703 return ERROR_MALFORMED; 1704 1705 status_t err = 1706 mLastTrack->sampleTable->setSampleToChunkParams( 1707 data_offset, chunk_data_size); 1708 1709 *offset += chunk_size; 1710 1711 if (err != OK) { 1712 return err; 1713 } 1714 1715 break; 1716 } 1717 1718 case FOURCC('s', 't', 's', 'z'): 1719 case FOURCC('s', 't', 'z', '2'): 1720 { 1721 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) { 1722 return ERROR_MALFORMED; 1723 } 1724 1725 status_t err = 1726 mLastTrack->sampleTable->setSampleSizeParams( 1727 chunk_type, data_offset, chunk_data_size); 
1728 1729 *offset += chunk_size; 1730 1731 if (err != OK) { 1732 return err; 1733 } 1734 1735 size_t max_size; 1736 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1737 1738 if (err != OK) { 1739 return err; 1740 } 1741 1742 if (max_size != 0) { 1743 // Assume that a given buffer only contains at most 10 chunks, 1744 // each chunk originally prefixed with a 2 byte length will 1745 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1746 // and thus will grow by 2 bytes per chunk. 1747 if (max_size > SIZE_MAX - 10 * 2) { 1748 ALOGE("max sample size too big: %zu", max_size); 1749 return ERROR_MALFORMED; 1750 } 1751 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1752 } else { 1753 // No size was specified. Pick a conservatively large size. 1754 uint32_t width, height; 1755 if (!mLastTrack->meta->findInt32(kKeyWidth, (int32_t*)&width) || 1756 !mLastTrack->meta->findInt32(kKeyHeight,(int32_t*) &height)) { 1757 ALOGE("No width or height, assuming worst case 1080p"); 1758 width = 1920; 1759 height = 1080; 1760 } else { 1761 // A resolution was specified, check that it's not too big. The values below 1762 // were chosen so that the calculations below don't cause overflows, they're 1763 // not indicating that resolutions up to 32kx32k are actually supported. 1764 if (width > 32768 || height > 32768) { 1765 ALOGE("can't support %u x %u video", width, height); 1766 return ERROR_MALFORMED; 1767 } 1768 } 1769 1770 const char *mime; 1771 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1772 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC) 1773 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 1774 // AVC & HEVC requires compression ratio of at least 2, and uses 1775 // macroblocks 1776 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1777 } else { 1778 // For all other formats there is no minimum compression 1779 // ratio. Use compression ratio of 1. 
1780 max_size = width * height * 3 / 2; 1781 } 1782 // HACK: allow 10% overhead 1783 // TODO: read sample size from traf atom for fragmented MPEG4. 1784 max_size += max_size / 10; 1785 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size); 1786 } 1787 1788 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1789 // mimetype) previously obtained, so don't cache them. 1790 const char *mime; 1791 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1792 // Calculate average frame rate. 1793 if (!strncasecmp("video/", mime, 6)) { 1794 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1795 if (nSamples == 0) { 1796 int32_t trackId; 1797 if (mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) { 1798 for (size_t i = 0; i < mTrex.size(); i++) { 1799 Trex *t = &mTrex.editItemAt(i); 1800 if (t->track_ID == (uint32_t) trackId) { 1801 if (t->default_sample_duration > 0) { 1802 int32_t frameRate = 1803 mLastTrack->timescale / t->default_sample_duration; 1804 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1805 } 1806 break; 1807 } 1808 } 1809 } 1810 } else { 1811 int64_t durationUs; 1812 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { 1813 if (durationUs > 0) { 1814 int32_t frameRate = (nSamples * 1000000LL + 1815 (durationUs >> 1)) / durationUs; 1816 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1817 } 1818 } 1819 ALOGV("setting frame count %zu", nSamples); 1820 mLastTrack->meta->setInt32(kKeyFrameCount, nSamples); 1821 } 1822 } 1823 1824 break; 1825 } 1826 1827 case FOURCC('s', 't', 't', 's'): 1828 { 1829 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1830 return ERROR_MALFORMED; 1831 1832 *offset += chunk_size; 1833 1834 status_t err = 1835 mLastTrack->sampleTable->setTimeToSampleParams( 1836 data_offset, chunk_data_size); 1837 1838 if (err != OK) { 1839 return err; 1840 } 1841 1842 break; 1843 } 1844 1845 case FOURCC('c', 't', 't', 's'): 1846 { 1847 if ((mLastTrack == NULL) || 
(mLastTrack->sampleTable == NULL)) 1848 return ERROR_MALFORMED; 1849 1850 *offset += chunk_size; 1851 1852 status_t err = 1853 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1854 data_offset, chunk_data_size); 1855 1856 if (err != OK) { 1857 return err; 1858 } 1859 1860 break; 1861 } 1862 1863 case FOURCC('s', 't', 's', 's'): 1864 { 1865 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1866 return ERROR_MALFORMED; 1867 1868 *offset += chunk_size; 1869 1870 status_t err = 1871 mLastTrack->sampleTable->setSyncSampleParams( 1872 data_offset, chunk_data_size); 1873 1874 if (err != OK) { 1875 return err; 1876 } 1877 1878 break; 1879 } 1880 1881 // \xA9xyz 1882 case FOURCC(0xA9, 'x', 'y', 'z'): 1883 { 1884 *offset += chunk_size; 1885 1886 // Best case the total data length inside "\xA9xyz" box would 1887 // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/", 1888 // where "\x00\x05" is the text string length with value = 5, 1889 // "\0x15\xc7" is the language code = en, and "+0+0/" is a 1890 // location (string) value with longitude = 0 and latitude = 0. 1891 // Since some devices encountered in the wild omit the trailing 1892 // slash, we'll allow that. 1893 if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing / 1894 return ERROR_MALFORMED; 1895 } 1896 1897 uint16_t len; 1898 if (!mDataSource->getUInt16(data_offset, &len)) { 1899 return ERROR_IO; 1900 } 1901 1902 // allow "+0+0" without trailing slash 1903 if (len < 4 || len > chunk_data_size - 4) { 1904 return ERROR_MALFORMED; 1905 } 1906 // The location string following the language code is formatted 1907 // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709). 1908 // Allocate 2 extra bytes, in case we need to add a trailing slash, 1909 // and to add a terminating 0. 
1910 std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]()); 1911 if (!buffer) { 1912 return NO_MEMORY; 1913 } 1914 1915 if (mDataSource->readAt( 1916 data_offset + 4, &buffer[0], len) < len) { 1917 return ERROR_IO; 1918 } 1919 1920 len = strlen(&buffer[0]); 1921 if (len < 4) { 1922 return ERROR_MALFORMED; 1923 } 1924 // Add a trailing slash if there wasn't one. 1925 if (buffer[len - 1] != '/') { 1926 buffer[len] = '/'; 1927 } 1928 mFileMetaData->setCString(kKeyLocation, &buffer[0]); 1929 break; 1930 } 1931 1932 case FOURCC('e', 's', 'd', 's'): 1933 { 1934 *offset += chunk_size; 1935 1936 if (chunk_data_size < 4) { 1937 return ERROR_MALFORMED; 1938 } 1939 1940 uint8_t buffer[256]; 1941 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1942 return ERROR_BUFFER_TOO_SMALL; 1943 } 1944 1945 if (mDataSource->readAt( 1946 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1947 return ERROR_IO; 1948 } 1949 1950 if (U32_AT(buffer) != 0) { 1951 // Should be version 0, flags 0. 1952 return ERROR_MALFORMED; 1953 } 1954 1955 if (mLastTrack == NULL) 1956 return ERROR_MALFORMED; 1957 1958 mLastTrack->meta->setData( 1959 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1960 1961 if (mPath.size() >= 2 1962 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1963 // Information from the ESDS must be relied on for proper 1964 // setup of sample rate and channel count for MPEG4 Audio. 1965 // The generic header appears to only contain generic 1966 // information... 
1967 1968 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1969 &buffer[4], chunk_data_size - 4); 1970 1971 if (err != OK) { 1972 return err; 1973 } 1974 } 1975 if (mPath.size() >= 2 1976 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) { 1977 // Check if the video is MPEG2 1978 ESDS esds(&buffer[4], chunk_data_size - 4); 1979 1980 uint8_t objectTypeIndication; 1981 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) { 1982 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) { 1983 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2); 1984 } 1985 } 1986 } 1987 break; 1988 } 1989 1990 case FOURCC('b', 't', 'r', 't'): 1991 { 1992 *offset += chunk_size; 1993 if (mLastTrack == NULL) { 1994 return ERROR_MALFORMED; 1995 } 1996 1997 uint8_t buffer[12]; 1998 if (chunk_data_size != sizeof(buffer)) { 1999 return ERROR_MALFORMED; 2000 } 2001 2002 if (mDataSource->readAt( 2003 data_offset, buffer, chunk_data_size) < chunk_data_size) { 2004 return ERROR_IO; 2005 } 2006 2007 uint32_t maxBitrate = U32_AT(&buffer[4]); 2008 uint32_t avgBitrate = U32_AT(&buffer[8]); 2009 if (maxBitrate > 0 && maxBitrate < INT32_MAX) { 2010 mLastTrack->meta->setInt32(kKeyMaxBitRate, (int32_t)maxBitrate); 2011 } 2012 if (avgBitrate > 0 && avgBitrate < INT32_MAX) { 2013 mLastTrack->meta->setInt32(kKeyBitRate, (int32_t)avgBitrate); 2014 } 2015 break; 2016 } 2017 2018 case FOURCC('a', 'v', 'c', 'C'): 2019 { 2020 *offset += chunk_size; 2021 2022 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 2023 2024 if (buffer->data() == NULL) { 2025 ALOGE("b/28471206"); 2026 return NO_MEMORY; 2027 } 2028 2029 if (mDataSource->readAt( 2030 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 2031 return ERROR_IO; 2032 } 2033 2034 if (mLastTrack == NULL) 2035 return ERROR_MALFORMED; 2036 2037 mLastTrack->meta->setData( 2038 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size); 2039 2040 break; 2041 } 2042 case FOURCC('h', 'v', 'c', 'C'): 2043 { 
2044 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 2045 2046 if (buffer->data() == NULL) { 2047 ALOGE("b/28471206"); 2048 return NO_MEMORY; 2049 } 2050 2051 if (mDataSource->readAt( 2052 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 2053 return ERROR_IO; 2054 } 2055 2056 if (mLastTrack == NULL) 2057 return ERROR_MALFORMED; 2058 2059 mLastTrack->meta->setData( 2060 kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size); 2061 2062 *offset += chunk_size; 2063 break; 2064 } 2065 2066 case FOURCC('d', '2', '6', '3'): 2067 { 2068 *offset += chunk_size; 2069 /* 2070 * d263 contains a fixed 7 bytes part: 2071 * vendor - 4 bytes 2072 * version - 1 byte 2073 * level - 1 byte 2074 * profile - 1 byte 2075 * optionally, "d263" box itself may contain a 16-byte 2076 * bit rate box (bitr) 2077 * average bit rate - 4 bytes 2078 * max bit rate - 4 bytes 2079 */ 2080 char buffer[23]; 2081 if (chunk_data_size != 7 && 2082 chunk_data_size != 23) { 2083 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size); 2084 return ERROR_MALFORMED; 2085 } 2086 2087 if (mDataSource->readAt( 2088 data_offset, buffer, chunk_data_size) < chunk_data_size) { 2089 return ERROR_IO; 2090 } 2091 2092 if (mLastTrack == NULL) 2093 return ERROR_MALFORMED; 2094 2095 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 2096 2097 break; 2098 } 2099 2100 case FOURCC('m', 'e', 't', 'a'): 2101 { 2102 off64_t stop_offset = *offset + chunk_size; 2103 *offset = data_offset; 2104 bool isParsingMetaKeys = underQTMetaPath(mPath, 2); 2105 if (!isParsingMetaKeys) { 2106 uint8_t buffer[4]; 2107 if (chunk_data_size < (off64_t)sizeof(buffer)) { 2108 *offset = stop_offset; 2109 return ERROR_MALFORMED; 2110 } 2111 2112 if (mDataSource->readAt( 2113 data_offset, buffer, 4) < 4) { 2114 *offset = stop_offset; 2115 return ERROR_IO; 2116 } 2117 2118 if (U32_AT(buffer) != 0) { 2119 // Should be version 0, flags 0. 
2120 2121 // If it's not, let's assume this is one of those 2122 // apparently malformed chunks that don't have flags 2123 // and completely different semantics than what's 2124 // in the MPEG4 specs and skip it. 2125 *offset = stop_offset; 2126 return OK; 2127 } 2128 *offset += sizeof(buffer); 2129 } 2130 2131 while (*offset < stop_offset) { 2132 status_t err = parseChunk(offset, depth + 1); 2133 if (err != OK) { 2134 return err; 2135 } 2136 } 2137 2138 if (*offset != stop_offset) { 2139 return ERROR_MALFORMED; 2140 } 2141 break; 2142 } 2143 2144 case FOURCC('i', 'l', 'o', 'c'): 2145 case FOURCC('i', 'i', 'n', 'f'): 2146 case FOURCC('i', 'p', 'r', 'p'): 2147 case FOURCC('p', 'i', 't', 'm'): 2148 case FOURCC('i', 'd', 'a', 't'): 2149 case FOURCC('i', 'r', 'e', 'f'): 2150 case FOURCC('i', 'p', 'r', 'o'): 2151 { 2152 if (mIsHeif) { 2153 if (mItemTable == NULL) { 2154 mItemTable = new ItemTable(mDataSource); 2155 } 2156 status_t err = mItemTable->parse( 2157 chunk_type, data_offset, chunk_data_size); 2158 if (err != OK) { 2159 return err; 2160 } 2161 } 2162 *offset += chunk_size; 2163 break; 2164 } 2165 2166 case FOURCC('m', 'e', 'a', 'n'): 2167 case FOURCC('n', 'a', 'm', 'e'): 2168 case FOURCC('d', 'a', 't', 'a'): 2169 { 2170 *offset += chunk_size; 2171 2172 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 2173 status_t err = parseITunesMetaData(data_offset, chunk_data_size); 2174 2175 if (err != OK) { 2176 return err; 2177 } 2178 } 2179 2180 break; 2181 } 2182 2183 case FOURCC('m', 'v', 'h', 'd'): 2184 { 2185 *offset += chunk_size; 2186 2187 if (depth != 1) { 2188 ALOGE("mvhd: depth %d", depth); 2189 return ERROR_MALFORMED; 2190 } 2191 if (chunk_data_size < 32) { 2192 return ERROR_MALFORMED; 2193 } 2194 2195 uint8_t header[32]; 2196 if (mDataSource->readAt( 2197 data_offset, header, sizeof(header)) 2198 < (ssize_t)sizeof(header)) { 2199 return ERROR_IO; 2200 } 2201 2202 uint64_t creationTime; 2203 uint64_t duration = 0; 2204 if (header[0] == 1) { 2205 
creationTime = U64_AT(&header[4]); 2206 mHeaderTimescale = U32_AT(&header[20]); 2207 duration = U64_AT(&header[24]); 2208 if (duration == 0xffffffffffffffff) { 2209 duration = 0; 2210 } 2211 } else if (header[0] != 0) { 2212 return ERROR_MALFORMED; 2213 } else { 2214 creationTime = U32_AT(&header[4]); 2215 mHeaderTimescale = U32_AT(&header[12]); 2216 uint32_t d32 = U32_AT(&header[16]); 2217 if (d32 == 0xffffffff) { 2218 d32 = 0; 2219 } 2220 duration = d32; 2221 } 2222 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) { 2223 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 2224 } 2225 2226 String8 s; 2227 if (convertTimeToDate(creationTime, &s)) { 2228 mFileMetaData->setCString(kKeyDate, s.string()); 2229 } 2230 2231 2232 break; 2233 } 2234 2235 case FOURCC('m', 'e', 'h', 'd'): 2236 { 2237 *offset += chunk_size; 2238 2239 if (chunk_data_size < 8) { 2240 return ERROR_MALFORMED; 2241 } 2242 2243 uint8_t flags[4]; 2244 if (mDataSource->readAt( 2245 data_offset, flags, sizeof(flags)) 2246 < (ssize_t)sizeof(flags)) { 2247 return ERROR_IO; 2248 } 2249 2250 uint64_t duration = 0; 2251 if (flags[0] == 1) { 2252 // 64 bit 2253 if (chunk_data_size < 12) { 2254 return ERROR_MALFORMED; 2255 } 2256 mDataSource->getUInt64(data_offset + 4, &duration); 2257 if (duration == 0xffffffffffffffff) { 2258 duration = 0; 2259 } 2260 } else if (flags[0] == 0) { 2261 // 32 bit 2262 uint32_t d32; 2263 mDataSource->getUInt32(data_offset + 4, &d32); 2264 if (d32 == 0xffffffff) { 2265 d32 = 0; 2266 } 2267 duration = d32; 2268 } else { 2269 return ERROR_MALFORMED; 2270 } 2271 2272 if (duration != 0 && mHeaderTimescale != 0) { 2273 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 2274 } 2275 2276 break; 2277 } 2278 2279 case FOURCC('m', 'd', 'a', 't'): 2280 { 2281 ALOGV("mdat chunk, drm: %d", mIsDrm); 2282 2283 mMdatFound = true; 2284 2285 if (!mIsDrm) { 2286 *offset += chunk_size; 2287 break; 2288 } 2289 2290 
if (chunk_size < 8) { 2291 return ERROR_MALFORMED; 2292 } 2293 2294 return parseDrmSINF(offset, data_offset); 2295 } 2296 2297 case FOURCC('h', 'd', 'l', 'r'): 2298 { 2299 *offset += chunk_size; 2300 2301 if (underQTMetaPath(mPath, 3)) { 2302 break; 2303 } 2304 2305 uint32_t buffer; 2306 if (mDataSource->readAt( 2307 data_offset + 8, &buffer, 4) < 4) { 2308 return ERROR_IO; 2309 } 2310 2311 uint32_t type = ntohl(buffer); 2312 // For the 3GPP file format, the handler-type within the 'hdlr' box 2313 // shall be 'text'. We also want to support 'sbtl' handler type 2314 // for a practical reason as various MPEG4 containers use it. 2315 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 2316 if (mLastTrack != NULL) { 2317 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 2318 } 2319 } 2320 2321 break; 2322 } 2323 2324 case FOURCC('k', 'e', 'y', 's'): 2325 { 2326 *offset += chunk_size; 2327 2328 if (underQTMetaPath(mPath, 3)) { 2329 status_t err = parseQTMetaKey(data_offset, chunk_data_size); 2330 if (err != OK) { 2331 return err; 2332 } 2333 } 2334 break; 2335 } 2336 2337 case FOURCC('t', 'r', 'e', 'x'): 2338 { 2339 *offset += chunk_size; 2340 2341 if (chunk_data_size < 24) { 2342 return ERROR_IO; 2343 } 2344 Trex trex; 2345 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) || 2346 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) || 2347 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) || 2348 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) || 2349 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) { 2350 return ERROR_IO; 2351 } 2352 mTrex.add(trex); 2353 break; 2354 } 2355 2356 case FOURCC('t', 'x', '3', 'g'): 2357 { 2358 if (mLastTrack == NULL) 2359 return ERROR_MALFORMED; 2360 2361 uint32_t type; 2362 const void *data; 2363 size_t size = 0; 2364 if (!mLastTrack->meta->findData( 2365 kKeyTextFormatData, &type, 
&data, &size)) { 2366 size = 0; 2367 } 2368 2369 if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) { 2370 return ERROR_MALFORMED; 2371 } 2372 2373 uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size]; 2374 if (buffer == NULL) { 2375 return ERROR_MALFORMED; 2376 } 2377 2378 if (size > 0) { 2379 memcpy(buffer, data, size); 2380 } 2381 2382 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 2383 < chunk_size) { 2384 delete[] buffer; 2385 buffer = NULL; 2386 2387 // advance read pointer so we don't end up reading this again 2388 *offset += chunk_size; 2389 return ERROR_IO; 2390 } 2391 2392 mLastTrack->meta->setData( 2393 kKeyTextFormatData, 0, buffer, size + chunk_size); 2394 2395 delete[] buffer; 2396 2397 *offset += chunk_size; 2398 break; 2399 } 2400 2401 case FOURCC('c', 'o', 'v', 'r'): 2402 { 2403 *offset += chunk_size; 2404 2405 if (mFileMetaData != NULL) { 2406 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64, 2407 chunk_data_size, data_offset); 2408 2409 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) { 2410 return ERROR_MALFORMED; 2411 } 2412 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1); 2413 if (buffer->data() == NULL) { 2414 ALOGE("b/28471206"); 2415 return NO_MEMORY; 2416 } 2417 if (mDataSource->readAt( 2418 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) { 2419 return ERROR_IO; 2420 } 2421 const int kSkipBytesOfDataBox = 16; 2422 if (chunk_data_size <= kSkipBytesOfDataBox) { 2423 return ERROR_MALFORMED; 2424 } 2425 2426 mFileMetaData->setData( 2427 kKeyAlbumArt, MetaData::TYPE_NONE, 2428 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 2429 } 2430 2431 break; 2432 } 2433 2434 case FOURCC('c', 'o', 'l', 'r'): 2435 { 2436 *offset += chunk_size; 2437 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd') 2438 // ignore otherwise 2439 if (depth >= 2 && mPath[depth - 2] == 
FOURCC('s', 't', 's', 'd')) { 2440 status_t err = parseColorInfo(data_offset, chunk_data_size); 2441 if (err != OK) { 2442 return err; 2443 } 2444 } 2445 2446 break; 2447 } 2448 2449 case FOURCC('t', 'i', 't', 'l'): 2450 case FOURCC('p', 'e', 'r', 'f'): 2451 case FOURCC('a', 'u', 't', 'h'): 2452 case FOURCC('g', 'n', 'r', 'e'): 2453 case FOURCC('a', 'l', 'b', 'm'): 2454 case FOURCC('y', 'r', 'r', 'c'): 2455 { 2456 *offset += chunk_size; 2457 2458 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth); 2459 2460 if (err != OK) { 2461 return err; 2462 } 2463 2464 break; 2465 } 2466 2467 case FOURCC('I', 'D', '3', '2'): 2468 { 2469 *offset += chunk_size; 2470 2471 if (chunk_data_size < 6) { 2472 return ERROR_MALFORMED; 2473 } 2474 2475 parseID3v2MetaData(data_offset + 6); 2476 2477 break; 2478 } 2479 2480 case FOURCC('-', '-', '-', '-'): 2481 { 2482 mLastCommentMean.clear(); 2483 mLastCommentName.clear(); 2484 mLastCommentData.clear(); 2485 *offset += chunk_size; 2486 break; 2487 } 2488 2489 case FOURCC('s', 'i', 'd', 'x'): 2490 { 2491 status_t err = parseSegmentIndex(data_offset, chunk_data_size); 2492 if (err != OK) { 2493 return err; 2494 } 2495 *offset += chunk_size; 2496 return UNKNOWN_ERROR; // stop parsing after sidx 2497 } 2498 2499 case FOURCC('a', 'c', '-', '3'): 2500 { 2501 *offset += chunk_size; 2502 return parseAC3SampleEntry(data_offset); 2503 } 2504 2505 case FOURCC('f', 't', 'y', 'p'): 2506 { 2507 if (chunk_data_size < 8 || depth != 0) { 2508 return ERROR_MALFORMED; 2509 } 2510 2511 off64_t stop_offset = *offset + chunk_size; 2512 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4; 2513 std::set<uint32_t> brandSet; 2514 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 2515 if (i == 1) { 2516 // Skip this index, it refers to the minorVersion, 2517 // not a brand. 
2518 continue; 2519 } 2520 2521 uint32_t brand; 2522 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) { 2523 return ERROR_MALFORMED; 2524 } 2525 2526 brand = ntohl(brand); 2527 brandSet.insert(brand); 2528 } 2529 2530 if (brandSet.count(FOURCC('q', 't', ' ', ' ')) > 0) { 2531 mIsQT = true; 2532 } else { 2533 if (brandSet.count(FOURCC('m', 'i', 'f', '1')) > 0 2534 && brandSet.count(FOURCC('h', 'e', 'i', 'c')) > 0) { 2535 ALOGV("identified HEIF image"); 2536 2537 mIsHeif = true; 2538 brandSet.erase(FOURCC('m', 'i', 'f', '1')); 2539 brandSet.erase(FOURCC('h', 'e', 'i', 'c')); 2540 } 2541 2542 if (!brandSet.empty()) { 2543 // This means that the file should have moov box. 2544 // It could be any iso files (mp4, heifs, etc.) 2545 mHasMoovBox = true; 2546 ALOGV("identified HEIF image with other tracks"); 2547 } 2548 } 2549 2550 *offset = stop_offset; 2551 2552 break; 2553 } 2554 2555 default: 2556 { 2557 // check if we're parsing 'ilst' for meta keys 2558 // if so, treat type as a number (key-id). 
2559 if (underQTMetaPath(mPath, 3)) { 2560 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size); 2561 if (err != OK) { 2562 return err; 2563 } 2564 } 2565 2566 *offset += chunk_size; 2567 break; 2568 } 2569 } 2570 2571 return OK; 2572} 2573 2574status_t MPEG4Extractor::parseAC3SampleEntry(off64_t offset) { 2575 // skip 16 bytes: 2576 // + 6-byte reserved, 2577 // + 2-byte data reference index, 2578 // + 8-byte reserved 2579 offset += 16; 2580 uint16_t channelCount; 2581 if (!mDataSource->getUInt16(offset, &channelCount)) { 2582 return ERROR_MALFORMED; 2583 } 2584 // skip 8 bytes: 2585 // + 2-byte channelCount, 2586 // + 2-byte sample size, 2587 // + 4-byte reserved 2588 offset += 8; 2589 uint16_t sampleRate; 2590 if (!mDataSource->getUInt16(offset, &sampleRate)) { 2591 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read sample rate"); 2592 return ERROR_MALFORMED; 2593 } 2594 2595 // skip 4 bytes: 2596 // + 2-byte sampleRate, 2597 // + 2-byte reserved 2598 offset += 4; 2599 return parseAC3SpecificBox(offset, sampleRate); 2600} 2601 2602status_t MPEG4Extractor::parseAC3SpecificBox( 2603 off64_t offset, uint16_t sampleRate) { 2604 uint32_t size; 2605 // + 4-byte size 2606 // + 4-byte type 2607 // + 3-byte payload 2608 const uint32_t kAC3SpecificBoxSize = 11; 2609 if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) { 2610 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size"); 2611 return ERROR_MALFORMED; 2612 } 2613 2614 offset += 4; 2615 uint32_t type; 2616 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC('d', 'a', 'c', '3')) { 2617 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3"); 2618 return ERROR_MALFORMED; 2619 } 2620 2621 offset += 4; 2622 const uint32_t kAC3SpecificBoxPayloadSize = 3; 2623 uint8_t chunk[kAC3SpecificBoxPayloadSize]; 2624 if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) { 2625 
ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields"); 2626 return ERROR_MALFORMED; 2627 } 2628 2629 ABitReader br(chunk, sizeof(chunk)); 2630 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5}; 2631 static const unsigned sampleRateTable[] = {48000, 44100, 32000}; 2632 2633 unsigned fscod = br.getBits(2); 2634 if (fscod == 3) { 2635 ALOGE("Incorrect fscod (3) in AC3 header"); 2636 return ERROR_MALFORMED; 2637 } 2638 unsigned boxSampleRate = sampleRateTable[fscod]; 2639 if (boxSampleRate != sampleRate) { 2640 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d", 2641 boxSampleRate, sampleRate); 2642 return ERROR_MALFORMED; 2643 } 2644 2645 unsigned bsid = br.getBits(5); 2646 if (bsid > 8) { 2647 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?"); 2648 return ERROR_MALFORMED; 2649 } 2650 2651 // skip 2652 unsigned bsmod __unused = br.getBits(3); 2653 2654 unsigned acmod = br.getBits(3); 2655 unsigned lfeon = br.getBits(1); 2656 unsigned channelCount = channelCountTable[acmod] + lfeon; 2657 2658 if (mLastTrack == NULL) { 2659 return ERROR_MALFORMED; 2660 } 2661 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3); 2662 mLastTrack->meta->setInt32(kKeyChannelCount, channelCount); 2663 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 2664 return OK; 2665} 2666 2667status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 2668 ALOGV("MPEG4Extractor::parseSegmentIndex"); 2669 2670 if (size < 12) { 2671 return -EINVAL; 2672 } 2673 2674 uint32_t flags; 2675 if (!mDataSource->getUInt32(offset, &flags)) { 2676 return ERROR_MALFORMED; 2677 } 2678 2679 uint32_t version = flags >> 24; 2680 flags &= 0xffffff; 2681 2682 ALOGV("sidx version %d", version); 2683 2684 uint32_t referenceId; 2685 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 2686 return ERROR_MALFORMED; 2687 } 2688 2689 uint32_t timeScale; 2690 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 2691 
return ERROR_MALFORMED; 2692 } 2693 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 2694 if (timeScale == 0) 2695 return ERROR_MALFORMED; 2696 2697 uint64_t earliestPresentationTime; 2698 uint64_t firstOffset; 2699 2700 offset += 12; 2701 size -= 12; 2702 2703 if (version == 0) { 2704 if (size < 8) { 2705 return -EINVAL; 2706 } 2707 uint32_t tmp; 2708 if (!mDataSource->getUInt32(offset, &tmp)) { 2709 return ERROR_MALFORMED; 2710 } 2711 earliestPresentationTime = tmp; 2712 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 2713 return ERROR_MALFORMED; 2714 } 2715 firstOffset = tmp; 2716 offset += 8; 2717 size -= 8; 2718 } else { 2719 if (size < 16) { 2720 return -EINVAL; 2721 } 2722 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 2723 return ERROR_MALFORMED; 2724 } 2725 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 2726 return ERROR_MALFORMED; 2727 } 2728 offset += 16; 2729 size -= 16; 2730 } 2731 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset); 2732 2733 if (size < 4) { 2734 return -EINVAL; 2735 } 2736 2737 uint16_t referenceCount; 2738 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 2739 return ERROR_MALFORMED; 2740 } 2741 offset += 4; 2742 size -= 4; 2743 ALOGV("refcount: %d", referenceCount); 2744 2745 if (size < referenceCount * 12) { 2746 return -EINVAL; 2747 } 2748 2749 uint64_t total_duration = 0; 2750 for (unsigned int i = 0; i < referenceCount; i++) { 2751 uint32_t d1, d2, d3; 2752 2753 if (!mDataSource->getUInt32(offset, &d1) || // size 2754 !mDataSource->getUInt32(offset + 4, &d2) || // duration 2755 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 2756 return ERROR_MALFORMED; 2757 } 2758 2759 if (d1 & 0x80000000) { 2760 ALOGW("sub-sidx boxes not supported yet"); 2761 } 2762 bool sap = d3 & 0x80000000; 2763 uint32_t saptype = (d3 >> 28) & 7; 2764 if (!sap || (saptype != 1 && saptype != 2)) { 2765 // type 1 and 2 are sync samples 2766 ALOGW("not a stream access 
point, or unsupported type: %08x", d3); 2767 } 2768 total_duration += d2; 2769 offset += 12; 2770 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 2771 SidxEntry se; 2772 se.mSize = d1 & 0x7fffffff; 2773 se.mDurationUs = 1000000LL * d2 / timeScale; 2774 mSidxEntries.add(se); 2775 } 2776 2777 uint64_t sidxDuration = total_duration * 1000000 / timeScale; 2778 2779 if (mLastTrack == NULL) 2780 return ERROR_MALFORMED; 2781 2782 int64_t metaDuration; 2783 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 2784 mLastTrack->meta->setInt64(kKeyDuration, sidxDuration); 2785 } 2786 return OK; 2787} 2788 2789status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) { 2790 if (size < 8) { 2791 return ERROR_MALFORMED; 2792 } 2793 2794 uint32_t count; 2795 if (!mDataSource->getUInt32(offset + 4, &count)) { 2796 return ERROR_MALFORMED; 2797 } 2798 2799 if (mMetaKeyMap.size() > 0) { 2800 ALOGW("'keys' atom seen again, discarding existing entries"); 2801 mMetaKeyMap.clear(); 2802 } 2803 2804 off64_t keyOffset = offset + 8; 2805 off64_t stopOffset = offset + size; 2806 for (size_t i = 1; i <= count; i++) { 2807 if (keyOffset + 8 > stopOffset) { 2808 return ERROR_MALFORMED; 2809 } 2810 2811 uint32_t keySize; 2812 if (!mDataSource->getUInt32(keyOffset, &keySize) 2813 || keySize < 8 2814 || keyOffset + keySize > stopOffset) { 2815 return ERROR_MALFORMED; 2816 } 2817 2818 uint32_t type; 2819 if (!mDataSource->getUInt32(keyOffset + 4, &type) 2820 || type != FOURCC('m', 'd', 't', 'a')) { 2821 return ERROR_MALFORMED; 2822 } 2823 2824 keySize -= 8; 2825 keyOffset += 8; 2826 2827 sp<ABuffer> keyData = new ABuffer(keySize); 2828 if (keyData->data() == NULL) { 2829 return ERROR_MALFORMED; 2830 } 2831 if (mDataSource->readAt( 2832 keyOffset, keyData->data(), keySize) < (ssize_t) keySize) { 2833 return ERROR_MALFORMED; 2834 } 2835 2836 AString key((const char *)keyData->data(), keySize); 2837 mMetaKeyMap.add(i, key); 2838 2839 keyOffset += keySize; 
2840 } 2841 return OK; 2842} 2843 2844status_t MPEG4Extractor::parseQTMetaVal( 2845 int32_t keyId, off64_t offset, size_t size) { 2846 ssize_t index = mMetaKeyMap.indexOfKey(keyId); 2847 if (index < 0) { 2848 // corresponding key is not present, ignore 2849 return ERROR_MALFORMED; 2850 } 2851 2852 if (size <= 16) { 2853 return ERROR_MALFORMED; 2854 } 2855 uint32_t dataSize; 2856 if (!mDataSource->getUInt32(offset, &dataSize) 2857 || dataSize > size || dataSize <= 16) { 2858 return ERROR_MALFORMED; 2859 } 2860 uint32_t atomFourCC; 2861 if (!mDataSource->getUInt32(offset + 4, &atomFourCC) 2862 || atomFourCC != FOURCC('d', 'a', 't', 'a')) { 2863 return ERROR_MALFORMED; 2864 } 2865 uint32_t dataType; 2866 if (!mDataSource->getUInt32(offset + 8, &dataType) 2867 || ((dataType & 0xff000000) != 0)) { 2868 // not well-known type 2869 return ERROR_MALFORMED; 2870 } 2871 2872 dataSize -= 16; 2873 offset += 16; 2874 2875 if (dataType == 23 && dataSize >= 4) { 2876 // BE Float32 2877 uint32_t val; 2878 if (!mDataSource->getUInt32(offset, &val)) { 2879 return ERROR_MALFORMED; 2880 } 2881 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) { 2882 mFileMetaData->setFloat(kKeyCaptureFramerate, *(float *)&val); 2883 } 2884 } else if (dataType == 67 && dataSize >= 4) { 2885 // BE signed int32 2886 uint32_t val; 2887 if (!mDataSource->getUInt32(offset, &val)) { 2888 return ERROR_MALFORMED; 2889 } 2890 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) { 2891 mFileMetaData->setInt32(kKeyTemporalLayerCount, val); 2892 } 2893 } else { 2894 // add more keys if needed 2895 ALOGV("ignoring key: type %d, size %d", dataType, dataSize); 2896 } 2897 2898 return OK; 2899} 2900 2901status_t MPEG4Extractor::parseTrackHeader( 2902 off64_t data_offset, off64_t data_size) { 2903 if (data_size < 4) { 2904 return ERROR_MALFORMED; 2905 } 2906 2907 uint8_t version; 2908 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 2909 return ERROR_IO; 
2910 } 2911 2912 size_t dynSize = (version == 1) ? 36 : 24; 2913 2914 uint8_t buffer[36 + 60]; 2915 2916 if (data_size != (off64_t)dynSize + 60) { 2917 return ERROR_MALFORMED; 2918 } 2919 2920 if (mDataSource->readAt( 2921 data_offset, buffer, data_size) < (ssize_t)data_size) { 2922 return ERROR_IO; 2923 } 2924 2925 uint64_t ctime __unused, mtime __unused, duration __unused; 2926 int32_t id; 2927 2928 if (version == 1) { 2929 ctime = U64_AT(&buffer[4]); 2930 mtime = U64_AT(&buffer[12]); 2931 id = U32_AT(&buffer[20]); 2932 duration = U64_AT(&buffer[28]); 2933 } else if (version == 0) { 2934 ctime = U32_AT(&buffer[4]); 2935 mtime = U32_AT(&buffer[8]); 2936 id = U32_AT(&buffer[12]); 2937 duration = U32_AT(&buffer[20]); 2938 } else { 2939 return ERROR_UNSUPPORTED; 2940 } 2941 2942 if (mLastTrack == NULL) 2943 return ERROR_MALFORMED; 2944 2945 mLastTrack->meta->setInt32(kKeyTrackID, id); 2946 2947 size_t matrixOffset = dynSize + 16; 2948 int32_t a00 = U32_AT(&buffer[matrixOffset]); 2949 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 2950 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 2951 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 2952 2953#if 0 2954 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 2955 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 2956 2957 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 2958 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 2959 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 2960 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 2961#endif 2962 2963 uint32_t rotationDegrees; 2964 2965 static const int32_t kFixedOne = 0x10000; 2966 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 2967 // Identity, no rotation 2968 rotationDegrees = 0; 2969 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 2970 rotationDegrees = 90; 2971 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 2972 rotationDegrees = 270; 2973 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == 
-kFixedOne) { 2974 rotationDegrees = 180; 2975 } else { 2976 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 2977 rotationDegrees = 0; 2978 } 2979 2980 if (rotationDegrees != 0) { 2981 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); 2982 } 2983 2984 // Handle presentation display size, which could be different 2985 // from the image size indicated by kKeyWidth and kKeyHeight. 2986 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2987 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2988 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); 2989 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); 2990 2991 return OK; 2992} 2993 2994status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) { 2995 if (size == 0) { 2996 return OK; 2997 } 2998 2999 if (size < 4 || size == SIZE_MAX) { 3000 return ERROR_MALFORMED; 3001 } 3002 3003 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 3004 if (buffer == NULL) { 3005 return ERROR_MALFORMED; 3006 } 3007 if (mDataSource->readAt( 3008 offset, buffer, size) != (ssize_t)size) { 3009 delete[] buffer; 3010 buffer = NULL; 3011 3012 return ERROR_IO; 3013 } 3014 3015 uint32_t flags = U32_AT(buffer); 3016 3017 uint32_t metadataKey = 0; 3018 char chunk[5]; 3019 MakeFourCCString(mPath[4], chunk); 3020 ALOGV("meta: %s @ %lld", chunk, (long long)offset); 3021 switch ((int32_t)mPath[4]) { 3022 case FOURCC(0xa9, 'a', 'l', 'b'): 3023 { 3024 metadataKey = kKeyAlbum; 3025 break; 3026 } 3027 case FOURCC(0xa9, 'A', 'R', 'T'): 3028 { 3029 metadataKey = kKeyArtist; 3030 break; 3031 } 3032 case FOURCC('a', 'A', 'R', 'T'): 3033 { 3034 metadataKey = kKeyAlbumArtist; 3035 break; 3036 } 3037 case FOURCC(0xa9, 'd', 'a', 'y'): 3038 { 3039 metadataKey = kKeyYear; 3040 break; 3041 } 3042 case FOURCC(0xa9, 'n', 'a', 'm'): 3043 { 3044 metadataKey = kKeyTitle; 3045 break; 3046 } 3047 case FOURCC(0xa9, 'w', 'r', 't'): 3048 { 3049 metadataKey = kKeyWriter; 3050 break; 3051 } 3052 case FOURCC('c', 'o', 
'v', 'r'): 3053 { 3054 metadataKey = kKeyAlbumArt; 3055 break; 3056 } 3057 case FOURCC('g', 'n', 'r', 'e'): 3058 { 3059 metadataKey = kKeyGenre; 3060 break; 3061 } 3062 case FOURCC(0xa9, 'g', 'e', 'n'): 3063 { 3064 metadataKey = kKeyGenre; 3065 break; 3066 } 3067 case FOURCC('c', 'p', 'i', 'l'): 3068 { 3069 if (size == 9 && flags == 21) { 3070 char tmp[16]; 3071 sprintf(tmp, "%d", 3072 (int)buffer[size - 1]); 3073 3074 mFileMetaData->setCString(kKeyCompilation, tmp); 3075 } 3076 break; 3077 } 3078 case FOURCC('t', 'r', 'k', 'n'): 3079 { 3080 if (size == 16 && flags == 0) { 3081 char tmp[16]; 3082 uint16_t* pTrack = (uint16_t*)&buffer[10]; 3083 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 3084 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 3085 3086 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 3087 } 3088 break; 3089 } 3090 case FOURCC('d', 'i', 's', 'k'): 3091 { 3092 if ((size == 14 || size == 16) && flags == 0) { 3093 char tmp[16]; 3094 uint16_t* pDisc = (uint16_t*)&buffer[10]; 3095 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 3096 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 3097 3098 mFileMetaData->setCString(kKeyDiscNumber, tmp); 3099 } 3100 break; 3101 } 3102 case FOURCC('-', '-', '-', '-'): 3103 { 3104 buffer[size] = '\0'; 3105 switch (mPath[5]) { 3106 case FOURCC('m', 'e', 'a', 'n'): 3107 mLastCommentMean.setTo((const char *)buffer + 4); 3108 break; 3109 case FOURCC('n', 'a', 'm', 'e'): 3110 mLastCommentName.setTo((const char *)buffer + 4); 3111 break; 3112 case FOURCC('d', 'a', 't', 'a'): 3113 if (size < 8) { 3114 delete[] buffer; 3115 buffer = NULL; 3116 ALOGE("b/24346430"); 3117 return ERROR_MALFORMED; 3118 } 3119 mLastCommentData.setTo((const char *)buffer + 8); 3120 break; 3121 } 3122 3123 // Once we have a set of mean/name/data info, go ahead and process 3124 // it to see if its something we are interested in. 
Whether or not 3125 // were are interested in the specific tag, make sure to clear out 3126 // the set so we can be ready to process another tuple should one 3127 // show up later in the file. 3128 if ((mLastCommentMean.length() != 0) && 3129 (mLastCommentName.length() != 0) && 3130 (mLastCommentData.length() != 0)) { 3131 3132 if (mLastCommentMean == "com.apple.iTunes" 3133 && mLastCommentName == "iTunSMPB") { 3134 int32_t delay, padding; 3135 if (sscanf(mLastCommentData, 3136 " %*x %x %x %*x", &delay, &padding) == 2) { 3137 if (mLastTrack == NULL) { 3138 delete[] buffer; 3139 return ERROR_MALFORMED; 3140 } 3141 3142 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 3143 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); 3144 } 3145 } 3146 3147 mLastCommentMean.clear(); 3148 mLastCommentName.clear(); 3149 mLastCommentData.clear(); 3150 } 3151 break; 3152 } 3153 3154 default: 3155 break; 3156 } 3157 3158 if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) { 3159 if (metadataKey == kKeyAlbumArt) { 3160 mFileMetaData->setData( 3161 kKeyAlbumArt, MetaData::TYPE_NONE, 3162 buffer + 8, size - 8); 3163 } else if (metadataKey == kKeyGenre) { 3164 if (flags == 0) { 3165 // uint8_t genre code, iTunes genre codes are 3166 // the standard id3 codes, except they start 3167 // at 1 instead of 0 (e.g. Pop is 14, not 13) 3168 // We use standard id3 numbering, so subtract 1. 
3169 int genrecode = (int)buffer[size - 1]; 3170 genrecode--; 3171 if (genrecode < 0) { 3172 genrecode = 255; // reserved for 'unknown genre' 3173 } 3174 char genre[10]; 3175 sprintf(genre, "%d", genrecode); 3176 3177 mFileMetaData->setCString(metadataKey, genre); 3178 } else if (flags == 1) { 3179 // custom genre string 3180 buffer[size] = '\0'; 3181 3182 mFileMetaData->setCString( 3183 metadataKey, (const char *)buffer + 8); 3184 } 3185 } else { 3186 buffer[size] = '\0'; 3187 3188 mFileMetaData->setCString( 3189 metadataKey, (const char *)buffer + 8); 3190 } 3191 } 3192 3193 delete[] buffer; 3194 buffer = NULL; 3195 3196 return OK; 3197} 3198 3199status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) { 3200 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) { 3201 return ERROR_MALFORMED; 3202 } 3203 3204 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 3205 if (buffer == NULL) { 3206 return ERROR_MALFORMED; 3207 } 3208 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) { 3209 delete[] buffer; 3210 buffer = NULL; 3211 3212 return ERROR_IO; 3213 } 3214 3215 int32_t type = U32_AT(&buffer[0]); 3216 if ((type == FOURCC('n', 'c', 'l', 'x') && size >= 11) 3217 || (type == FOURCC('n', 'c', 'l', 'c') && size >= 10)) { 3218 int32_t primaries = U16_AT(&buffer[4]); 3219 int32_t transfer = U16_AT(&buffer[6]); 3220 int32_t coeffs = U16_AT(&buffer[8]); 3221 bool fullRange = (type == FOURCC('n', 'c', 'l', 'x')) && (buffer[10] & 128); 3222 3223 ColorAspects aspects; 3224 ColorUtils::convertIsoColorAspectsToCodecAspects( 3225 primaries, transfer, coeffs, fullRange, aspects); 3226 3227 // only store the first color specification 3228 if (!mLastTrack->meta->hasData(kKeyColorPrimaries)) { 3229 mLastTrack->meta->setInt32(kKeyColorPrimaries, aspects.mPrimaries); 3230 mLastTrack->meta->setInt32(kKeyTransferFunction, aspects.mTransfer); 3231 mLastTrack->meta->setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs); 3232 
mLastTrack->meta->setInt32(kKeyColorRange, aspects.mRange); 3233 } 3234 } 3235 3236 delete[] buffer; 3237 buffer = NULL; 3238 3239 return OK; 3240} 3241 3242status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) { 3243 if (size < 4 || size == SIZE_MAX) { 3244 return ERROR_MALFORMED; 3245 } 3246 3247 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 3248 if (buffer == NULL) { 3249 return ERROR_MALFORMED; 3250 } 3251 if (mDataSource->readAt( 3252 offset, buffer, size) != (ssize_t)size) { 3253 delete[] buffer; 3254 buffer = NULL; 3255 3256 return ERROR_IO; 3257 } 3258 3259 uint32_t metadataKey = 0; 3260 switch (mPath[depth]) { 3261 case FOURCC('t', 'i', 't', 'l'): 3262 { 3263 metadataKey = kKeyTitle; 3264 break; 3265 } 3266 case FOURCC('p', 'e', 'r', 'f'): 3267 { 3268 metadataKey = kKeyArtist; 3269 break; 3270 } 3271 case FOURCC('a', 'u', 't', 'h'): 3272 { 3273 metadataKey = kKeyWriter; 3274 break; 3275 } 3276 case FOURCC('g', 'n', 'r', 'e'): 3277 { 3278 metadataKey = kKeyGenre; 3279 break; 3280 } 3281 case FOURCC('a', 'l', 'b', 'm'): 3282 { 3283 if (buffer[size - 1] != '\0') { 3284 char tmp[4]; 3285 sprintf(tmp, "%u", buffer[size - 1]); 3286 3287 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 3288 } 3289 3290 metadataKey = kKeyAlbum; 3291 break; 3292 } 3293 case FOURCC('y', 'r', 'r', 'c'): 3294 { 3295 if (size < 6) { 3296 delete[] buffer; 3297 buffer = NULL; 3298 ALOGE("b/62133227"); 3299 android_errorWriteLog(0x534e4554, "62133227"); 3300 return ERROR_MALFORMED; 3301 } 3302 char tmp[5]; 3303 uint16_t year = U16_AT(&buffer[4]); 3304 3305 if (year < 10000) { 3306 sprintf(tmp, "%u", year); 3307 3308 mFileMetaData->setCString(kKeyYear, tmp); 3309 } 3310 break; 3311 } 3312 3313 default: 3314 break; 3315 } 3316 3317 if (metadataKey > 0) { 3318 bool isUTF8 = true; // Common case 3319 char16_t *framedata = NULL; 3320 int len16 = 0; // Number of UTF-16 characters 3321 3322 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 
0x00 3323 if (size < 6) { 3324 delete[] buffer; 3325 buffer = NULL; 3326 return ERROR_MALFORMED; 3327 } 3328 3329 if (size - 6 >= 4) { 3330 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator 3331 framedata = (char16_t *)(buffer + 6); 3332 if (0xfffe == *framedata) { 3333 // endianness marker (BOM) doesn't match host endianness 3334 for (int i = 0; i < len16; i++) { 3335 framedata[i] = bswap_16(framedata[i]); 3336 } 3337 // BOM is now swapped to 0xfeff, we will execute next block too 3338 } 3339 3340 if (0xfeff == *framedata) { 3341 // Remove the BOM 3342 framedata++; 3343 len16--; 3344 isUTF8 = false; 3345 } 3346 // else normal non-zero-length UTF-8 string 3347 // we can't handle UTF-16 without BOM as there is no other 3348 // indication of encoding. 3349 } 3350 3351 if (isUTF8) { 3352 buffer[size] = 0; 3353 mFileMetaData->setCString(metadataKey, (const char *)buffer + 6); 3354 } else { 3355 // Convert from UTF-16 string to UTF-8 string. 3356 String8 tmpUTF8str(framedata, len16); 3357 mFileMetaData->setCString(metadataKey, tmpUTF8str.string()); 3358 } 3359 } 3360 3361 delete[] buffer; 3362 buffer = NULL; 3363 3364 return OK; 3365} 3366 3367void MPEG4Extractor::parseID3v2MetaData(off64_t offset) { 3368 ID3 id3(mDataSource, true /* ignorev1 */, offset); 3369 3370 if (id3.isValid()) { 3371 struct Map { 3372 int key; 3373 const char *tag1; 3374 const char *tag2; 3375 }; 3376 static const Map kMap[] = { 3377 { kKeyAlbum, "TALB", "TAL" }, 3378 { kKeyArtist, "TPE1", "TP1" }, 3379 { kKeyAlbumArtist, "TPE2", "TP2" }, 3380 { kKeyComposer, "TCOM", "TCM" }, 3381 { kKeyGenre, "TCON", "TCO" }, 3382 { kKeyTitle, "TIT2", "TT2" }, 3383 { kKeyYear, "TYE", "TYER" }, 3384 { kKeyAuthor, "TXT", "TEXT" }, 3385 { kKeyCDTrackNumber, "TRK", "TRCK" }, 3386 { kKeyDiscNumber, "TPA", "TPOS" }, 3387 { kKeyCompilation, "TCP", "TCMP" }, 3388 }; 3389 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 3390 3391 for (size_t i = 0; i < kNumMapEntries; ++i) { 3392 if 
(!mFileMetaData->hasData(kMap[i].key)) { 3393 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 3394 if (it->done()) { 3395 delete it; 3396 it = new ID3::Iterator(id3, kMap[i].tag2); 3397 } 3398 3399 if (it->done()) { 3400 delete it; 3401 continue; 3402 } 3403 3404 String8 s; 3405 it->getString(&s); 3406 delete it; 3407 3408 mFileMetaData->setCString(kMap[i].key, s); 3409 } 3410 } 3411 3412 size_t dataSize; 3413 String8 mime; 3414 const void *data = id3.getAlbumArt(&dataSize, &mime); 3415 3416 if (data) { 3417 mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 3418 mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string()); 3419 } 3420 } 3421} 3422 3423MediaSourceBase *MPEG4Extractor::getTrack(size_t index) { 3424 status_t err; 3425 if ((err = readMetaData()) != OK) { 3426 return NULL; 3427 } 3428 3429 Track *track = mFirstTrack; 3430 while (index > 0) { 3431 if (track == NULL) { 3432 return NULL; 3433 } 3434 3435 track = track->next; 3436 --index; 3437 } 3438 3439 if (track == NULL) { 3440 return NULL; 3441 } 3442 3443 3444 Trex *trex = NULL; 3445 int32_t trackId; 3446 if (track->meta->findInt32(kKeyTrackID, &trackId)) { 3447 for (size_t i = 0; i < mTrex.size(); i++) { 3448 Trex *t = &mTrex.editItemAt(i); 3449 if (t->track_ID == (uint32_t) trackId) { 3450 trex = t; 3451 break; 3452 } 3453 } 3454 } else { 3455 ALOGE("b/21657957"); 3456 return NULL; 3457 } 3458 3459 ALOGV("getTrack called, pssh: %zu", mPssh.size()); 3460 3461 const char *mime; 3462 if (!track->meta->findCString(kKeyMIMEType, &mime)) { 3463 return NULL; 3464 } 3465 3466 sp<ItemTable> itemTable; 3467 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 3468 uint32_t type; 3469 const void *data; 3470 size_t size; 3471 if (!track->meta->findData(kKeyAVCC, &type, &data, &size)) { 3472 return NULL; 3473 } 3474 3475 const uint8_t *ptr = (const uint8_t *)data; 3476 3477 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1 3478 return NULL; 3479 } 3480 } else if 
(!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) 3481 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) { 3482 uint32_t type; 3483 const void *data; 3484 size_t size; 3485 if (!track->meta->findData(kKeyHVCC, &type, &data, &size)) { 3486 return NULL; 3487 } 3488 3489 const uint8_t *ptr = (const uint8_t *)data; 3490 3491 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1 3492 return NULL; 3493 } 3494 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) { 3495 itemTable = mItemTable; 3496 } 3497 } 3498 3499 MPEG4Source *source = new MPEG4Source( 3500 track->meta, mDataSource, track->timescale, track->sampleTable, 3501 mSidxEntries, trex, mMoofOffset, itemTable); 3502 if (source->init() != OK) { 3503 delete source; 3504 return NULL; 3505 } 3506 return source; 3507} 3508 3509// static 3510status_t MPEG4Extractor::verifyTrack(Track *track) { 3511 const char *mime; 3512 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 3513 3514 uint32_t type; 3515 const void *data; 3516 size_t size; 3517 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 3518 if (!track->meta->findData(kKeyAVCC, &type, &data, &size) 3519 || type != kTypeAVCC) { 3520 return ERROR_MALFORMED; 3521 } 3522 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 3523 if (!track->meta->findData(kKeyHVCC, &type, &data, &size) 3524 || type != kTypeHVCC) { 3525 return ERROR_MALFORMED; 3526 } 3527 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 3528 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2) 3529 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 3530 if (!track->meta->findData(kKeyESDS, &type, &data, &size) 3531 || type != kTypeESDS) { 3532 return ERROR_MALFORMED; 3533 } 3534 } 3535 3536 if (track->sampleTable == NULL || !track->sampleTable->isValid()) { 3537 // Make sure we have all the metadata we need. 
3538 ALOGE("stbl atom missing/invalid."); 3539 return ERROR_MALFORMED; 3540 } 3541 3542 if (track->timescale == 0) { 3543 ALOGE("timescale invalid."); 3544 return ERROR_MALFORMED; 3545 } 3546 3547 return OK; 3548} 3549 3550typedef enum { 3551 //AOT_NONE = -1, 3552 //AOT_NULL_OBJECT = 0, 3553 //AOT_AAC_MAIN = 1, /**< Main profile */ 3554 AOT_AAC_LC = 2, /**< Low Complexity object */ 3555 //AOT_AAC_SSR = 3, 3556 //AOT_AAC_LTP = 4, 3557 AOT_SBR = 5, 3558 //AOT_AAC_SCAL = 6, 3559 //AOT_TWIN_VQ = 7, 3560 //AOT_CELP = 8, 3561 //AOT_HVXC = 9, 3562 //AOT_RSVD_10 = 10, /**< (reserved) */ 3563 //AOT_RSVD_11 = 11, /**< (reserved) */ 3564 //AOT_TTSI = 12, /**< TTSI Object */ 3565 //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */ 3566 //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */ 3567 //AOT_GEN_MIDI = 15, /**< General MIDI object */ 3568 //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */ 3569 AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */ 3570 //AOT_RSVD_18 = 18, /**< (reserved) */ 3571 //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */ 3572 AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */ 3573 //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */ 3574 AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */ 3575 AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */ 3576 //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */ 3577 //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */ 3578 //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */ 3579 //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */ 3580 //AOT_RSVD_28 = 28, /**< might become SSC */ 3581 AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */ 3582 //AOT_MPEGS = 30, /**< MPEG Surround */ 3583 3584 AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */ 3585 3586 //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */ 3587 //AOT_MP3ONMP4_L2 = 33, /**< 
MPEG-Layer2 in mp4 */ 3588 //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */ 3589 //AOT_RSVD_35 = 35, /**< might become DST */ 3590 //AOT_RSVD_36 = 36, /**< might become ALS */ 3591 //AOT_AAC_SLS = 37, /**< AAC + SLS */ 3592 //AOT_SLS = 38, /**< SLS */ 3593 //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */ 3594 3595 //AOT_USAC = 42, /**< USAC */ 3596 //AOT_SAOC = 43, /**< SAOC */ 3597 //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */ 3598 3599 //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */ 3600} AUDIO_OBJECT_TYPE; 3601 3602status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 3603 const void *esds_data, size_t esds_size) { 3604 ESDS esds(esds_data, esds_size); 3605 3606 uint8_t objectTypeIndication; 3607 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 3608 return ERROR_MALFORMED; 3609 } 3610 3611 if (objectTypeIndication == 0xe1) { 3612 // This isn't MPEG4 audio at all, it's QCELP 14k... 3613 if (mLastTrack == NULL) 3614 return ERROR_MALFORMED; 3615 3616 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 3617 return OK; 3618 } 3619 3620 if (objectTypeIndication == 0x6b) { 3621 // The media subtype is MP3 audio 3622 // Our software MP3 audio decoder may not be able to handle 3623 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 3624 ALOGE("MP3 track in MP4/3GPP file is not supported"); 3625 return ERROR_UNSUPPORTED; 3626 } 3627 3628 if (mLastTrack != NULL) { 3629 uint32_t maxBitrate = 0; 3630 uint32_t avgBitrate = 0; 3631 esds.getBitRate(&maxBitrate, &avgBitrate); 3632 if (maxBitrate > 0 && maxBitrate < INT32_MAX) { 3633 mLastTrack->meta->setInt32(kKeyMaxBitRate, (int32_t)maxBitrate); 3634 } 3635 if (avgBitrate > 0 && avgBitrate < INT32_MAX) { 3636 mLastTrack->meta->setInt32(kKeyBitRate, (int32_t)avgBitrate); 3637 } 3638 } 3639 3640 const uint8_t *csd; 3641 size_t csd_size; 3642 if (esds.getCodecSpecificInfo( 3643 (const void **)&csd, &csd_size) != OK) { 3644 return ERROR_MALFORMED; 
3645 } 3646 3647 if (kUseHexDump) { 3648 printf("ESD of size %zu\n", csd_size); 3649 hexdump(csd, csd_size); 3650 } 3651 3652 if (csd_size == 0) { 3653 // There's no further information, i.e. no codec specific data 3654 // Let's assume that the information provided in the mpeg4 headers 3655 // is accurate and hope for the best. 3656 3657 return OK; 3658 } 3659 3660 if (csd_size < 2) { 3661 return ERROR_MALFORMED; 3662 } 3663 3664 static uint32_t kSamplingRate[] = { 3665 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 3666 16000, 12000, 11025, 8000, 7350 3667 }; 3668 3669 ABitReader br(csd, csd_size); 3670 uint32_t objectType = br.getBits(5); 3671 3672 if (objectType == 31) { // AAC-ELD => additional 6 bits 3673 objectType = 32 + br.getBits(6); 3674 } 3675 3676 if (mLastTrack == NULL) 3677 return ERROR_MALFORMED; 3678 3679 //keep AOT type 3680 mLastTrack->meta->setInt32(kKeyAACAOT, objectType); 3681 3682 uint32_t freqIndex = br.getBits(4); 3683 3684 int32_t sampleRate = 0; 3685 int32_t numChannels = 0; 3686 if (freqIndex == 15) { 3687 if (br.numBitsLeft() < 28) return ERROR_MALFORMED; 3688 sampleRate = br.getBits(24); 3689 numChannels = br.getBits(4); 3690 } else { 3691 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3692 numChannels = br.getBits(4); 3693 3694 if (freqIndex == 13 || freqIndex == 14) { 3695 return ERROR_MALFORMED; 3696 } 3697 3698 sampleRate = kSamplingRate[freqIndex]; 3699 } 3700 3701 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 3702 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3703 uint32_t extFreqIndex = br.getBits(4); 3704 int32_t extSampleRate __unused; 3705 if (extFreqIndex == 15) { 3706 if (csd_size < 8) { 3707 return ERROR_MALFORMED; 3708 } 3709 if (br.numBitsLeft() < 24) return ERROR_MALFORMED; 3710 extSampleRate = br.getBits(24); 3711 } else { 3712 if (extFreqIndex == 13 || extFreqIndex == 14) { 3713 return ERROR_MALFORMED; 3714 } 3715 extSampleRate = 
kSamplingRate[extFreqIndex]; 3716 } 3717 //TODO: save the extension sampling rate value in meta data => 3718 // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate); 3719 } 3720 3721 switch (numChannels) { 3722 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration 3723 case 0: 3724 case 1:// FC 3725 case 2:// FL FR 3726 case 3:// FC, FL FR 3727 case 4:// FC, FL FR, RC 3728 case 5:// FC, FL FR, SL SR 3729 case 6:// FC, FL FR, SL SR, LFE 3730 //numChannels already contains the right value 3731 break; 3732 case 11:// FC, FL FR, SL SR, RC, LFE 3733 numChannels = 7; 3734 break; 3735 case 7: // FC, FCL FCR, FL FR, SL SR, LFE 3736 case 12:// FC, FL FR, SL SR, RL RR, LFE 3737 case 14:// FC, FL FR, SL SR, LFE, FHL FHR 3738 numChannels = 8; 3739 break; 3740 default: 3741 return ERROR_UNSUPPORTED; 3742 } 3743 3744 { 3745 if (objectType == AOT_SBR || objectType == AOT_PS) { 3746 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3747 objectType = br.getBits(5); 3748 3749 if (objectType == AOT_ESCAPE) { 3750 if (br.numBitsLeft() < 6) return ERROR_MALFORMED; 3751 objectType = 32 + br.getBits(6); 3752 } 3753 } 3754 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || 3755 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || 3756 objectType == AOT_ER_BSAC) { 3757 if (br.numBitsLeft() < 2) return ERROR_MALFORMED; 3758 const int32_t frameLengthFlag __unused = br.getBits(1); 3759 3760 const int32_t dependsOnCoreCoder = br.getBits(1); 3761 3762 if (dependsOnCoreCoder ) { 3763 if (br.numBitsLeft() < 14) return ERROR_MALFORMED; 3764 const int32_t coreCoderDelay __unused = br.getBits(14); 3765 } 3766 3767 int32_t extensionFlag = -1; 3768 if (br.numBitsLeft() > 0) { 3769 extensionFlag = br.getBits(1); 3770 } else { 3771 switch (objectType) { 3772 // 14496-3 4.5.1.1 extensionFlag 3773 case AOT_AAC_LC: 3774 extensionFlag = 0; 3775 break; 3776 case AOT_ER_AAC_LC: 3777 case AOT_ER_AAC_SCAL: 3778 case AOT_ER_BSAC: 3779 case AOT_ER_AAC_LD: 
3780 extensionFlag = 1; 3781 break; 3782 default: 3783 return ERROR_MALFORMED; 3784 break; 3785 } 3786 ALOGW("csd missing extension flag; assuming %d for object type %u.", 3787 extensionFlag, objectType); 3788 } 3789 3790 if (numChannels == 0) { 3791 int32_t channelsEffectiveNum = 0; 3792 int32_t channelsNum = 0; 3793 if (br.numBitsLeft() < 32) { 3794 return ERROR_MALFORMED; 3795 } 3796 const int32_t ElementInstanceTag __unused = br.getBits(4); 3797 const int32_t Profile __unused = br.getBits(2); 3798 const int32_t SamplingFrequencyIndex __unused = br.getBits(4); 3799 const int32_t NumFrontChannelElements = br.getBits(4); 3800 const int32_t NumSideChannelElements = br.getBits(4); 3801 const int32_t NumBackChannelElements = br.getBits(4); 3802 const int32_t NumLfeChannelElements = br.getBits(2); 3803 const int32_t NumAssocDataElements __unused = br.getBits(3); 3804 const int32_t NumValidCcElements __unused = br.getBits(4); 3805 3806 const int32_t MonoMixdownPresent = br.getBits(1); 3807 3808 if (MonoMixdownPresent != 0) { 3809 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3810 const int32_t MonoMixdownElementNumber __unused = br.getBits(4); 3811 } 3812 3813 if (br.numBitsLeft() < 1) return ERROR_MALFORMED; 3814 const int32_t StereoMixdownPresent = br.getBits(1); 3815 if (StereoMixdownPresent != 0) { 3816 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3817 const int32_t StereoMixdownElementNumber __unused = br.getBits(4); 3818 } 3819 3820 if (br.numBitsLeft() < 1) return ERROR_MALFORMED; 3821 const int32_t MatrixMixdownIndexPresent = br.getBits(1); 3822 if (MatrixMixdownIndexPresent != 0) { 3823 if (br.numBitsLeft() < 3) return ERROR_MALFORMED; 3824 const int32_t MatrixMixdownIndex __unused = br.getBits(2); 3825 const int32_t PseudoSurroundEnable __unused = br.getBits(1); 3826 } 3827 3828 int i; 3829 for (i=0; i < NumFrontChannelElements; i++) { 3830 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3831 const int32_t FrontElementIsCpe = br.getBits(1); 3832 
const int32_t FrontElementTagSelect __unused = br.getBits(4); 3833 channelsNum += FrontElementIsCpe ? 2 : 1; 3834 } 3835 3836 for (i=0; i < NumSideChannelElements; i++) { 3837 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3838 const int32_t SideElementIsCpe = br.getBits(1); 3839 const int32_t SideElementTagSelect __unused = br.getBits(4); 3840 channelsNum += SideElementIsCpe ? 2 : 1; 3841 } 3842 3843 for (i=0; i < NumBackChannelElements; i++) { 3844 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3845 const int32_t BackElementIsCpe = br.getBits(1); 3846 const int32_t BackElementTagSelect __unused = br.getBits(4); 3847 channelsNum += BackElementIsCpe ? 2 : 1; 3848 } 3849 channelsEffectiveNum = channelsNum; 3850 3851 for (i=0; i < NumLfeChannelElements; i++) { 3852 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3853 const int32_t LfeElementTagSelect __unused = br.getBits(4); 3854 channelsNum += 1; 3855 } 3856 ALOGV("mpeg4 audio channelsNum = %d", channelsNum); 3857 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); 3858 numChannels = channelsNum; 3859 } 3860 } 3861 } 3862 3863 if (numChannels == 0) { 3864 return ERROR_UNSUPPORTED; 3865 } 3866 3867 if (mLastTrack == NULL) 3868 return ERROR_MALFORMED; 3869 3870 int32_t prevSampleRate; 3871 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); 3872 3873 if (prevSampleRate != sampleRate) { 3874 ALOGV("mpeg4 audio sample rate different from previous setting. " 3875 "was: %d, now: %d", prevSampleRate, sampleRate); 3876 } 3877 3878 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 3879 3880 int32_t prevChannelCount; 3881 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); 3882 3883 if (prevChannelCount != numChannels) { 3884 ALOGV("mpeg4 audio channel count different from previous setting. 
" 3885 "was: %d, now: %d", prevChannelCount, numChannels); 3886 } 3887 3888 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); 3889 3890 return OK; 3891} 3892 3893//////////////////////////////////////////////////////////////////////////////// 3894 3895MPEG4Source::MPEG4Source( 3896 const sp<MetaData> &format, 3897 DataSourceBase *dataSource, 3898 int32_t timeScale, 3899 const sp<SampleTable> &sampleTable, 3900 Vector<SidxEntry> &sidx, 3901 const Trex *trex, 3902 off64_t firstMoofOffset, 3903 const sp<ItemTable> &itemTable) 3904 : mFormat(format), 3905 mDataSource(dataSource), 3906 mTimescale(timeScale), 3907 mSampleTable(sampleTable), 3908 mCurrentSampleIndex(0), 3909 mCurrentFragmentIndex(0), 3910 mSegments(sidx), 3911 mTrex(trex), 3912 mFirstMoofOffset(firstMoofOffset), 3913 mCurrentMoofOffset(firstMoofOffset), 3914 mNextMoofOffset(-1), 3915 mCurrentTime(0), 3916 mCurrentSampleInfoAllocSize(0), 3917 mCurrentSampleInfoSizes(NULL), 3918 mCurrentSampleInfoOffsetsAllocSize(0), 3919 mCurrentSampleInfoOffsets(NULL), 3920 mIsAVC(false), 3921 mIsHEVC(false), 3922 mNALLengthSize(0), 3923 mStarted(false), 3924 mGroup(NULL), 3925 mBuffer(NULL), 3926 mWantsNALFragments(false), 3927 mSrcBuffer(NULL), 3928 mIsHeif(itemTable != NULL), 3929 mItemTable(itemTable) { 3930 3931 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo)); 3932 3933 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode); 3934 mDefaultIVSize = 0; 3935 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 3936 uint32_t keytype; 3937 const void *key; 3938 size_t keysize; 3939 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 3940 CHECK(keysize <= 16); 3941 memset(mCryptoKey, 0, 16); 3942 memcpy(mCryptoKey, key, keysize); 3943 } 3944 3945 const char *mime; 3946 bool success = mFormat->findCString(kKeyMIMEType, &mime); 3947 CHECK(success); 3948 3949 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 3950 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) || 3951 
!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC); 3952 3953 if (mIsAVC) { 3954 uint32_t type; 3955 const void *data; 3956 size_t size; 3957 CHECK(format->findData(kKeyAVCC, &type, &data, &size)); 3958 3959 const uint8_t *ptr = (const uint8_t *)data; 3960 3961 CHECK(size >= 7); 3962 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3963 3964 // The number of bytes used to encode the length of a NAL unit. 3965 mNALLengthSize = 1 + (ptr[4] & 3); 3966 } else if (mIsHEVC) { 3967 uint32_t type; 3968 const void *data; 3969 size_t size; 3970 CHECK(format->findData(kKeyHVCC, &type, &data, &size)); 3971 3972 const uint8_t *ptr = (const uint8_t *)data; 3973 3974 CHECK(size >= 22); 3975 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3976 3977 mNALLengthSize = 1 + (ptr[14 + 7] & 3); 3978 } 3979 3980 CHECK(format->findInt32(kKeyTrackID, &mTrackId)); 3981 3982} 3983 3984status_t MPEG4Source::init() { 3985 if (mFirstMoofOffset != 0) { 3986 off64_t offset = mFirstMoofOffset; 3987 return parseChunk(&offset); 3988 } 3989 return OK; 3990} 3991 3992MPEG4Source::~MPEG4Source() { 3993 if (mStarted) { 3994 stop(); 3995 } 3996 free(mCurrentSampleInfoSizes); 3997 free(mCurrentSampleInfoOffsets); 3998} 3999 4000status_t MPEG4Source::start(MetaData *params) { 4001 Mutex::Autolock autoLock(mLock); 4002 4003 CHECK(!mStarted); 4004 4005 int32_t val; 4006 if (params && params->findInt32(kKeyWantsNALFragments, &val) 4007 && val != 0) { 4008 mWantsNALFragments = true; 4009 } else { 4010 mWantsNALFragments = false; 4011 } 4012 4013 int32_t tmp; 4014 CHECK(mFormat->findInt32(kKeyMaxInputSize, &tmp)); 4015 size_t max_size = tmp; 4016 4017 // A somewhat arbitrary limit that should be sufficient for 8k video frames 4018 // If you see the message below for a valid input stream: increase the limit 4019 const size_t kMaxBufferSize = 64 * 1024 * 1024; 4020 if (max_size > kMaxBufferSize) { 4021 ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize); 4022 return 
ERROR_MALFORMED; 4023 } 4024 if (max_size == 0) { 4025 ALOGE("zero max input size"); 4026 return ERROR_MALFORMED; 4027 } 4028 4029 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize. 4030 const size_t kMaxBuffers = 8; 4031 const size_t buffers = min(kMaxBufferSize / max_size, kMaxBuffers); 4032 mGroup = new MediaBufferGroup(buffers, max_size); 4033 mSrcBuffer = new (std::nothrow) uint8_t[max_size]; 4034 if (mSrcBuffer == NULL) { 4035 // file probably specified a bad max size 4036 delete mGroup; 4037 mGroup = NULL; 4038 return ERROR_MALFORMED; 4039 } 4040 4041 mStarted = true; 4042 4043 return OK; 4044} 4045 4046status_t MPEG4Source::stop() { 4047 Mutex::Autolock autoLock(mLock); 4048 4049 CHECK(mStarted); 4050 4051 if (mBuffer != NULL) { 4052 mBuffer->release(); 4053 mBuffer = NULL; 4054 } 4055 4056 delete[] mSrcBuffer; 4057 mSrcBuffer = NULL; 4058 4059 delete mGroup; 4060 mGroup = NULL; 4061 4062 mStarted = false; 4063 mCurrentSampleIndex = 0; 4064 4065 return OK; 4066} 4067 4068status_t MPEG4Source::parseChunk(off64_t *offset) { 4069 uint32_t hdr[2]; 4070 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 4071 return ERROR_IO; 4072 } 4073 uint64_t chunk_size = ntohl(hdr[0]); 4074 uint32_t chunk_type = ntohl(hdr[1]); 4075 off64_t data_offset = *offset + 8; 4076 4077 if (chunk_size == 1) { 4078 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 4079 return ERROR_IO; 4080 } 4081 chunk_size = ntoh64(chunk_size); 4082 data_offset += 8; 4083 4084 if (chunk_size < 16) { 4085 // The smallest valid chunk is 16 bytes long in this case. 4086 return ERROR_MALFORMED; 4087 } 4088 } else if (chunk_size < 8) { 4089 // The smallest valid chunk is 8 bytes long. 
4090 return ERROR_MALFORMED; 4091 } 4092 4093 char chunk[5]; 4094 MakeFourCCString(chunk_type, chunk); 4095 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset); 4096 4097 off64_t chunk_data_size = *offset + chunk_size - data_offset; 4098 4099 switch(chunk_type) { 4100 4101 case FOURCC('t', 'r', 'a', 'f'): 4102 case FOURCC('m', 'o', 'o', 'f'): { 4103 off64_t stop_offset = *offset + chunk_size; 4104 *offset = data_offset; 4105 while (*offset < stop_offset) { 4106 status_t err = parseChunk(offset); 4107 if (err != OK) { 4108 return err; 4109 } 4110 } 4111 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 4112 // *offset points to the box following this moof. Find the next moof from there. 4113 4114 while (true) { 4115 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 4116 // no more box to the end of file. 4117 break; 4118 } 4119 chunk_size = ntohl(hdr[0]); 4120 chunk_type = ntohl(hdr[1]); 4121 if (chunk_size == 1) { 4122 // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box 4123 // which is defined in 4.2 Object Structure. 4124 // When chunk_size==1, 8 bytes follows as "largesize". 4125 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 4126 return ERROR_IO; 4127 } 4128 chunk_size = ntoh64(chunk_size); 4129 if (chunk_size < 16) { 4130 // The smallest valid chunk is 16 bytes long in this case. 4131 return ERROR_MALFORMED; 4132 } 4133 } else if (chunk_size == 0) { 4134 // next box extends to end of file. 4135 } else if (chunk_size < 8) { 4136 // The smallest valid chunk is 8 bytes long in this case. 
4137 return ERROR_MALFORMED; 4138 } 4139 4140 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 4141 mNextMoofOffset = *offset; 4142 break; 4143 } else if (chunk_size == 0) { 4144 break; 4145 } 4146 *offset += chunk_size; 4147 } 4148 } 4149 break; 4150 } 4151 4152 case FOURCC('t', 'f', 'h', 'd'): { 4153 status_t err; 4154 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 4155 return err; 4156 } 4157 *offset += chunk_size; 4158 break; 4159 } 4160 4161 case FOURCC('t', 'r', 'u', 'n'): { 4162 status_t err; 4163 if (mLastParsedTrackId == mTrackId) { 4164 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 4165 return err; 4166 } 4167 } 4168 4169 *offset += chunk_size; 4170 break; 4171 } 4172 4173 case FOURCC('s', 'a', 'i', 'z'): { 4174 status_t err; 4175 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 4176 return err; 4177 } 4178 *offset += chunk_size; 4179 break; 4180 } 4181 case FOURCC('s', 'a', 'i', 'o'): { 4182 status_t err; 4183 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 4184 return err; 4185 } 4186 *offset += chunk_size; 4187 break; 4188 } 4189 4190 case FOURCC('m', 'd', 'a', 't'): { 4191 // parse DRM info if present 4192 ALOGV("MPEG4Source::parseChunk mdat"); 4193 // if saiz/saoi was previously observed, do something with the sampleinfos 4194 *offset += chunk_size; 4195 break; 4196 } 4197 4198 default: { 4199 *offset += chunk_size; 4200 break; 4201 } 4202 } 4203 return OK; 4204} 4205 4206status_t MPEG4Source::parseSampleAuxiliaryInformationSizes( 4207 off64_t offset, off64_t /* size */) { 4208 ALOGV("parseSampleAuxiliaryInformationSizes"); 4209 // 14496-12 8.7.12 4210 uint8_t version; 4211 if (mDataSource->readAt( 4212 offset, &version, sizeof(version)) 4213 < (ssize_t)sizeof(version)) { 4214 return ERROR_IO; 4215 } 4216 4217 if (version != 0) { 4218 return ERROR_UNSUPPORTED; 4219 } 4220 offset++; 4221 4222 uint32_t flags; 4223 if 
(!mDataSource->getUInt24(offset, &flags)) { 4224 return ERROR_IO; 4225 } 4226 offset += 3; 4227 4228 if (flags & 1) { 4229 uint32_t tmp; 4230 if (!mDataSource->getUInt32(offset, &tmp)) { 4231 return ERROR_MALFORMED; 4232 } 4233 mCurrentAuxInfoType = tmp; 4234 offset += 4; 4235 if (!mDataSource->getUInt32(offset, &tmp)) { 4236 return ERROR_MALFORMED; 4237 } 4238 mCurrentAuxInfoTypeParameter = tmp; 4239 offset += 4; 4240 } 4241 4242 uint8_t defsize; 4243 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 4244 return ERROR_MALFORMED; 4245 } 4246 mCurrentDefaultSampleInfoSize = defsize; 4247 offset++; 4248 4249 uint32_t smplcnt; 4250 if (!mDataSource->getUInt32(offset, &smplcnt)) { 4251 return ERROR_MALFORMED; 4252 } 4253 mCurrentSampleInfoCount = smplcnt; 4254 offset += 4; 4255 4256 if (mCurrentDefaultSampleInfoSize != 0) { 4257 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 4258 return OK; 4259 } 4260 if (smplcnt > mCurrentSampleInfoAllocSize) { 4261 uint8_t * newPtr = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 4262 if (newPtr == NULL) { 4263 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt); 4264 return NO_MEMORY; 4265 } 4266 mCurrentSampleInfoSizes = newPtr; 4267 mCurrentSampleInfoAllocSize = smplcnt; 4268 } 4269 4270 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 4271 return OK; 4272} 4273 4274status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets( 4275 off64_t offset, off64_t /* size */) { 4276 ALOGV("parseSampleAuxiliaryInformationOffsets"); 4277 // 14496-12 8.7.13 4278 uint8_t version; 4279 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 4280 return ERROR_IO; 4281 } 4282 offset++; 4283 4284 uint32_t flags; 4285 if (!mDataSource->getUInt24(offset, &flags)) { 4286 return ERROR_IO; 4287 } 4288 offset += 3; 4289 4290 uint32_t entrycount; 4291 if (!mDataSource->getUInt32(offset, &entrycount)) { 4292 return ERROR_IO; 4293 } 4294 offset += 4; 4295 if 
(entrycount == 0) { 4296 return OK; 4297 } 4298 if (entrycount > UINT32_MAX / 8) { 4299 return ERROR_MALFORMED; 4300 } 4301 4302 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 4303 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8); 4304 if (newPtr == NULL) { 4305 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoOffsetsAllocSize, entrycount * 8); 4306 return NO_MEMORY; 4307 } 4308 mCurrentSampleInfoOffsets = newPtr; 4309 mCurrentSampleInfoOffsetsAllocSize = entrycount; 4310 } 4311 mCurrentSampleInfoOffsetCount = entrycount; 4312 4313 if (mCurrentSampleInfoOffsets == NULL) { 4314 return OK; 4315 } 4316 4317 for (size_t i = 0; i < entrycount; i++) { 4318 if (version == 0) { 4319 uint32_t tmp; 4320 if (!mDataSource->getUInt32(offset, &tmp)) { 4321 return ERROR_IO; 4322 } 4323 mCurrentSampleInfoOffsets[i] = tmp; 4324 offset += 4; 4325 } else { 4326 uint64_t tmp; 4327 if (!mDataSource->getUInt64(offset, &tmp)) { 4328 return ERROR_IO; 4329 } 4330 mCurrentSampleInfoOffsets[i] = tmp; 4331 offset += 8; 4332 } 4333 } 4334 4335 // parse clear/encrypted data 4336 4337 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 4338 4339 drmoffset += mCurrentMoofOffset; 4340 int ivlength; 4341 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 4342 4343 // only 0, 8 and 16 byte initialization vectors are supported 4344 if (ivlength != 0 && ivlength != 8 && ivlength != 16) { 4345 ALOGW("unsupported IV length: %d", ivlength); 4346 return ERROR_MALFORMED; 4347 } 4348 // read CencSampleAuxiliaryDataFormats 4349 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) { 4350 if (i >= mCurrentSamples.size()) { 4351 ALOGW("too few samples"); 4352 break; 4353 } 4354 Sample *smpl = &mCurrentSamples.editItemAt(i); 4355 4356 memset(smpl->iv, 0, 16); 4357 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) { 4358 return ERROR_IO; 4359 } 4360 4361 drmoffset += ivlength; 4362 4363 int32_t smplinfosize = 
mCurrentDefaultSampleInfoSize; 4364 if (smplinfosize == 0) { 4365 smplinfosize = mCurrentSampleInfoSizes[i]; 4366 } 4367 if (smplinfosize > ivlength) { 4368 uint16_t numsubsamples; 4369 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) { 4370 return ERROR_IO; 4371 } 4372 drmoffset += 2; 4373 for (size_t j = 0; j < numsubsamples; j++) { 4374 uint16_t numclear; 4375 uint32_t numencrypted; 4376 if (!mDataSource->getUInt16(drmoffset, &numclear)) { 4377 return ERROR_IO; 4378 } 4379 drmoffset += 2; 4380 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) { 4381 return ERROR_IO; 4382 } 4383 drmoffset += 4; 4384 smpl->clearsizes.add(numclear); 4385 smpl->encryptedsizes.add(numencrypted); 4386 } 4387 } else { 4388 smpl->clearsizes.add(0); 4389 smpl->encryptedsizes.add(smpl->size); 4390 } 4391 } 4392 4393 4394 return OK; 4395} 4396 4397status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 4398 4399 if (size < 8) { 4400 return -EINVAL; 4401 } 4402 4403 uint32_t flags; 4404 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 4405 return ERROR_MALFORMED; 4406 } 4407 4408 if (flags & 0xff000000) { 4409 return -EINVAL; 4410 } 4411 4412 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 4413 return ERROR_MALFORMED; 4414 } 4415 4416 if (mLastParsedTrackId != mTrackId) { 4417 // this is not the right track, skip it 4418 return OK; 4419 } 4420 4421 mTrackFragmentHeaderInfo.mFlags = flags; 4422 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 4423 offset += 8; 4424 size -= 8; 4425 4426 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 4427 4428 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 4429 if (size < 8) { 4430 return -EINVAL; 4431 } 4432 4433 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) { 4434 return ERROR_MALFORMED; 4435 } 4436 offset += 8; 4437 size -= 8; 4438 } 4439 4440 if (flags & 
TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 4441 if (size < 4) { 4442 return -EINVAL; 4443 } 4444 4445 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 4446 return ERROR_MALFORMED; 4447 } 4448 offset += 4; 4449 size -= 4; 4450 } 4451 4452 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 4453 if (size < 4) { 4454 return -EINVAL; 4455 } 4456 4457 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 4458 return ERROR_MALFORMED; 4459 } 4460 offset += 4; 4461 size -= 4; 4462 } 4463 4464 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 4465 if (size < 4) { 4466 return -EINVAL; 4467 } 4468 4469 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 4470 return ERROR_MALFORMED; 4471 } 4472 offset += 4; 4473 size -= 4; 4474 } 4475 4476 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 4477 if (size < 4) { 4478 return -EINVAL; 4479 } 4480 4481 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 4482 return ERROR_MALFORMED; 4483 } 4484 offset += 4; 4485 size -= 4; 4486 } 4487 4488 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 4489 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 4490 } 4491 4492 mTrackFragmentHeaderInfo.mDataOffset = 0; 4493 return OK; 4494} 4495 4496status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 4497 4498 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 4499 if (size < 8) { 4500 return -EINVAL; 4501 } 4502 4503 enum { 4504 kDataOffsetPresent = 0x01, 4505 kFirstSampleFlagsPresent = 0x04, 4506 kSampleDurationPresent = 0x100, 4507 kSampleSizePresent = 0x200, 4508 kSampleFlagsPresent = 0x400, 4509 kSampleCompositionTimeOffsetPresent = 0x800, 4510 }; 4511 4512 uint32_t flags; 4513 if (!mDataSource->getUInt32(offset, &flags)) { 4514 return ERROR_MALFORMED; 4515 } 4516 // |version| only affects 
SampleCompositionTimeOffset field. 4517 // If version == 0, SampleCompositionTimeOffset is uint32_t; 4518 // Otherwise, SampleCompositionTimeOffset is int32_t. 4519 // Sample.compositionOffset is defined as int32_t. 4520 uint8_t version = flags >> 24; 4521 flags &= 0xffffff; 4522 ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags); 4523 4524 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 4525 // These two shall not be used together. 4526 return -EINVAL; 4527 } 4528 4529 uint32_t sampleCount; 4530 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 4531 return ERROR_MALFORMED; 4532 } 4533 offset += 8; 4534 size -= 8; 4535 4536 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 4537 4538 uint32_t firstSampleFlags = 0; 4539 4540 if (flags & kDataOffsetPresent) { 4541 if (size < 4) { 4542 return -EINVAL; 4543 } 4544 4545 int32_t dataOffsetDelta; 4546 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 4547 return ERROR_MALFORMED; 4548 } 4549 4550 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 4551 4552 offset += 4; 4553 size -= 4; 4554 } 4555 4556 if (flags & kFirstSampleFlagsPresent) { 4557 if (size < 4) { 4558 return -EINVAL; 4559 } 4560 4561 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 4562 return ERROR_MALFORMED; 4563 } 4564 offset += 4; 4565 size -= 4; 4566 } 4567 4568 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 4569 sampleCtsOffset = 0; 4570 4571 size_t bytesPerSample = 0; 4572 if (flags & kSampleDurationPresent) { 4573 bytesPerSample += 4; 4574 } else if (mTrackFragmentHeaderInfo.mFlags 4575 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 4576 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 4577 } else if (mTrex) { 4578 sampleDuration = mTrex->default_sample_duration; 4579 } 4580 4581 if (flags & kSampleSizePresent) { 4582 bytesPerSample += 4; 4583 } else if (mTrackFragmentHeaderInfo.mFlags 4584 & 
TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 4585 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 4586 } else { 4587 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 4588 } 4589 4590 if (flags & kSampleFlagsPresent) { 4591 bytesPerSample += 4; 4592 } else if (mTrackFragmentHeaderInfo.mFlags 4593 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 4594 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 4595 } else { 4596 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 4597 } 4598 4599 if (flags & kSampleCompositionTimeOffsetPresent) { 4600 bytesPerSample += 4; 4601 } else { 4602 sampleCtsOffset = 0; 4603 } 4604 4605 if (size < (off64_t)(sampleCount * bytesPerSample)) { 4606 return -EINVAL; 4607 } 4608 4609 Sample tmp; 4610 for (uint32_t i = 0; i < sampleCount; ++i) { 4611 if (flags & kSampleDurationPresent) { 4612 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 4613 return ERROR_MALFORMED; 4614 } 4615 offset += 4; 4616 } 4617 4618 if (flags & kSampleSizePresent) { 4619 if (!mDataSource->getUInt32(offset, &sampleSize)) { 4620 return ERROR_MALFORMED; 4621 } 4622 offset += 4; 4623 } 4624 4625 if (flags & kSampleFlagsPresent) { 4626 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 4627 return ERROR_MALFORMED; 4628 } 4629 offset += 4; 4630 } 4631 4632 if (flags & kSampleCompositionTimeOffsetPresent) { 4633 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 4634 return ERROR_MALFORMED; 4635 } 4636 offset += 4; 4637 } 4638 4639 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, " 4640 " flags 0x%08x", i + 1, 4641 dataOffset, sampleSize, sampleDuration, 4642 (flags & kFirstSampleFlagsPresent) && i == 0 4643 ? 
firstSampleFlags : sampleFlags); 4644 tmp.offset = dataOffset; 4645 tmp.size = sampleSize; 4646 tmp.duration = sampleDuration; 4647 tmp.compositionOffset = sampleCtsOffset; 4648 mCurrentSamples.add(tmp); 4649 4650 dataOffset += sampleSize; 4651 } 4652 4653 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 4654 4655 return OK; 4656} 4657 4658sp<MetaData> MPEG4Source::getFormat() { 4659 Mutex::Autolock autoLock(mLock); 4660 4661 return mFormat; 4662} 4663 4664size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 4665 switch (mNALLengthSize) { 4666 case 1: 4667 return *data; 4668 case 2: 4669 return U16_AT(data); 4670 case 3: 4671 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 4672 case 4: 4673 return U32_AT(data); 4674 } 4675 4676 // This cannot happen, mNALLengthSize springs to life by adding 1 to 4677 // a 2-bit integer. 4678 CHECK(!"Should not be here."); 4679 4680 return 0; 4681} 4682 4683status_t MPEG4Source::read( 4684 MediaBuffer **out, const ReadOptions *options) { 4685 Mutex::Autolock autoLock(mLock); 4686 4687 CHECK(mStarted); 4688 4689 if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) { 4690 *out = nullptr; 4691 return WOULD_BLOCK; 4692 } 4693 4694 if (mFirstMoofOffset > 0) { 4695 return fragmentedRead(out, options); 4696 } 4697 4698 *out = NULL; 4699 4700 int64_t targetSampleTimeUs = -1; 4701 4702 int64_t seekTimeUs; 4703 ReadOptions::SeekMode mode; 4704 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 4705 if (mIsHeif) { 4706 CHECK(mSampleTable == NULL); 4707 CHECK(mItemTable != NULL); 4708 int32_t imageIndex; 4709 if (!mFormat->findInt32(kKeyTrackID, &imageIndex)) { 4710 return ERROR_MALFORMED; 4711 } 4712 4713 status_t err; 4714 if (seekTimeUs >= 0) { 4715 err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex); 4716 } else { 4717 err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex); 4718 } 4719 if (err != OK) { 4720 return err; 4721 } 4722 } else { 4723 uint32_t findFlags = 
0; 4724 switch (mode) { 4725 case ReadOptions::SEEK_PREVIOUS_SYNC: 4726 findFlags = SampleTable::kFlagBefore; 4727 break; 4728 case ReadOptions::SEEK_NEXT_SYNC: 4729 findFlags = SampleTable::kFlagAfter; 4730 break; 4731 case ReadOptions::SEEK_CLOSEST_SYNC: 4732 case ReadOptions::SEEK_CLOSEST: 4733 findFlags = SampleTable::kFlagClosest; 4734 break; 4735 case ReadOptions::SEEK_FRAME_INDEX: 4736 findFlags = SampleTable::kFlagFrameIndex; 4737 break; 4738 default: 4739 CHECK(!"Should not be here."); 4740 break; 4741 } 4742 4743 uint32_t sampleIndex; 4744 status_t err = mSampleTable->findSampleAtTime( 4745 seekTimeUs, 1000000, mTimescale, 4746 &sampleIndex, findFlags); 4747 4748 if (mode == ReadOptions::SEEK_CLOSEST 4749 || mode == ReadOptions::SEEK_FRAME_INDEX) { 4750 // We found the closest sample already, now we want the sync 4751 // sample preceding it (or the sample itself of course), even 4752 // if the subsequent sync sample is closer. 4753 findFlags = SampleTable::kFlagBefore; 4754 } 4755 4756 uint32_t syncSampleIndex; 4757 if (err == OK) { 4758 err = mSampleTable->findSyncSampleNear( 4759 sampleIndex, &syncSampleIndex, findFlags); 4760 } 4761 4762 uint32_t sampleTime; 4763 if (err == OK) { 4764 err = mSampleTable->getMetaDataForSample( 4765 sampleIndex, NULL, NULL, &sampleTime); 4766 } 4767 4768 if (err != OK) { 4769 if (err == ERROR_OUT_OF_RANGE) { 4770 // An attempt to seek past the end of the stream would 4771 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 4772 // this all the way to the MediaPlayer would cause abnormal 4773 // termination. Legacy behaviour appears to be to behave as if 4774 // we had seeked to the end of stream, ending normally. 
4775 err = ERROR_END_OF_STREAM; 4776 } 4777 ALOGV("end of stream"); 4778 return err; 4779 } 4780 4781 if (mode == ReadOptions::SEEK_CLOSEST 4782 || mode == ReadOptions::SEEK_FRAME_INDEX) { 4783 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 4784 } 4785 4786#if 0 4787 uint32_t syncSampleTime; 4788 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 4789 syncSampleIndex, NULL, NULL, &syncSampleTime)); 4790 4791 ALOGI("seek to time %lld us => sample at time %lld us, " 4792 "sync sample at time %lld us", 4793 seekTimeUs, 4794 sampleTime * 1000000ll / mTimescale, 4795 syncSampleTime * 1000000ll / mTimescale); 4796#endif 4797 4798 mCurrentSampleIndex = syncSampleIndex; 4799 } 4800 4801 if (mBuffer != NULL) { 4802 mBuffer->release(); 4803 mBuffer = NULL; 4804 } 4805 4806 // fall through 4807 } 4808 4809 off64_t offset = 0; 4810 size_t size = 0; 4811 uint32_t cts, stts; 4812 bool isSyncSample; 4813 bool newBuffer = false; 4814 if (mBuffer == NULL) { 4815 newBuffer = true; 4816 4817 status_t err; 4818 if (!mIsHeif) { 4819 err = mSampleTable->getMetaDataForSample( 4820 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts); 4821 } else { 4822 err = mItemTable->getImageOffsetAndSize( 4823 options && options->getSeekTo(&seekTimeUs, &mode) ? 
4824 &mCurrentSampleIndex : NULL, &offset, &size); 4825 4826 cts = stts = 0; 4827 isSyncSample = 0; 4828 ALOGV("image offset %lld, size %zu", (long long)offset, size); 4829 } 4830 4831 if (err != OK) { 4832 return err; 4833 } 4834 4835 err = mGroup->acquire_buffer(&mBuffer); 4836 4837 if (err != OK) { 4838 CHECK(mBuffer == NULL); 4839 return err; 4840 } 4841 if (size > mBuffer->size()) { 4842 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 4843 mBuffer->release(); 4844 mBuffer = NULL; 4845 return ERROR_BUFFER_TOO_SMALL; 4846 } 4847 } 4848 4849 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) { 4850 if (newBuffer) { 4851 ssize_t num_bytes_read = 4852 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 4853 4854 if (num_bytes_read < (ssize_t)size) { 4855 mBuffer->release(); 4856 mBuffer = NULL; 4857 4858 return ERROR_IO; 4859 } 4860 4861 CHECK(mBuffer != NULL); 4862 mBuffer->set_range(0, size); 4863 mBuffer->meta_data()->clear(); 4864 mBuffer->meta_data()->setInt64( 4865 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4866 mBuffer->meta_data()->setInt64( 4867 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 4868 4869 if (targetSampleTimeUs >= 0) { 4870 mBuffer->meta_data()->setInt64( 4871 kKeyTargetTime, targetSampleTimeUs); 4872 } 4873 4874 if (isSyncSample) { 4875 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4876 } 4877 4878 ++mCurrentSampleIndex; 4879 } 4880 4881 if (!mIsAVC && !mIsHEVC) { 4882 *out = mBuffer; 4883 mBuffer = NULL; 4884 4885 return OK; 4886 } 4887 4888 // Each NAL unit is split up into its constituent fragments and 4889 // each one of them returned in its own buffer. 
4890 4891 CHECK(mBuffer->range_length() >= mNALLengthSize); 4892 4893 const uint8_t *src = 4894 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 4895 4896 size_t nal_size = parseNALSize(src); 4897 if (mNALLengthSize > SIZE_MAX - nal_size) { 4898 ALOGE("b/24441553, b/24445122"); 4899 } 4900 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 4901 ALOGE("incomplete NAL unit."); 4902 4903 mBuffer->release(); 4904 mBuffer = NULL; 4905 4906 return ERROR_MALFORMED; 4907 } 4908 4909 MediaBuffer *clone = mBuffer->clone(); 4910 CHECK(clone != NULL); 4911 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 4912 4913 CHECK(mBuffer != NULL); 4914 mBuffer->set_range( 4915 mBuffer->range_offset() + mNALLengthSize + nal_size, 4916 mBuffer->range_length() - mNALLengthSize - nal_size); 4917 4918 if (mBuffer->range_length() == 0) { 4919 mBuffer->release(); 4920 mBuffer = NULL; 4921 } 4922 4923 *out = clone; 4924 4925 return OK; 4926 } else { 4927 // Whole NAL units are returned but each fragment is prefixed by 4928 // the start code (0x00 00 00 01). 
4929 ssize_t num_bytes_read = 0; 4930 int32_t drm = 0; 4931 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 4932 if (usesDRM) { 4933 num_bytes_read = 4934 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 4935 } else { 4936 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 4937 } 4938 4939 if (num_bytes_read < (ssize_t)size) { 4940 mBuffer->release(); 4941 mBuffer = NULL; 4942 4943 return ERROR_IO; 4944 } 4945 4946 if (usesDRM) { 4947 CHECK(mBuffer != NULL); 4948 mBuffer->set_range(0, size); 4949 4950 } else { 4951 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4952 size_t srcOffset = 0; 4953 size_t dstOffset = 0; 4954 4955 while (srcOffset < size) { 4956 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 4957 size_t nalLength = 0; 4958 if (!isMalFormed) { 4959 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4960 srcOffset += mNALLengthSize; 4961 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength); 4962 } 4963 4964 if (isMalFormed) { 4965 ALOGE("Video is malformed"); 4966 mBuffer->release(); 4967 mBuffer = NULL; 4968 return ERROR_MALFORMED; 4969 } 4970 4971 if (nalLength == 0) { 4972 continue; 4973 } 4974 4975 if (dstOffset > SIZE_MAX - 4 || 4976 dstOffset + 4 > SIZE_MAX - nalLength || 4977 dstOffset + 4 + nalLength > mBuffer->size()) { 4978 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size()); 4979 android_errorWriteLog(0x534e4554, "27208621"); 4980 mBuffer->release(); 4981 mBuffer = NULL; 4982 return ERROR_MALFORMED; 4983 } 4984 4985 dstData[dstOffset++] = 0; 4986 dstData[dstOffset++] = 0; 4987 dstData[dstOffset++] = 0; 4988 dstData[dstOffset++] = 1; 4989 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4990 srcOffset += nalLength; 4991 dstOffset += nalLength; 4992 } 4993 CHECK_EQ(srcOffset, size); 4994 CHECK(mBuffer != NULL); 4995 mBuffer->set_range(0, dstOffset); 4996 } 4997 4998 mBuffer->meta_data()->clear(); 4999 mBuffer->meta_data()->setInt64( 5000 
kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 5001 mBuffer->meta_data()->setInt64( 5002 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 5003 5004 if (targetSampleTimeUs >= 0) { 5005 mBuffer->meta_data()->setInt64( 5006 kKeyTargetTime, targetSampleTimeUs); 5007 } 5008 5009 if (mIsAVC) { 5010 uint32_t layerId = FindAVCLayerId( 5011 (const uint8_t *)mBuffer->data(), mBuffer->range_length()); 5012 mBuffer->meta_data()->setInt32(kKeyTemporalLayerId, layerId); 5013 } 5014 5015 if (isSyncSample) { 5016 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 5017 } 5018 5019 ++mCurrentSampleIndex; 5020 5021 *out = mBuffer; 5022 mBuffer = NULL; 5023 5024 return OK; 5025 } 5026} 5027 5028status_t MPEG4Source::fragmentedRead( 5029 MediaBuffer **out, const ReadOptions *options) { 5030 5031 ALOGV("MPEG4Source::fragmentedRead"); 5032 5033 CHECK(mStarted); 5034 5035 *out = NULL; 5036 5037 int64_t targetSampleTimeUs = -1; 5038 5039 int64_t seekTimeUs; 5040 ReadOptions::SeekMode mode; 5041 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 5042 5043 int numSidxEntries = mSegments.size(); 5044 if (numSidxEntries != 0) { 5045 int64_t totalTime = 0; 5046 off64_t totalOffset = mFirstMoofOffset; 5047 for (int i = 0; i < numSidxEntries; i++) { 5048 const SidxEntry *se = &mSegments[i]; 5049 if (totalTime + se->mDurationUs > seekTimeUs) { 5050 // The requested time is somewhere in this segment 5051 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) || 5052 (mode == ReadOptions::SEEK_CLOSEST_SYNC && 5053 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 5054 // requested next sync, or closest sync and it was closer to the end of 5055 // this segment 5056 totalTime += se->mDurationUs; 5057 totalOffset += se->mSize; 5058 } 5059 break; 5060 } 5061 totalTime += se->mDurationUs; 5062 totalOffset += se->mSize; 5063 } 5064 mCurrentMoofOffset = totalOffset; 5065 mNextMoofOffset = -1; 5066 mCurrentSamples.clear(); 5067 mCurrentSampleIndex = 0; 
5068 status_t err = parseChunk(&totalOffset); 5069 if (err != OK) { 5070 return err; 5071 } 5072 mCurrentTime = totalTime * mTimescale / 1000000ll; 5073 } else { 5074 // without sidx boxes, we can only seek to 0 5075 mCurrentMoofOffset = mFirstMoofOffset; 5076 mNextMoofOffset = -1; 5077 mCurrentSamples.clear(); 5078 mCurrentSampleIndex = 0; 5079 off64_t tmp = mCurrentMoofOffset; 5080 status_t err = parseChunk(&tmp); 5081 if (err != OK) { 5082 return err; 5083 } 5084 mCurrentTime = 0; 5085 } 5086 5087 if (mBuffer != NULL) { 5088 mBuffer->release(); 5089 mBuffer = NULL; 5090 } 5091 5092 // fall through 5093 } 5094 5095 off64_t offset = 0; 5096 size_t size = 0; 5097 uint32_t cts = 0; 5098 bool isSyncSample = false; 5099 bool newBuffer = false; 5100 if (mBuffer == NULL) { 5101 newBuffer = true; 5102 5103 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 5104 // move to next fragment if there is one 5105 if (mNextMoofOffset <= mCurrentMoofOffset) { 5106 return ERROR_END_OF_STREAM; 5107 } 5108 off64_t nextMoof = mNextMoofOffset; 5109 mCurrentMoofOffset = nextMoof; 5110 mCurrentSamples.clear(); 5111 mCurrentSampleIndex = 0; 5112 status_t err = parseChunk(&nextMoof); 5113 if (err != OK) { 5114 return err; 5115 } 5116 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 5117 return ERROR_END_OF_STREAM; 5118 } 5119 } 5120 5121 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 5122 offset = smpl->offset; 5123 size = smpl->size; 5124 cts = mCurrentTime + smpl->compositionOffset; 5125 mCurrentTime += smpl->duration; 5126 isSyncSample = (mCurrentSampleIndex == 0); // XXX 5127 5128 status_t err = mGroup->acquire_buffer(&mBuffer); 5129 5130 if (err != OK) { 5131 CHECK(mBuffer == NULL); 5132 ALOGV("acquire_buffer returned %d", err); 5133 return err; 5134 } 5135 if (size > mBuffer->size()) { 5136 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 5137 mBuffer->release(); 5138 mBuffer = NULL; 5139 return ERROR_BUFFER_TOO_SMALL; 5140 } 5141 } 5142 5143 const 
Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 5144 const sp<MetaData> bufmeta = mBuffer->meta_data(); 5145 bufmeta->clear(); 5146 if (smpl->encryptedsizes.size()) { 5147 // store clear/encrypted lengths in metadata 5148 bufmeta->setData(kKeyPlainSizes, 0, 5149 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 5150 bufmeta->setData(kKeyEncryptedSizes, 0, 5151 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 5152 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size? 5153 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 5154 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode); 5155 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16); 5156 } 5157 5158 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) { 5159 if (newBuffer) { 5160 if (!isInRange((size_t)0u, mBuffer->size(), size)) { 5161 mBuffer->release(); 5162 mBuffer = NULL; 5163 5164 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size); 5165 return ERROR_MALFORMED; 5166 } 5167 5168 ssize_t num_bytes_read = 5169 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 5170 5171 if (num_bytes_read < (ssize_t)size) { 5172 mBuffer->release(); 5173 mBuffer = NULL; 5174 5175 ALOGE("i/o error"); 5176 return ERROR_IO; 5177 } 5178 5179 CHECK(mBuffer != NULL); 5180 mBuffer->set_range(0, size); 5181 mBuffer->meta_data()->setInt64( 5182 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 5183 mBuffer->meta_data()->setInt64( 5184 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 5185 5186 if (targetSampleTimeUs >= 0) { 5187 mBuffer->meta_data()->setInt64( 5188 kKeyTargetTime, targetSampleTimeUs); 5189 } 5190 5191 if (mIsAVC) { 5192 uint32_t layerId = FindAVCLayerId( 5193 (const uint8_t *)mBuffer->data(), mBuffer->range_length()); 5194 mBuffer->meta_data()->setInt32(kKeyTemporalLayerId, layerId); 5195 } 5196 5197 if (isSyncSample) { 5198 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 5199 } 5200 5201 ++mCurrentSampleIndex; 5202 } 5203 5204 if 
(!mIsAVC && !mIsHEVC) { 5205 *out = mBuffer; 5206 mBuffer = NULL; 5207 5208 return OK; 5209 } 5210 5211 // Each NAL unit is split up into its constituent fragments and 5212 // each one of them returned in its own buffer. 5213 5214 CHECK(mBuffer->range_length() >= mNALLengthSize); 5215 5216 const uint8_t *src = 5217 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 5218 5219 size_t nal_size = parseNALSize(src); 5220 if (mNALLengthSize > SIZE_MAX - nal_size) { 5221 ALOGE("b/24441553, b/24445122"); 5222 } 5223 5224 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 5225 ALOGE("incomplete NAL unit."); 5226 5227 mBuffer->release(); 5228 mBuffer = NULL; 5229 5230 return ERROR_MALFORMED; 5231 } 5232 5233 MediaBuffer *clone = mBuffer->clone(); 5234 CHECK(clone != NULL); 5235 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 5236 5237 CHECK(mBuffer != NULL); 5238 mBuffer->set_range( 5239 mBuffer->range_offset() + mNALLengthSize + nal_size, 5240 mBuffer->range_length() - mNALLengthSize - nal_size); 5241 5242 if (mBuffer->range_length() == 0) { 5243 mBuffer->release(); 5244 mBuffer = NULL; 5245 } 5246 5247 *out = clone; 5248 5249 return OK; 5250 } else { 5251 ALOGV("whole NAL"); 5252 // Whole NAL units are returned but each fragment is prefixed by 5253 // the start code (0x00 00 00 01). 
5254 ssize_t num_bytes_read = 0; 5255 int32_t drm = 0; 5256 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 5257 void *data = NULL; 5258 bool isMalFormed = false; 5259 if (usesDRM) { 5260 if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) { 5261 isMalFormed = true; 5262 } else { 5263 data = mBuffer->data(); 5264 } 5265 } else { 5266 int32_t max_size; 5267 if (mFormat == NULL 5268 || !mFormat->findInt32(kKeyMaxInputSize, &max_size) 5269 || !isInRange((size_t)0u, (size_t)max_size, size)) { 5270 isMalFormed = true; 5271 } else { 5272 data = mSrcBuffer; 5273 } 5274 } 5275 5276 if (isMalFormed || data == NULL) { 5277 ALOGE("isMalFormed size %zu", size); 5278 if (mBuffer != NULL) { 5279 mBuffer->release(); 5280 mBuffer = NULL; 5281 } 5282 return ERROR_MALFORMED; 5283 } 5284 num_bytes_read = mDataSource->readAt(offset, data, size); 5285 5286 if (num_bytes_read < (ssize_t)size) { 5287 mBuffer->release(); 5288 mBuffer = NULL; 5289 5290 ALOGE("i/o error"); 5291 return ERROR_IO; 5292 } 5293 5294 if (usesDRM) { 5295 CHECK(mBuffer != NULL); 5296 mBuffer->set_range(0, size); 5297 5298 } else { 5299 uint8_t *dstData = (uint8_t *)mBuffer->data(); 5300 size_t srcOffset = 0; 5301 size_t dstOffset = 0; 5302 5303 while (srcOffset < size) { 5304 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 5305 size_t nalLength = 0; 5306 if (!isMalFormed) { 5307 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 5308 srcOffset += mNALLengthSize; 5309 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength) 5310 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u) 5311 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength); 5312 } 5313 5314 if (isMalFormed) { 5315 ALOGE("Video is malformed; nalLength %zu", nalLength); 5316 mBuffer->release(); 5317 mBuffer = NULL; 5318 return ERROR_MALFORMED; 5319 } 5320 5321 if (nalLength == 0) { 5322 continue; 5323 } 5324 5325 if (dstOffset > SIZE_MAX - 4 || 5326 dstOffset 
+ 4 > SIZE_MAX - nalLength || 5327 dstOffset + 4 + nalLength > mBuffer->size()) { 5328 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size()); 5329 android_errorWriteLog(0x534e4554, "26365349"); 5330 mBuffer->release(); 5331 mBuffer = NULL; 5332 return ERROR_MALFORMED; 5333 } 5334 5335 dstData[dstOffset++] = 0; 5336 dstData[dstOffset++] = 0; 5337 dstData[dstOffset++] = 0; 5338 dstData[dstOffset++] = 1; 5339 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 5340 srcOffset += nalLength; 5341 dstOffset += nalLength; 5342 } 5343 CHECK_EQ(srcOffset, size); 5344 CHECK(mBuffer != NULL); 5345 mBuffer->set_range(0, dstOffset); 5346 } 5347 5348 mBuffer->meta_data()->setInt64( 5349 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 5350 mBuffer->meta_data()->setInt64( 5351 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 5352 5353 if (targetSampleTimeUs >= 0) { 5354 mBuffer->meta_data()->setInt64( 5355 kKeyTargetTime, targetSampleTimeUs); 5356 } 5357 5358 if (isSyncSample) { 5359 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 5360 } 5361 5362 ++mCurrentSampleIndex; 5363 5364 *out = mBuffer; 5365 mBuffer = NULL; 5366 5367 return OK; 5368 } 5369} 5370 5371MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 5372 const char *mimePrefix) { 5373 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 5374 const char *mime; 5375 if (track->meta != NULL 5376 && track->meta->findCString(kKeyMIMEType, &mime) 5377 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 5378 return track; 5379 } 5380 } 5381 5382 return NULL; 5383} 5384 5385static bool LegacySniffMPEG4(DataSourceBase *source, float *confidence) { 5386 uint8_t header[8]; 5387 5388 ssize_t n = source->readAt(4, header, sizeof(header)); 5389 if (n < (ssize_t)sizeof(header)) { 5390 return false; 5391 } 5392 5393 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 5394 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) 5395 || 
!memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 5396 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 5397 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 5398 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8) 5399 || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8) 5400 || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)) { 5401 *confidence = 0.4; 5402 5403 return true; 5404 } 5405 5406 return false; 5407} 5408 5409static bool isCompatibleBrand(uint32_t fourcc) { 5410 static const uint32_t kCompatibleBrands[] = { 5411 FOURCC('i', 's', 'o', 'm'), 5412 FOURCC('i', 's', 'o', '2'), 5413 FOURCC('a', 'v', 'c', '1'), 5414 FOURCC('h', 'v', 'c', '1'), 5415 FOURCC('h', 'e', 'v', '1'), 5416 FOURCC('3', 'g', 'p', '4'), 5417 FOURCC('m', 'p', '4', '1'), 5418 FOURCC('m', 'p', '4', '2'), 5419 FOURCC('d', 'a', 's', 'h'), 5420 5421 // Won't promise that the following file types can be played. 5422 // Just give these file types a chance. 5423 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 5424 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 5425 5426 FOURCC('3', 'g', '2', 'a'), // 3GPP2 5427 FOURCC('3', 'g', '2', 'b'), 5428 FOURCC('m', 'i', 'f', '1'), // HEIF image 5429 FOURCC('h', 'e', 'i', 'c'), // HEIF image 5430 FOURCC('m', 's', 'f', '1'), // HEIF image sequence 5431 FOURCC('h', 'e', 'v', 'c'), // HEIF image sequence 5432 }; 5433 5434 for (size_t i = 0; 5435 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 5436 ++i) { 5437 if (kCompatibleBrands[i] == fourcc) { 5438 return true; 5439 } 5440 } 5441 5442 return false; 5443} 5444 5445// Attempt to actually parse the 'ftyp' atom and determine if a suitable 5446// compatible brand is present. 5447// Also try to identify where this file's metadata ends 5448// (end of the 'moov' atom) and report it to the caller as part of 5449// the metadata. 
5450static bool BetterSniffMPEG4(DataSourceBase *source, float *confidence) { 5451 // We scan up to 128 bytes to identify this file as an MP4. 5452 static const off64_t kMaxScanOffset = 128ll; 5453 5454 off64_t offset = 0ll; 5455 bool foundGoodFileType = false; 5456 off64_t moovAtomEndOffset = -1ll; 5457 bool done = false; 5458 5459 while (!done && offset < kMaxScanOffset) { 5460 uint32_t hdr[2]; 5461 if (source->readAt(offset, hdr, 8) < 8) { 5462 return false; 5463 } 5464 5465 uint64_t chunkSize = ntohl(hdr[0]); 5466 uint32_t chunkType = ntohl(hdr[1]); 5467 off64_t chunkDataOffset = offset + 8; 5468 5469 if (chunkSize == 1) { 5470 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 5471 return false; 5472 } 5473 5474 chunkSize = ntoh64(chunkSize); 5475 chunkDataOffset += 8; 5476 5477 if (chunkSize < 16) { 5478 // The smallest valid chunk is 16 bytes long in this case. 5479 return false; 5480 } 5481 5482 } else if (chunkSize < 8) { 5483 // The smallest valid chunk is 8 bytes long. 5484 return false; 5485 } 5486 5487 // (data_offset - offset) is either 8 or 16 5488 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset); 5489 if (chunkDataSize < 0) { 5490 ALOGE("b/23540914"); 5491 return false; 5492 } 5493 5494 char chunkstring[5]; 5495 MakeFourCCString(chunkType, chunkstring); 5496 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset); 5497 switch (chunkType) { 5498 case FOURCC('f', 't', 'y', 'p'): 5499 { 5500 if (chunkDataSize < 8) { 5501 return false; 5502 } 5503 5504 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 5505 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 5506 if (i == 1) { 5507 // Skip this index, it refers to the minorVersion, 5508 // not a brand. 
5509 continue; 5510 } 5511 5512 uint32_t brand; 5513 if (source->readAt( 5514 chunkDataOffset + 4 * i, &brand, 4) < 4) { 5515 return false; 5516 } 5517 5518 brand = ntohl(brand); 5519 5520 if (isCompatibleBrand(brand)) { 5521 foundGoodFileType = true; 5522 break; 5523 } 5524 } 5525 5526 if (!foundGoodFileType) { 5527 return false; 5528 } 5529 5530 break; 5531 } 5532 5533 case FOURCC('m', 'o', 'o', 'v'): 5534 { 5535 moovAtomEndOffset = offset + chunkSize; 5536 5537 done = true; 5538 break; 5539 } 5540 5541 default: 5542 break; 5543 } 5544 5545 offset += chunkSize; 5546 } 5547 5548 if (!foundGoodFileType) { 5549 return false; 5550 } 5551 5552 *confidence = 0.4f; 5553 5554 return true; 5555} 5556 5557static MediaExtractor* CreateExtractor(DataSourceBase *source, void *) { 5558 return new MPEG4Extractor(source); 5559} 5560 5561static MediaExtractor::CreatorFunc Sniff( 5562 DataSourceBase *source, float *confidence, void **, 5563 MediaExtractor::FreeMetaFunc *) { 5564 if (BetterSniffMPEG4(source, confidence)) { 5565 return CreateExtractor; 5566 } 5567 5568 if (LegacySniffMPEG4(source, confidence)) { 5569 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 5570 return CreateExtractor; 5571 } 5572 5573 return NULL; 5574} 5575 5576extern "C" { 5577// This is the only symbol that needs to be exported 5578__attribute__ ((visibility ("default"))) 5579MediaExtractor::ExtractorDef GETEXTRACTORDEF() { 5580 return { 5581 MediaExtractor::EXTRACTORDEF_VERSION, 5582 UUID("27575c67-4417-4c54-8d3d-8e626985a164"), 5583 1, // version 5584 "MP4 Extractor", 5585 Sniff 5586 }; 5587} 5588 5589} // extern "C" 5590 5591} // namespace android 5592