MPEG4Extractor.cpp revision af1da8561beda2a8f9959a15808c9b4f404942b3
1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17//#define LOG_NDEBUG 0 18#define LOG_TAG "MPEG4Extractor" 19 20#include <ctype.h> 21#include <inttypes.h> 22#include <memory> 23#include <stdint.h> 24#include <stdlib.h> 25#include <string.h> 26 27#include <utils/Log.h> 28 29#include "MPEG4Extractor.h" 30#include "SampleTable.h" 31#include "ItemTable.h" 32#include "include/ESDS.h" 33 34#include <media/MediaSource.h> 35#include <media/stagefright/foundation/ABitReader.h> 36#include <media/stagefright/foundation/ABuffer.h> 37#include <media/stagefright/foundation/ADebug.h> 38#include <media/stagefright/foundation/AMessage.h> 39#include <media/stagefright/foundation/AUtils.h> 40#include <media/stagefright/foundation/ByteUtils.h> 41#include <media/stagefright/foundation/ColorUtils.h> 42#include <media/stagefright/foundation/avc_utils.h> 43#include <media/stagefright/foundation/hexdump.h> 44#include <media/stagefright/MediaBuffer.h> 45#include <media/stagefright/MediaBufferGroup.h> 46#include <media/stagefright/MediaDefs.h> 47#include <media/stagefright/MetaData.h> 48#include <utils/String8.h> 49 50#include <byteswap.h> 51#include "include/ID3.h" 52 53#ifndef UINT32_MAX 54#define UINT32_MAX (4294967295U) 55#endif 56 57namespace android { 58 59enum { 60 // max track header chunk to return 61 kMaxTrackHeaderSize = 32, 62 63 // maximum size of an atom. 
Some atoms can be bigger according to the spec, 64 // but we only allow up to this size. 65 kMaxAtomSize = 64 * 1024 * 1024, 66}; 67 68class MPEG4Source : public MediaSource { 69public: 70 // Caller retains ownership of both "dataSource" and "sampleTable". 71 MPEG4Source(const sp<MPEG4Extractor> &owner, 72 const sp<MetaData> &format, 73 const sp<DataSource> &dataSource, 74 int32_t timeScale, 75 const sp<SampleTable> &sampleTable, 76 Vector<SidxEntry> &sidx, 77 const Trex *trex, 78 off64_t firstMoofOffset, 79 const sp<ItemTable> &itemTable); 80 virtual status_t init(); 81 82 virtual status_t start(MetaData *params = NULL); 83 virtual status_t stop(); 84 85 virtual sp<MetaData> getFormat(); 86 87 virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL); 88 virtual bool supportNonblockingRead() { return true; } 89 virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL); 90 91protected: 92 virtual ~MPEG4Source(); 93 94private: 95 Mutex mLock; 96 97 // keep the MPEG4Extractor around, since we're referencing its data 98 sp<MPEG4Extractor> mOwner; 99 sp<MetaData> mFormat; 100 sp<DataSource> mDataSource; 101 int32_t mTimescale; 102 sp<SampleTable> mSampleTable; 103 uint32_t mCurrentSampleIndex; 104 uint32_t mCurrentFragmentIndex; 105 Vector<SidxEntry> &mSegments; 106 const Trex *mTrex; 107 off64_t mFirstMoofOffset; 108 off64_t mCurrentMoofOffset; 109 off64_t mNextMoofOffset; 110 uint32_t mCurrentTime; 111 int32_t mLastParsedTrackId; 112 int32_t mTrackId; 113 114 int32_t mCryptoMode; // passed in from extractor 115 int32_t mDefaultIVSize; // passed in from extractor 116 uint8_t mCryptoKey[16]; // passed in from extractor 117 uint32_t mCurrentAuxInfoType; 118 uint32_t mCurrentAuxInfoTypeParameter; 119 int32_t mCurrentDefaultSampleInfoSize; 120 uint32_t mCurrentSampleInfoCount; 121 uint32_t mCurrentSampleInfoAllocSize; 122 uint8_t* mCurrentSampleInfoSizes; 123 uint32_t mCurrentSampleInfoOffsetCount; 124 uint32_t 
mCurrentSampleInfoOffsetsAllocSize; 125 uint64_t* mCurrentSampleInfoOffsets; 126 127 bool mIsAVC; 128 bool mIsHEVC; 129 size_t mNALLengthSize; 130 131 bool mStarted; 132 133 MediaBufferGroup *mGroup; 134 135 MediaBuffer *mBuffer; 136 137 bool mWantsNALFragments; 138 139 uint8_t *mSrcBuffer; 140 141 bool mIsHeif; 142 sp<ItemTable> mItemTable; 143 144 size_t parseNALSize(const uint8_t *data) const; 145 status_t parseChunk(off64_t *offset); 146 status_t parseTrackFragmentHeader(off64_t offset, off64_t size); 147 status_t parseTrackFragmentRun(off64_t offset, off64_t size); 148 status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size); 149 status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size); 150 151 struct TrackFragmentHeaderInfo { 152 enum Flags { 153 kBaseDataOffsetPresent = 0x01, 154 kSampleDescriptionIndexPresent = 0x02, 155 kDefaultSampleDurationPresent = 0x08, 156 kDefaultSampleSizePresent = 0x10, 157 kDefaultSampleFlagsPresent = 0x20, 158 kDurationIsEmpty = 0x10000, 159 }; 160 161 uint32_t mTrackID; 162 uint32_t mFlags; 163 uint64_t mBaseDataOffset; 164 uint32_t mSampleDescriptionIndex; 165 uint32_t mDefaultSampleDuration; 166 uint32_t mDefaultSampleSize; 167 uint32_t mDefaultSampleFlags; 168 169 uint64_t mDataOffset; 170 }; 171 TrackFragmentHeaderInfo mTrackFragmentHeaderInfo; 172 173 struct Sample { 174 off64_t offset; 175 size_t size; 176 uint32_t duration; 177 int32_t compositionOffset; 178 uint8_t iv[16]; 179 Vector<size_t> clearsizes; 180 Vector<size_t> encryptedsizes; 181 }; 182 Vector<Sample> mCurrentSamples; 183 184 MPEG4Source(const MPEG4Source &); 185 MPEG4Source &operator=(const MPEG4Source &); 186}; 187 188// This custom data source wraps an existing one and satisfies requests 189// falling entirely within a cached range from the cache while forwarding 190// all remaining requests to the wrapped datasource. 
191// This is used to cache the full sampletable metadata for a single track, 192// possibly wrapping multiple times to cover all tracks, i.e. 193// Each MPEG4DataSource caches the sampletable metadata for a single track. 194 195struct MPEG4DataSource : public DataSource { 196 explicit MPEG4DataSource(const sp<DataSource> &source); 197 198 virtual status_t initCheck() const; 199 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 200 virtual status_t getSize(off64_t *size); 201 virtual uint32_t flags(); 202 203 status_t setCachedRange(off64_t offset, size_t size); 204 205protected: 206 virtual ~MPEG4DataSource(); 207 208private: 209 Mutex mLock; 210 211 sp<DataSource> mSource; 212 off64_t mCachedOffset; 213 size_t mCachedSize; 214 uint8_t *mCache; 215 216 void clearCache(); 217 218 MPEG4DataSource(const MPEG4DataSource &); 219 MPEG4DataSource &operator=(const MPEG4DataSource &); 220}; 221 222MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source) 223 : mSource(source), 224 mCachedOffset(0), 225 mCachedSize(0), 226 mCache(NULL) { 227} 228 229MPEG4DataSource::~MPEG4DataSource() { 230 clearCache(); 231} 232 233void MPEG4DataSource::clearCache() { 234 if (mCache) { 235 free(mCache); 236 mCache = NULL; 237 } 238 239 mCachedOffset = 0; 240 mCachedSize = 0; 241} 242 243status_t MPEG4DataSource::initCheck() const { 244 return mSource->initCheck(); 245} 246 247ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { 248 Mutex::Autolock autoLock(mLock); 249 250 if (isInRange(mCachedOffset, mCachedSize, offset, size)) { 251 memcpy(data, &mCache[offset - mCachedOffset], size); 252 return size; 253 } 254 255 return mSource->readAt(offset, data, size); 256} 257 258status_t MPEG4DataSource::getSize(off64_t *size) { 259 return mSource->getSize(size); 260} 261 262uint32_t MPEG4DataSource::flags() { 263 return mSource->flags(); 264} 265 266status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { 267 Mutex::Autolock 
autoLock(mLock); 268 269 clearCache(); 270 271 mCache = (uint8_t *)malloc(size); 272 273 if (mCache == NULL) { 274 return -ENOMEM; 275 } 276 277 mCachedOffset = offset; 278 mCachedSize = size; 279 280 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize); 281 282 if (err < (ssize_t)size) { 283 clearCache(); 284 285 return ERROR_IO; 286 } 287 288 return OK; 289} 290 291//////////////////////////////////////////////////////////////////////////////// 292 293static const bool kUseHexDump = false; 294 295static const char *FourCC2MIME(uint32_t fourcc) { 296 switch (fourcc) { 297 case FOURCC('m', 'p', '4', 'a'): 298 return MEDIA_MIMETYPE_AUDIO_AAC; 299 300 case FOURCC('s', 'a', 'm', 'r'): 301 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 302 303 case FOURCC('s', 'a', 'w', 'b'): 304 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 305 306 case FOURCC('m', 'p', '4', 'v'): 307 return MEDIA_MIMETYPE_VIDEO_MPEG4; 308 309 case FOURCC('s', '2', '6', '3'): 310 case FOURCC('h', '2', '6', '3'): 311 case FOURCC('H', '2', '6', '3'): 312 return MEDIA_MIMETYPE_VIDEO_H263; 313 314 case FOURCC('a', 'v', 'c', '1'): 315 return MEDIA_MIMETYPE_VIDEO_AVC; 316 317 case FOURCC('h', 'v', 'c', '1'): 318 case FOURCC('h', 'e', 'v', '1'): 319 return MEDIA_MIMETYPE_VIDEO_HEVC; 320 default: 321 CHECK(!"should not be here."); 322 return NULL; 323 } 324} 325 326static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 327 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 328 // AMR NB audio is always mono, 8kHz 329 *channels = 1; 330 *rate = 8000; 331 return true; 332 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 333 // AMR WB audio is always mono, 16kHz 334 *channels = 1; 335 *rate = 16000; 336 return true; 337 } 338 return false; 339} 340 341MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source, const char *mime) 342 : mMoofOffset(0), 343 mMoofFound(false), 344 mMdatFound(false), 345 mDataSource(source), 346 
mInitCheck(NO_INIT), 347 mHeaderTimescale(0), 348 mIsQT(false), 349 mIsHeif(false), 350 mHasMoovBox(false), 351 mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)), 352 mFirstTrack(NULL), 353 mLastTrack(NULL), 354 mFileMetaData(new MetaData), 355 mFirstSINF(NULL), 356 mIsDrm(false) { 357 ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif); 358} 359 360MPEG4Extractor::~MPEG4Extractor() { 361 release(); 362} 363 364void MPEG4Extractor::release() { 365 Track *track = mFirstTrack; 366 while (track) { 367 Track *next = track->next; 368 369 delete track; 370 track = next; 371 } 372 mFirstTrack = mLastTrack = NULL; 373 374 SINF *sinf = mFirstSINF; 375 while (sinf) { 376 SINF *next = sinf->next; 377 delete[] sinf->IPMPData; 378 delete sinf; 379 sinf = next; 380 } 381 mFirstSINF = NULL; 382 383 for (size_t i = 0; i < mPssh.size(); i++) { 384 delete [] mPssh[i].data; 385 } 386 mPssh.clear(); 387 388 if (mDataSource != NULL) { 389 mDataSource->close(); 390 mDataSource.clear(); 391 } 392} 393 394uint32_t MPEG4Extractor::flags() const { 395 return CAN_PAUSE | 396 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 
397 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 398} 399 400sp<MetaData> MPEG4Extractor::getMetaData() { 401 status_t err; 402 if ((err = readMetaData()) != OK) { 403 return new MetaData; 404 } 405 406 return mFileMetaData; 407} 408 409size_t MPEG4Extractor::countTracks() { 410 status_t err; 411 if ((err = readMetaData()) != OK) { 412 ALOGV("MPEG4Extractor::countTracks: no tracks"); 413 return 0; 414 } 415 416 size_t n = 0; 417 Track *track = mFirstTrack; 418 while (track) { 419 ++n; 420 track = track->next; 421 } 422 423 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n); 424 return n; 425} 426 427sp<MetaData> MPEG4Extractor::getTrackMetaData( 428 size_t index, uint32_t flags) { 429 status_t err; 430 if ((err = readMetaData()) != OK) { 431 return NULL; 432 } 433 434 Track *track = mFirstTrack; 435 while (index > 0) { 436 if (track == NULL) { 437 return NULL; 438 } 439 440 track = track->next; 441 --index; 442 } 443 444 if (track == NULL) { 445 return NULL; 446 } 447 448 [=] { 449 int64_t duration; 450 int32_t samplerate; 451 if (track->has_elst && mHeaderTimescale != 0 && 452 track->meta->findInt64(kKeyDuration, &duration) && 453 track->meta->findInt32(kKeySampleRate, &samplerate)) { 454 455 track->has_elst = false; 456 457 if (track->elst_segment_duration > INT64_MAX) { 458 return; 459 } 460 int64_t segment_duration = track->elst_segment_duration; 461 int64_t media_time = track->elst_media_time; 462 int64_t halfscale = mHeaderTimescale / 2; 463 ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64 464 ", halfscale = %" PRId64 ", timescale = %d", 465 segment_duration, 466 media_time, 467 halfscale, 468 mHeaderTimescale); 469 470 int64_t delay; 471 // delay = ((media_time * samplerate) + halfscale) / mHeaderTimescale; 472 if (__builtin_mul_overflow(media_time, samplerate, &delay) || 473 __builtin_add_overflow(delay, halfscale, &delay) || 474 (delay /= mHeaderTimescale, false) || 475 delay > INT32_MAX || 476 delay < INT32_MIN) { 477 return; 478 
} 479 ALOGV("delay = %" PRId64, delay); 480 track->meta->setInt32(kKeyEncoderDelay, delay); 481 482 int64_t scaled_duration; 483 // scaled_duration = duration * mHeaderTimescale; 484 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) { 485 return; 486 } 487 ALOGV("scaled_duration = %" PRId64, scaled_duration); 488 489 int64_t segment_end; 490 int64_t padding; 491 // padding = scaled_duration - ((segment_duration + media_time) * 1000000); 492 if (__builtin_add_overflow(segment_duration, media_time, &segment_end) || 493 __builtin_mul_overflow(segment_end, 1000000, &segment_end) || 494 __builtin_sub_overflow(scaled_duration, segment_end, &padding)) { 495 return; 496 } 497 ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding); 498 499 if (padding < 0) { 500 // track duration from media header (which is what kKeyDuration is) might 501 // be slightly shorter than the segment duration, which would make the 502 // padding negative. Clamp to zero. 503 padding = 0; 504 } 505 506 int64_t paddingsamples; 507 int64_t halfscale_e6; 508 int64_t timescale_e6; 509 // paddingsamples = ((padding * samplerate) + (halfscale * 1000000)) 510 // / (mHeaderTimescale * 1000000); 511 if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) || 512 __builtin_mul_overflow(halfscale, 1000000, &halfscale_e6) || 513 __builtin_mul_overflow(mHeaderTimescale, 1000000, ×cale_e6) || 514 __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) || 515 (paddingsamples /= timescale_e6, false) || 516 paddingsamples > INT32_MAX) { 517 return; 518 } 519 ALOGV("paddingsamples = %" PRId64, paddingsamples); 520 track->meta->setInt32(kKeyEncoderPadding, paddingsamples); 521 } 522 }(); 523 524 if ((flags & kIncludeExtensiveMetaData) 525 && !track->includes_expensive_metadata) { 526 track->includes_expensive_metadata = true; 527 528 const char *mime; 529 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 530 if (!strncasecmp("video/", mime, 6)) { 
531 // MPEG2 tracks do not provide CSD, so read the stream header 532 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) { 533 off64_t offset; 534 size_t size; 535 if (track->sampleTable->getMetaDataForSample( 536 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) { 537 if (size > kMaxTrackHeaderSize) { 538 size = kMaxTrackHeaderSize; 539 } 540 uint8_t header[kMaxTrackHeaderSize]; 541 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) { 542 track->meta->setData(kKeyStreamHeader, 'mdat', header, size); 543 } 544 } 545 } 546 547 if (mMoofOffset > 0) { 548 int64_t duration; 549 if (track->meta->findInt64(kKeyDuration, &duration)) { 550 // nothing fancy, just pick a frame near 1/4th of the duration 551 track->meta->setInt64( 552 kKeyThumbnailTime, duration / 4); 553 } 554 } else { 555 uint32_t sampleIndex; 556 uint32_t sampleTime; 557 if (track->timescale != 0 && 558 track->sampleTable->findThumbnailSample(&sampleIndex) == OK 559 && track->sampleTable->getMetaDataForSample( 560 sampleIndex, NULL /* offset */, NULL /* size */, 561 &sampleTime) == OK) { 562 track->meta->setInt64( 563 kKeyThumbnailTime, 564 ((int64_t)sampleTime * 1000000) / track->timescale); 565 } 566 } 567 } 568 } 569 570 return track->meta; 571} 572 573status_t MPEG4Extractor::readMetaData() { 574 if (mInitCheck != NO_INIT) { 575 return mInitCheck; 576 } 577 578 off64_t offset = 0; 579 status_t err; 580 bool sawMoovOrSidx = false; 581 582 while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) || 583 (mIsHeif && (mPreferHeif || !mHasMoovBox) && 584 (mItemTable != NULL) && mItemTable->isValid()))) { 585 off64_t orig_offset = offset; 586 err = parseChunk(&offset, 0); 587 588 if (err != OK && err != UNKNOWN_ERROR) { 589 break; 590 } else if (offset <= orig_offset) { 591 // only continue parsing if the offset was advanced, 592 // otherwise we might end up in an infinite loop 593 ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset); 594 err 
= ERROR_MALFORMED; 595 break; 596 } else if (err == UNKNOWN_ERROR) { 597 sawMoovOrSidx = true; 598 } 599 } 600 601 if (mIsHeif && (mItemTable != NULL) && (mItemTable->countImages() > 0)) { 602 for (uint32_t imageIndex = 0; 603 imageIndex < mItemTable->countImages(); imageIndex++) { 604 sp<MetaData> meta = mItemTable->getImageMeta(imageIndex); 605 if (meta == NULL) { 606 ALOGE("heif image %u has no meta!", imageIndex); 607 continue; 608 } 609 // Some heif files advertise image sequence brands (eg. 'hevc') in 610 // ftyp box, but don't have any valid tracks in them. Instead of 611 // reporting the entire file as malformed, we override the error 612 // to allow still images to be extracted. 613 if (err != OK) { 614 ALOGW("Extracting still images only"); 615 err = OK; 616 } 617 618 ALOGV("adding HEIF image track %u", imageIndex); 619 Track *track = new Track; 620 track->next = NULL; 621 if (mLastTrack != NULL) { 622 mLastTrack->next = track; 623 } else { 624 mFirstTrack = track; 625 } 626 mLastTrack = track; 627 628 track->meta = meta; 629 track->meta->setInt32(kKeyTrackID, imageIndex); 630 track->includes_expensive_metadata = false; 631 track->skipTrack = false; 632 track->timescale = 0; 633 } 634 } 635 636 if (mInitCheck == OK) { 637 if (findTrackByMimePrefix("video/") != NULL) { 638 mFileMetaData->setCString( 639 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 640 } else if (findTrackByMimePrefix("audio/") != NULL) { 641 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); 642 } else if (findTrackByMimePrefix( 643 MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) { 644 mFileMetaData->setCString( 645 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_HEIF); 646 } else { 647 mFileMetaData->setCString(kKeyMIMEType, "application/octet-stream"); 648 } 649 } else { 650 mInitCheck = err; 651 } 652 653 CHECK_NE(err, (status_t)NO_INIT); 654 655 // copy pssh data into file metadata 656 uint64_t psshsize = 0; 657 for (size_t i = 0; i < mPssh.size(); i++) { 658 psshsize += 20 + mPssh[i].datalen; 
659 } 660 if (psshsize > 0 && psshsize <= UINT32_MAX) { 661 char *buf = (char*)malloc(psshsize); 662 if (!buf) { 663 ALOGE("b/28471206"); 664 return NO_MEMORY; 665 } 666 char *ptr = buf; 667 for (size_t i = 0; i < mPssh.size(); i++) { 668 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 669 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 670 ptr += (20 + mPssh[i].datalen); 671 } 672 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize); 673 free(buf); 674 } 675 676 return mInitCheck; 677} 678 679char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { 680 if (mFirstSINF == NULL) { 681 return NULL; 682 } 683 684 SINF *sinf = mFirstSINF; 685 while (sinf && (trackID != sinf->trackID)) { 686 sinf = sinf->next; 687 } 688 689 if (sinf == NULL) { 690 return NULL; 691 } 692 693 *len = sinf->len; 694 return sinf->IPMPData; 695} 696 697// Reads an encoded integer 7 bits at a time until it encounters the high bit clear. 698static int32_t readSize(off64_t offset, 699 const sp<DataSource> &DataSource, uint8_t *numOfBytes) { 700 uint32_t size = 0; 701 uint8_t data; 702 bool moreData = true; 703 *numOfBytes = 0; 704 705 while (moreData) { 706 if (DataSource->readAt(offset, &data, 1) < 1) { 707 return -1; 708 } 709 offset ++; 710 moreData = (data >= 128) ? 
true : false; 711 size = (size << 7) | (data & 0x7f); // Take last 7 bits 712 (*numOfBytes) ++; 713 } 714 715 return size; 716} 717 718status_t MPEG4Extractor::parseDrmSINF( 719 off64_t * /* offset */, off64_t data_offset) { 720 uint8_t updateIdTag; 721 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 722 return ERROR_IO; 723 } 724 data_offset ++; 725 726 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 727 return ERROR_MALFORMED; 728 } 729 730 uint8_t numOfBytes; 731 int32_t size = readSize(data_offset, mDataSource, &numOfBytes); 732 if (size < 0) { 733 return ERROR_IO; 734 } 735 data_offset += numOfBytes; 736 737 while(size >= 11 ) { 738 uint8_t descriptorTag; 739 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { 740 return ERROR_IO; 741 } 742 data_offset ++; 743 744 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { 745 return ERROR_MALFORMED; 746 } 747 748 uint8_t buffer[8]; 749 //ObjectDescriptorID and ObjectDescriptor url flag 750 if (mDataSource->readAt(data_offset, buffer, 2) < 2) { 751 return ERROR_IO; 752 } 753 data_offset += 2; 754 755 if ((buffer[1] >> 5) & 0x0001) { //url flag is set 756 return ERROR_MALFORMED; 757 } 758 759 if (mDataSource->readAt(data_offset, buffer, 8) < 8) { 760 return ERROR_IO; 761 } 762 data_offset += 8; 763 764 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) 765 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { 766 return ERROR_MALFORMED; 767 } 768 769 SINF *sinf = new SINF; 770 sinf->trackID = U16_AT(&buffer[3]); 771 sinf->IPMPDescriptorID = buffer[7]; 772 sinf->next = mFirstSINF; 773 mFirstSINF = sinf; 774 775 size -= (8 + 2 + 1); 776 } 777 778 if (size != 0) { 779 return ERROR_MALFORMED; 780 } 781 782 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 783 return ERROR_IO; 784 } 785 data_offset ++; 786 787 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 788 return ERROR_MALFORMED; 789 } 790 791 size = readSize(data_offset, mDataSource, &numOfBytes); 792 if 
(size < 0) { 793 return ERROR_IO; 794 } 795 data_offset += numOfBytes; 796 797 while (size > 0) { 798 uint8_t tag; 799 int32_t dataLen; 800 if (mDataSource->readAt(data_offset, &tag, 1) < 1) { 801 return ERROR_IO; 802 } 803 data_offset ++; 804 805 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { 806 uint8_t id; 807 dataLen = readSize(data_offset, mDataSource, &numOfBytes); 808 if (dataLen < 0) { 809 return ERROR_IO; 810 } else if (dataLen < 4) { 811 return ERROR_MALFORMED; 812 } 813 data_offset += numOfBytes; 814 815 if (mDataSource->readAt(data_offset, &id, 1) < 1) { 816 return ERROR_IO; 817 } 818 data_offset ++; 819 820 SINF *sinf = mFirstSINF; 821 while (sinf && (sinf->IPMPDescriptorID != id)) { 822 sinf = sinf->next; 823 } 824 if (sinf == NULL) { 825 return ERROR_MALFORMED; 826 } 827 sinf->len = dataLen - 3; 828 sinf->IPMPData = new (std::nothrow) char[sinf->len]; 829 if (sinf->IPMPData == NULL) { 830 return ERROR_MALFORMED; 831 } 832 data_offset += 2; 833 834 if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) { 835 return ERROR_IO; 836 } 837 data_offset += sinf->len; 838 839 size -= (dataLen + numOfBytes + 1); 840 } 841 } 842 843 if (size != 0) { 844 return ERROR_MALFORMED; 845 } 846 847 return UNKNOWN_ERROR; // Return a dummy error. 
848} 849 850struct PathAdder { 851 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 852 : mPath(path) { 853 mPath->push(chunkType); 854 } 855 856 ~PathAdder() { 857 mPath->pop(); 858 } 859 860private: 861 Vector<uint32_t> *mPath; 862 863 PathAdder(const PathAdder &); 864 PathAdder &operator=(const PathAdder &); 865}; 866 867static bool underMetaDataPath(const Vector<uint32_t> &path) { 868 return path.size() >= 5 869 && path[0] == FOURCC('m', 'o', 'o', 'v') 870 && path[1] == FOURCC('u', 'd', 't', 'a') 871 && path[2] == FOURCC('m', 'e', 't', 'a') 872 && path[3] == FOURCC('i', 'l', 's', 't'); 873} 874 875static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) { 876 return path.size() >= 2 877 && path[0] == FOURCC('m', 'o', 'o', 'v') 878 && path[1] == FOURCC('m', 'e', 't', 'a') 879 && (depth == 2 880 || (depth == 3 881 && (path[2] == FOURCC('h', 'd', 'l', 'r') 882 || path[2] == FOURCC('i', 'l', 's', 't') 883 || path[2] == FOURCC('k', 'e', 'y', 's')))); 884} 885 886// Given a time in seconds since Jan 1 1904, produce a human-readable string. 
887static bool convertTimeToDate(int64_t time_1904, String8 *s) { 888 // delta between mpeg4 time and unix epoch time 889 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600); 890 if (time_1904 < INT64_MIN + delta) { 891 return false; 892 } 893 time_t time_1970 = time_1904 - delta; 894 895 char tmp[32]; 896 struct tm* tm = gmtime(&time_1970); 897 if (tm != NULL && 898 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) { 899 s->setTo(tmp); 900 return true; 901 } 902 return false; 903} 904 905status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 906 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth); 907 908 if (*offset < 0) { 909 ALOGE("b/23540914"); 910 return ERROR_MALFORMED; 911 } 912 if (depth > 100) { 913 ALOGE("b/27456299"); 914 return ERROR_MALFORMED; 915 } 916 uint32_t hdr[2]; 917 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 918 return ERROR_IO; 919 } 920 uint64_t chunk_size = ntohl(hdr[0]); 921 int32_t chunk_type = ntohl(hdr[1]); 922 off64_t data_offset = *offset + 8; 923 924 if (chunk_size == 1) { 925 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 926 return ERROR_IO; 927 } 928 chunk_size = ntoh64(chunk_size); 929 data_offset += 8; 930 931 if (chunk_size < 16) { 932 // The smallest valid chunk is 16 bytes long in this case. 933 return ERROR_MALFORMED; 934 } 935 } else if (chunk_size == 0) { 936 if (depth == 0) { 937 // atom extends to end of file 938 off64_t sourceSize; 939 if (mDataSource->getSize(&sourceSize) == OK) { 940 chunk_size = (sourceSize - *offset); 941 } else { 942 // XXX could we just pick a "sufficiently large" value here? 943 ALOGE("atom size is 0, and data source has no size"); 944 return ERROR_MALFORMED; 945 } 946 } else { 947 // not allowed for non-toplevel atoms, skip it 948 *offset += 4; 949 return OK; 950 } 951 } else if (chunk_size < 8) { 952 // The smallest valid chunk is 8 bytes long. 
953 ALOGE("invalid chunk size: %" PRIu64, chunk_size); 954 return ERROR_MALFORMED; 955 } 956 957 char chunk[5]; 958 MakeFourCCString(chunk_type, chunk); 959 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth); 960 961 if (kUseHexDump) { 962 static const char kWhitespace[] = " "; 963 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 964 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size); 965 966 char buffer[256]; 967 size_t n = chunk_size; 968 if (n > sizeof(buffer)) { 969 n = sizeof(buffer); 970 } 971 if (mDataSource->readAt(*offset, buffer, n) 972 < (ssize_t)n) { 973 return ERROR_IO; 974 } 975 976 hexdump(buffer, n); 977 } 978 979 PathAdder autoAdder(&mPath, chunk_type); 980 981 // (data_offset - *offset) is either 8 or 16 982 off64_t chunk_data_size = chunk_size - (data_offset - *offset); 983 if (chunk_data_size < 0) { 984 ALOGE("b/23540914"); 985 return ERROR_MALFORMED; 986 } 987 if (chunk_type != FOURCC('m', 'd', 'a', 't') && chunk_data_size > kMaxAtomSize) { 988 char errMsg[100]; 989 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size); 990 ALOGE("%s (b/28615448)", errMsg); 991 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg)); 992 return ERROR_MALFORMED; 993 } 994 995 if (chunk_type != FOURCC('c', 'p', 'r', 't') 996 && chunk_type != FOURCC('c', 'o', 'v', 'r') 997 && mPath.size() == 5 && underMetaDataPath(mPath)) { 998 off64_t stop_offset = *offset + chunk_size; 999 *offset = data_offset; 1000 while (*offset < stop_offset) { 1001 status_t err = parseChunk(offset, depth + 1); 1002 if (err != OK) { 1003 return err; 1004 } 1005 } 1006 1007 if (*offset != stop_offset) { 1008 return ERROR_MALFORMED; 1009 } 1010 1011 return OK; 1012 } 1013 1014 switch(chunk_type) { 1015 case FOURCC('m', 'o', 'o', 'v'): 1016 case FOURCC('t', 'r', 'a', 'k'): 1017 case FOURCC('m', 'd', 'i', 'a'): 1018 case FOURCC('m', 'i', 'n', 'f'): 1019 case FOURCC('d', 'i', 'n', 'f'): 1020 
case FOURCC('s', 't', 'b', 'l'): 1021 case FOURCC('m', 'v', 'e', 'x'): 1022 case FOURCC('m', 'o', 'o', 'f'): 1023 case FOURCC('t', 'r', 'a', 'f'): 1024 case FOURCC('m', 'f', 'r', 'a'): 1025 case FOURCC('u', 'd', 't', 'a'): 1026 case FOURCC('i', 'l', 's', 't'): 1027 case FOURCC('s', 'i', 'n', 'f'): 1028 case FOURCC('s', 'c', 'h', 'i'): 1029 case FOURCC('e', 'd', 't', 's'): 1030 case FOURCC('w', 'a', 'v', 'e'): 1031 { 1032 if (chunk_type == FOURCC('m', 'o', 'o', 'v') && depth != 0) { 1033 ALOGE("moov: depth %d", depth); 1034 return ERROR_MALFORMED; 1035 } 1036 1037 if (chunk_type == FOURCC('m', 'o', 'o', 'v') && mInitCheck == OK) { 1038 ALOGE("duplicate moov"); 1039 return ERROR_MALFORMED; 1040 } 1041 1042 if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) { 1043 // store the offset of the first segment 1044 mMoofFound = true; 1045 mMoofOffset = *offset; 1046 } 1047 1048 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 1049 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size); 1050 1051 if (mDataSource->flags() 1052 & (DataSource::kWantsPrefetching 1053 | DataSource::kIsCachingDataSource)) { 1054 sp<MPEG4DataSource> cachedSource = 1055 new MPEG4DataSource(mDataSource); 1056 1057 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { 1058 mDataSource = cachedSource; 1059 } 1060 } 1061 1062 if (mLastTrack == NULL) { 1063 return ERROR_MALFORMED; 1064 } 1065 1066 mLastTrack->sampleTable = new SampleTable(mDataSource); 1067 } 1068 1069 bool isTrack = false; 1070 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 1071 if (depth != 1) { 1072 ALOGE("trak: depth %d", depth); 1073 return ERROR_MALFORMED; 1074 } 1075 isTrack = true; 1076 1077 ALOGV("adding new track"); 1078 Track *track = new Track; 1079 track->next = NULL; 1080 if (mLastTrack) { 1081 mLastTrack->next = track; 1082 } else { 1083 mFirstTrack = track; 1084 } 1085 mLastTrack = track; 1086 1087 track->meta = new MetaData; 1088 track->includes_expensive_metadata = false; 1089 
track->skipTrack = false; 1090 track->timescale = 0; 1091 track->meta->setCString(kKeyMIMEType, "application/octet-stream"); 1092 track->has_elst = false; 1093 } 1094 1095 off64_t stop_offset = *offset + chunk_size; 1096 *offset = data_offset; 1097 while (*offset < stop_offset) { 1098 status_t err = parseChunk(offset, depth + 1); 1099 if (err != OK) { 1100 if (isTrack) { 1101 mLastTrack->skipTrack = true; 1102 break; 1103 } 1104 return err; 1105 } 1106 } 1107 1108 if (*offset != stop_offset) { 1109 return ERROR_MALFORMED; 1110 } 1111 1112 if (isTrack) { 1113 int32_t trackId; 1114 // There must be exact one track header per track. 1115 if (!mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) { 1116 mLastTrack->skipTrack = true; 1117 } 1118 1119 status_t err = verifyTrack(mLastTrack); 1120 if (err != OK) { 1121 mLastTrack->skipTrack = true; 1122 } 1123 1124 if (mLastTrack->skipTrack) { 1125 ALOGV("skipping this track..."); 1126 Track *cur = mFirstTrack; 1127 1128 if (cur == mLastTrack) { 1129 delete cur; 1130 mFirstTrack = mLastTrack = NULL; 1131 } else { 1132 while (cur && cur->next != mLastTrack) { 1133 cur = cur->next; 1134 } 1135 if (cur) { 1136 cur->next = NULL; 1137 } 1138 delete mLastTrack; 1139 mLastTrack = cur; 1140 } 1141 1142 return OK; 1143 } 1144 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 1145 mInitCheck = OK; 1146 1147 if (!mIsDrm) { 1148 return UNKNOWN_ERROR; // Return a dummy error. 
1149 } else { 1150 return OK; 1151 } 1152 } 1153 break; 1154 } 1155 1156 case FOURCC('e', 'l', 's', 't'): 1157 { 1158 *offset += chunk_size; 1159 1160 if (!mLastTrack) { 1161 return ERROR_MALFORMED; 1162 } 1163 1164 // See 14496-12 8.6.6 1165 uint8_t version; 1166 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 1167 return ERROR_IO; 1168 } 1169 1170 uint32_t entry_count; 1171 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 1172 return ERROR_IO; 1173 } 1174 1175 if (entry_count != 1) { 1176 // we only support a single entry at the moment, for gapless playback 1177 ALOGW("ignoring edit list with %d entries", entry_count); 1178 } else { 1179 off64_t entriesoffset = data_offset + 8; 1180 uint64_t segment_duration; 1181 int64_t media_time; 1182 1183 if (version == 1) { 1184 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 1185 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 1186 return ERROR_IO; 1187 } 1188 } else if (version == 0) { 1189 uint32_t sd; 1190 int32_t mt; 1191 if (!mDataSource->getUInt32(entriesoffset, &sd) || 1192 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 1193 return ERROR_IO; 1194 } 1195 segment_duration = sd; 1196 media_time = mt; 1197 } else { 1198 return ERROR_IO; 1199 } 1200 1201 // save these for later, because the elst atom might precede 1202 // the atoms that actually gives us the duration and sample rate 1203 // needed to calculate the padding and delay values 1204 mLastTrack->has_elst = true; 1205 mLastTrack->elst_media_time = media_time; 1206 mLastTrack->elst_segment_duration = segment_duration; 1207 } 1208 break; 1209 } 1210 1211 case FOURCC('f', 'r', 'm', 'a'): 1212 { 1213 *offset += chunk_size; 1214 1215 uint32_t original_fourcc; 1216 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 1217 return ERROR_IO; 1218 } 1219 original_fourcc = ntohl(original_fourcc); 1220 ALOGV("read original format: %d", original_fourcc); 1221 1222 if (mLastTrack == NULL) 
{ 1223 return ERROR_MALFORMED; 1224 } 1225 1226 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 1227 uint32_t num_channels = 0; 1228 uint32_t sample_rate = 0; 1229 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 1230 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1231 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1232 } 1233 break; 1234 } 1235 1236 case FOURCC('t', 'e', 'n', 'c'): 1237 { 1238 *offset += chunk_size; 1239 1240 if (chunk_size < 32) { 1241 return ERROR_MALFORMED; 1242 } 1243 1244 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1245 // default IV size, 16 bytes default KeyID 1246 // (ISO 23001-7) 1247 char buf[4]; 1248 memset(buf, 0, 4); 1249 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1250 return ERROR_IO; 1251 } 1252 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1253 if (defaultAlgorithmId > 1) { 1254 // only 0 (clear) and 1 (AES-128) are valid 1255 return ERROR_MALFORMED; 1256 } 1257 1258 memset(buf, 0, 4); 1259 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1260 return ERROR_IO; 1261 } 1262 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 1263 1264 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 1265 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 1266 // only unencrypted data must have 0 IV size 1267 return ERROR_MALFORMED; 1268 } else if (defaultIVSize != 0 && 1269 defaultIVSize != 8 && 1270 defaultIVSize != 16) { 1271 // only supported sizes are 0, 8 and 16 1272 return ERROR_MALFORMED; 1273 } 1274 1275 uint8_t defaultKeyId[16]; 1276 1277 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1278 return ERROR_IO; 1279 } 1280 1281 if (mLastTrack == NULL) 1282 return ERROR_MALFORMED; 1283 1284 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId); 1285 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1286 
mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1287 break; 1288 } 1289 1290 case FOURCC('t', 'k', 'h', 'd'): 1291 { 1292 *offset += chunk_size; 1293 1294 status_t err; 1295 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1296 return err; 1297 } 1298 1299 break; 1300 } 1301 1302 case FOURCC('t', 'r', 'e', 'f'): 1303 { 1304 *offset += chunk_size; 1305 1306 if (mLastTrack == NULL) { 1307 return ERROR_MALFORMED; 1308 } 1309 1310 // Skip thumbnail track for now since we don't have an 1311 // API to retrieve it yet. 1312 // The thumbnail track can't be accessed by negative index or time, 1313 // because each timed sample has its own corresponding thumbnail 1314 // in the thumbnail track. We'll need a dedicated API to retrieve 1315 // thumbnail at time instead. 1316 mLastTrack->skipTrack = true; 1317 1318 break; 1319 } 1320 1321 case FOURCC('p', 's', 's', 'h'): 1322 { 1323 *offset += chunk_size; 1324 1325 PsshInfo pssh; 1326 1327 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1328 return ERROR_IO; 1329 } 1330 1331 uint32_t psshdatalen = 0; 1332 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1333 return ERROR_IO; 1334 } 1335 pssh.datalen = ntohl(psshdatalen); 1336 ALOGV("pssh data size: %d", pssh.datalen); 1337 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) { 1338 // pssh data length exceeds size of containing box 1339 return ERROR_MALFORMED; 1340 } 1341 1342 pssh.data = new (std::nothrow) uint8_t[pssh.datalen]; 1343 if (pssh.data == NULL) { 1344 return ERROR_MALFORMED; 1345 } 1346 ALOGV("allocated pssh @ %p", pssh.data); 1347 ssize_t requested = (ssize_t) pssh.datalen; 1348 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1349 delete[] pssh.data; 1350 return ERROR_IO; 1351 } 1352 mPssh.push_back(pssh); 1353 1354 break; 1355 } 1356 1357 case FOURCC('m', 'd', 'h', 'd'): 1358 { 1359 *offset += chunk_size; 1360 1361 if (chunk_data_size < 4 || mLastTrack == NULL) 
{ 1362 return ERROR_MALFORMED; 1363 } 1364 1365 uint8_t version; 1366 if (mDataSource->readAt( 1367 data_offset, &version, sizeof(version)) 1368 < (ssize_t)sizeof(version)) { 1369 return ERROR_IO; 1370 } 1371 1372 off64_t timescale_offset; 1373 1374 if (version == 1) { 1375 timescale_offset = data_offset + 4 + 16; 1376 } else if (version == 0) { 1377 timescale_offset = data_offset + 4 + 8; 1378 } else { 1379 return ERROR_IO; 1380 } 1381 1382 uint32_t timescale; 1383 if (mDataSource->readAt( 1384 timescale_offset, ×cale, sizeof(timescale)) 1385 < (ssize_t)sizeof(timescale)) { 1386 return ERROR_IO; 1387 } 1388 1389 if (!timescale) { 1390 ALOGE("timescale should not be ZERO."); 1391 return ERROR_MALFORMED; 1392 } 1393 1394 mLastTrack->timescale = ntohl(timescale); 1395 1396 // 14496-12 says all ones means indeterminate, but some files seem to use 1397 // 0 instead. We treat both the same. 1398 int64_t duration = 0; 1399 if (version == 1) { 1400 if (mDataSource->readAt( 1401 timescale_offset + 4, &duration, sizeof(duration)) 1402 < (ssize_t)sizeof(duration)) { 1403 return ERROR_IO; 1404 } 1405 if (duration != -1) { 1406 duration = ntoh64(duration); 1407 } 1408 } else { 1409 uint32_t duration32; 1410 if (mDataSource->readAt( 1411 timescale_offset + 4, &duration32, sizeof(duration32)) 1412 < (ssize_t)sizeof(duration32)) { 1413 return ERROR_IO; 1414 } 1415 if (duration32 != 0xffffffff) { 1416 duration = ntohl(duration32); 1417 } 1418 } 1419 if (duration != 0 && mLastTrack->timescale != 0) { 1420 mLastTrack->meta->setInt64( 1421 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1422 } 1423 1424 uint8_t lang[2]; 1425 off64_t lang_offset; 1426 if (version == 1) { 1427 lang_offset = timescale_offset + 4 + 8; 1428 } else if (version == 0) { 1429 lang_offset = timescale_offset + 4 + 4; 1430 } else { 1431 return ERROR_IO; 1432 } 1433 1434 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1435 < (ssize_t)sizeof(lang)) { 1436 return ERROR_IO; 1437 } 1438 1439 // 
To get the ISO-639-2/T three character language code 1440 // 1 bit pad followed by 3 5-bits characters. Each character 1441 // is packed as the difference between its ASCII value and 0x60. 1442 char lang_code[4]; 1443 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1444 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1445 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1446 lang_code[3] = '\0'; 1447 1448 mLastTrack->meta->setCString( 1449 kKeyMediaLanguage, lang_code); 1450 1451 break; 1452 } 1453 1454 case FOURCC('s', 't', 's', 'd'): 1455 { 1456 uint8_t buffer[8]; 1457 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1458 return ERROR_MALFORMED; 1459 } 1460 1461 if (mDataSource->readAt( 1462 data_offset, buffer, 8) < 8) { 1463 return ERROR_IO; 1464 } 1465 1466 if (U32_AT(buffer) != 0) { 1467 // Should be version 0, flags 0. 1468 return ERROR_MALFORMED; 1469 } 1470 1471 uint32_t entry_count = U32_AT(&buffer[4]); 1472 1473 if (entry_count > 1) { 1474 // For 3GPP timed text, there could be multiple tx3g boxes contain 1475 // multiple text display formats. These formats will be used to 1476 // display the timed text. 1477 // For encrypted files, there may also be more than one entry. 1478 const char *mime; 1479 1480 if (mLastTrack == NULL) 1481 return ERROR_MALFORMED; 1482 1483 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1484 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1485 strcasecmp(mime, "application/octet-stream")) { 1486 // For now we only support a single type of media per track. 
1487 mLastTrack->skipTrack = true; 1488 *offset += chunk_size; 1489 break; 1490 } 1491 } 1492 off64_t stop_offset = *offset + chunk_size; 1493 *offset = data_offset + 8; 1494 for (uint32_t i = 0; i < entry_count; ++i) { 1495 status_t err = parseChunk(offset, depth + 1); 1496 if (err != OK) { 1497 return err; 1498 } 1499 } 1500 1501 if (*offset != stop_offset) { 1502 return ERROR_MALFORMED; 1503 } 1504 break; 1505 } 1506 case FOURCC('m', 'e', 't', 't'): 1507 { 1508 *offset += chunk_size; 1509 1510 if (mLastTrack == NULL) 1511 return ERROR_MALFORMED; 1512 1513 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1514 if (buffer->data() == NULL) { 1515 return NO_MEMORY; 1516 } 1517 1518 if (mDataSource->readAt( 1519 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1520 return ERROR_IO; 1521 } 1522 1523 String8 mimeFormat((const char *)(buffer->data()), chunk_data_size); 1524 mLastTrack->meta->setCString(kKeyMIMEType, mimeFormat.string()); 1525 1526 break; 1527 } 1528 1529 case FOURCC('m', 'p', '4', 'a'): 1530 case FOURCC('e', 'n', 'c', 'a'): 1531 case FOURCC('s', 'a', 'm', 'r'): 1532 case FOURCC('s', 'a', 'w', 'b'): 1533 { 1534 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a') 1535 && depth >= 1 && mPath[depth - 1] == FOURCC('w', 'a', 'v', 'e')) { 1536 // Ignore mp4a embedded in QT wave atom 1537 *offset += chunk_size; 1538 break; 1539 } 1540 1541 uint8_t buffer[8 + 20]; 1542 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1543 // Basic AudioSampleEntry size. 
1544 return ERROR_MALFORMED; 1545 } 1546 1547 if (mDataSource->readAt( 1548 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1549 return ERROR_IO; 1550 } 1551 1552 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1553 uint16_t version = U16_AT(&buffer[8]); 1554 uint32_t num_channels = U16_AT(&buffer[16]); 1555 1556 uint16_t sample_size = U16_AT(&buffer[18]); 1557 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1558 1559 if (mLastTrack == NULL) 1560 return ERROR_MALFORMED; 1561 1562 off64_t stop_offset = *offset + chunk_size; 1563 *offset = data_offset + sizeof(buffer); 1564 1565 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')) { 1566 if (version == 1) { 1567 if (mDataSource->readAt(*offset, buffer, 16) < 16) { 1568 return ERROR_IO; 1569 } 1570 1571#if 0 1572 U32_AT(buffer); // samples per packet 1573 U32_AT(&buffer[4]); // bytes per packet 1574 U32_AT(&buffer[8]); // bytes per frame 1575 U32_AT(&buffer[12]); // bytes per sample 1576#endif 1577 *offset += 16; 1578 } else if (version == 2) { 1579 uint8_t v2buffer[36]; 1580 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) { 1581 return ERROR_IO; 1582 } 1583 1584#if 0 1585 U32_AT(v2buffer); // size of struct only 1586 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate 1587 num_channels = U32_AT(&v2buffer[12]); // num audio channels 1588 U32_AT(&v2buffer[16]); // always 0x7f000000 1589 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel 1590 U32_AT(&v2buffer[24]); // format specifc flags 1591 U32_AT(&v2buffer[28]); // const bytes per audio packet 1592 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet 1593#endif 1594 *offset += 36; 1595 } 1596 } 1597 1598 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1599 // if the chunk type is enca, we'll get the type from the sinf/frma box later 1600 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1601 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1602 } 1603 
ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1604 chunk, num_channels, sample_size, sample_rate); 1605 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1606 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1607 1608 while (*offset < stop_offset) { 1609 status_t err = parseChunk(offset, depth + 1); 1610 if (err != OK) { 1611 return err; 1612 } 1613 } 1614 1615 if (*offset != stop_offset) { 1616 return ERROR_MALFORMED; 1617 } 1618 break; 1619 } 1620 1621 case FOURCC('m', 'p', '4', 'v'): 1622 case FOURCC('e', 'n', 'c', 'v'): 1623 case FOURCC('s', '2', '6', '3'): 1624 case FOURCC('H', '2', '6', '3'): 1625 case FOURCC('h', '2', '6', '3'): 1626 case FOURCC('a', 'v', 'c', '1'): 1627 case FOURCC('h', 'v', 'c', '1'): 1628 case FOURCC('h', 'e', 'v', '1'): 1629 { 1630 uint8_t buffer[78]; 1631 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1632 // Basic VideoSampleEntry size. 1633 return ERROR_MALFORMED; 1634 } 1635 1636 if (mDataSource->readAt( 1637 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1638 return ERROR_IO; 1639 } 1640 1641 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1642 uint16_t width = U16_AT(&buffer[6 + 18]); 1643 uint16_t height = U16_AT(&buffer[6 + 20]); 1644 1645 // The video sample is not standard-compliant if it has invalid dimension. 1646 // Use some default width and height value, and 1647 // let the decoder figure out the actual width and height (and thus 1648 // be prepared for INFO_FOMRAT_CHANGED event). 
1649 if (width == 0) width = 352; 1650 if (height == 0) height = 288; 1651 1652 // printf("*** coding='%s' width=%d height=%d\n", 1653 // chunk, width, height); 1654 1655 if (mLastTrack == NULL) 1656 return ERROR_MALFORMED; 1657 1658 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1659 // if the chunk type is encv, we'll get the type from the sinf/frma box later 1660 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1661 } 1662 mLastTrack->meta->setInt32(kKeyWidth, width); 1663 mLastTrack->meta->setInt32(kKeyHeight, height); 1664 1665 off64_t stop_offset = *offset + chunk_size; 1666 *offset = data_offset + sizeof(buffer); 1667 while (*offset < stop_offset) { 1668 status_t err = parseChunk(offset, depth + 1); 1669 if (err != OK) { 1670 return err; 1671 } 1672 } 1673 1674 if (*offset != stop_offset) { 1675 return ERROR_MALFORMED; 1676 } 1677 break; 1678 } 1679 1680 case FOURCC('s', 't', 'c', 'o'): 1681 case FOURCC('c', 'o', '6', '4'): 1682 { 1683 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) { 1684 return ERROR_MALFORMED; 1685 } 1686 1687 status_t err = 1688 mLastTrack->sampleTable->setChunkOffsetParams( 1689 chunk_type, data_offset, chunk_data_size); 1690 1691 *offset += chunk_size; 1692 1693 if (err != OK) { 1694 return err; 1695 } 1696 1697 break; 1698 } 1699 1700 case FOURCC('s', 't', 's', 'c'): 1701 { 1702 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1703 return ERROR_MALFORMED; 1704 1705 status_t err = 1706 mLastTrack->sampleTable->setSampleToChunkParams( 1707 data_offset, chunk_data_size); 1708 1709 *offset += chunk_size; 1710 1711 if (err != OK) { 1712 return err; 1713 } 1714 1715 break; 1716 } 1717 1718 case FOURCC('s', 't', 's', 'z'): 1719 case FOURCC('s', 't', 'z', '2'): 1720 { 1721 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) { 1722 return ERROR_MALFORMED; 1723 } 1724 1725 status_t err = 1726 mLastTrack->sampleTable->setSampleSizeParams( 1727 chunk_type, data_offset, chunk_data_size); 
1728 1729 *offset += chunk_size; 1730 1731 if (err != OK) { 1732 return err; 1733 } 1734 1735 size_t max_size; 1736 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1737 1738 if (err != OK) { 1739 return err; 1740 } 1741 1742 if (max_size != 0) { 1743 // Assume that a given buffer only contains at most 10 chunks, 1744 // each chunk originally prefixed with a 2 byte length will 1745 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1746 // and thus will grow by 2 bytes per chunk. 1747 if (max_size > SIZE_MAX - 10 * 2) { 1748 ALOGE("max sample size too big: %zu", max_size); 1749 return ERROR_MALFORMED; 1750 } 1751 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1752 } else { 1753 // No size was specified. Pick a conservatively large size. 1754 uint32_t width, height; 1755 if (!mLastTrack->meta->findInt32(kKeyWidth, (int32_t*)&width) || 1756 !mLastTrack->meta->findInt32(kKeyHeight,(int32_t*) &height)) { 1757 ALOGE("No width or height, assuming worst case 1080p"); 1758 width = 1920; 1759 height = 1080; 1760 } else { 1761 // A resolution was specified, check that it's not too big. The values below 1762 // were chosen so that the calculations below don't cause overflows, they're 1763 // not indicating that resolutions up to 32kx32k are actually supported. 1764 if (width > 32768 || height > 32768) { 1765 ALOGE("can't support %u x %u video", width, height); 1766 return ERROR_MALFORMED; 1767 } 1768 } 1769 1770 const char *mime; 1771 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1772 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC) 1773 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 1774 // AVC & HEVC requires compression ratio of at least 2, and uses 1775 // macroblocks 1776 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1777 } else { 1778 // For all other formats there is no minimum compression 1779 // ratio. Use compression ratio of 1. 
1780 max_size = width * height * 3 / 2; 1781 } 1782 // HACK: allow 10% overhead 1783 // TODO: read sample size from traf atom for fragmented MPEG4. 1784 max_size += max_size / 10; 1785 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size); 1786 } 1787 1788 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1789 // mimetype) previously obtained, so don't cache them. 1790 const char *mime; 1791 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1792 // Calculate average frame rate. 1793 if (!strncasecmp("video/", mime, 6)) { 1794 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1795 if (nSamples == 0) { 1796 int32_t trackId; 1797 if (mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) { 1798 for (size_t i = 0; i < mTrex.size(); i++) { 1799 Trex *t = &mTrex.editItemAt(i); 1800 if (t->track_ID == (uint32_t) trackId) { 1801 if (t->default_sample_duration > 0) { 1802 int32_t frameRate = 1803 mLastTrack->timescale / t->default_sample_duration; 1804 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1805 } 1806 break; 1807 } 1808 } 1809 } 1810 } else { 1811 int64_t durationUs; 1812 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { 1813 if (durationUs > 0) { 1814 int32_t frameRate = (nSamples * 1000000LL + 1815 (durationUs >> 1)) / durationUs; 1816 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1817 } 1818 } 1819 ALOGV("setting frame count %zu", nSamples); 1820 mLastTrack->meta->setInt32(kKeyFrameCount, nSamples); 1821 } 1822 } 1823 1824 break; 1825 } 1826 1827 case FOURCC('s', 't', 't', 's'): 1828 { 1829 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1830 return ERROR_MALFORMED; 1831 1832 *offset += chunk_size; 1833 1834 status_t err = 1835 mLastTrack->sampleTable->setTimeToSampleParams( 1836 data_offset, chunk_data_size); 1837 1838 if (err != OK) { 1839 return err; 1840 } 1841 1842 break; 1843 } 1844 1845 case FOURCC('c', 't', 't', 's'): 1846 { 1847 if ((mLastTrack == NULL) || 
(mLastTrack->sampleTable == NULL)) 1848 return ERROR_MALFORMED; 1849 1850 *offset += chunk_size; 1851 1852 status_t err = 1853 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1854 data_offset, chunk_data_size); 1855 1856 if (err != OK) { 1857 return err; 1858 } 1859 1860 break; 1861 } 1862 1863 case FOURCC('s', 't', 's', 's'): 1864 { 1865 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1866 return ERROR_MALFORMED; 1867 1868 *offset += chunk_size; 1869 1870 status_t err = 1871 mLastTrack->sampleTable->setSyncSampleParams( 1872 data_offset, chunk_data_size); 1873 1874 if (err != OK) { 1875 return err; 1876 } 1877 1878 break; 1879 } 1880 1881 // \xA9xyz 1882 case FOURCC(0xA9, 'x', 'y', 'z'): 1883 { 1884 *offset += chunk_size; 1885 1886 // Best case the total data length inside "\xA9xyz" box would 1887 // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/", 1888 // where "\x00\x05" is the text string length with value = 5, 1889 // "\0x15\xc7" is the language code = en, and "+0+0/" is a 1890 // location (string) value with longitude = 0 and latitude = 0. 1891 // Since some devices encountered in the wild omit the trailing 1892 // slash, we'll allow that. 1893 if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing / 1894 return ERROR_MALFORMED; 1895 } 1896 1897 uint16_t len; 1898 if (!mDataSource->getUInt16(data_offset, &len)) { 1899 return ERROR_IO; 1900 } 1901 1902 // allow "+0+0" without trailing slash 1903 if (len < 4 || len > chunk_data_size - 4) { 1904 return ERROR_MALFORMED; 1905 } 1906 // The location string following the language code is formatted 1907 // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709). 1908 // Allocate 2 extra bytes, in case we need to add a trailing slash, 1909 // and to add a terminating 0. 
1910 std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]()); 1911 if (!buffer) { 1912 return NO_MEMORY; 1913 } 1914 1915 if (mDataSource->readAt( 1916 data_offset + 4, &buffer[0], len) < len) { 1917 return ERROR_IO; 1918 } 1919 1920 len = strlen(&buffer[0]); 1921 if (len < 4) { 1922 return ERROR_MALFORMED; 1923 } 1924 // Add a trailing slash if there wasn't one. 1925 if (buffer[len - 1] != '/') { 1926 buffer[len] = '/'; 1927 } 1928 mFileMetaData->setCString(kKeyLocation, &buffer[0]); 1929 break; 1930 } 1931 1932 case FOURCC('e', 's', 'd', 's'): 1933 { 1934 *offset += chunk_size; 1935 1936 if (chunk_data_size < 4) { 1937 return ERROR_MALFORMED; 1938 } 1939 1940 uint8_t buffer[256]; 1941 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1942 return ERROR_BUFFER_TOO_SMALL; 1943 } 1944 1945 if (mDataSource->readAt( 1946 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1947 return ERROR_IO; 1948 } 1949 1950 if (U32_AT(buffer) != 0) { 1951 // Should be version 0, flags 0. 1952 return ERROR_MALFORMED; 1953 } 1954 1955 if (mLastTrack == NULL) 1956 return ERROR_MALFORMED; 1957 1958 mLastTrack->meta->setData( 1959 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1960 1961 if (mPath.size() >= 2 1962 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1963 // Information from the ESDS must be relied on for proper 1964 // setup of sample rate and channel count for MPEG4 Audio. 1965 // The generic header appears to only contain generic 1966 // information... 
1967 1968 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1969 &buffer[4], chunk_data_size - 4); 1970 1971 if (err != OK) { 1972 return err; 1973 } 1974 } 1975 if (mPath.size() >= 2 1976 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) { 1977 // Check if the video is MPEG2 1978 ESDS esds(&buffer[4], chunk_data_size - 4); 1979 1980 uint8_t objectTypeIndication; 1981 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) { 1982 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) { 1983 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2); 1984 } 1985 } 1986 } 1987 break; 1988 } 1989 1990 case FOURCC('b', 't', 'r', 't'): 1991 { 1992 *offset += chunk_size; 1993 if (mLastTrack == NULL) { 1994 return ERROR_MALFORMED; 1995 } 1996 1997 uint8_t buffer[12]; 1998 if (chunk_data_size != sizeof(buffer)) { 1999 return ERROR_MALFORMED; 2000 } 2001 2002 if (mDataSource->readAt( 2003 data_offset, buffer, chunk_data_size) < chunk_data_size) { 2004 return ERROR_IO; 2005 } 2006 2007 uint32_t maxBitrate = U32_AT(&buffer[4]); 2008 uint32_t avgBitrate = U32_AT(&buffer[8]); 2009 if (maxBitrate > 0 && maxBitrate < INT32_MAX) { 2010 mLastTrack->meta->setInt32(kKeyMaxBitRate, (int32_t)maxBitrate); 2011 } 2012 if (avgBitrate > 0 && avgBitrate < INT32_MAX) { 2013 mLastTrack->meta->setInt32(kKeyBitRate, (int32_t)avgBitrate); 2014 } 2015 break; 2016 } 2017 2018 case FOURCC('a', 'v', 'c', 'C'): 2019 { 2020 *offset += chunk_size; 2021 2022 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 2023 2024 if (buffer->data() == NULL) { 2025 ALOGE("b/28471206"); 2026 return NO_MEMORY; 2027 } 2028 2029 if (mDataSource->readAt( 2030 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 2031 return ERROR_IO; 2032 } 2033 2034 if (mLastTrack == NULL) 2035 return ERROR_MALFORMED; 2036 2037 mLastTrack->meta->setData( 2038 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size); 2039 2040 break; 2041 } 2042 case FOURCC('h', 'v', 'c', 'C'): 2043 { 
2044 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 2045 2046 if (buffer->data() == NULL) { 2047 ALOGE("b/28471206"); 2048 return NO_MEMORY; 2049 } 2050 2051 if (mDataSource->readAt( 2052 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 2053 return ERROR_IO; 2054 } 2055 2056 if (mLastTrack == NULL) 2057 return ERROR_MALFORMED; 2058 2059 mLastTrack->meta->setData( 2060 kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size); 2061 2062 *offset += chunk_size; 2063 break; 2064 } 2065 2066 case FOURCC('d', '2', '6', '3'): 2067 { 2068 *offset += chunk_size; 2069 /* 2070 * d263 contains a fixed 7 bytes part: 2071 * vendor - 4 bytes 2072 * version - 1 byte 2073 * level - 1 byte 2074 * profile - 1 byte 2075 * optionally, "d263" box itself may contain a 16-byte 2076 * bit rate box (bitr) 2077 * average bit rate - 4 bytes 2078 * max bit rate - 4 bytes 2079 */ 2080 char buffer[23]; 2081 if (chunk_data_size != 7 && 2082 chunk_data_size != 23) { 2083 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size); 2084 return ERROR_MALFORMED; 2085 } 2086 2087 if (mDataSource->readAt( 2088 data_offset, buffer, chunk_data_size) < chunk_data_size) { 2089 return ERROR_IO; 2090 } 2091 2092 if (mLastTrack == NULL) 2093 return ERROR_MALFORMED; 2094 2095 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 2096 2097 break; 2098 } 2099 2100 case FOURCC('m', 'e', 't', 'a'): 2101 { 2102 off64_t stop_offset = *offset + chunk_size; 2103 *offset = data_offset; 2104 bool isParsingMetaKeys = underQTMetaPath(mPath, 2); 2105 if (!isParsingMetaKeys) { 2106 uint8_t buffer[4]; 2107 if (chunk_data_size < (off64_t)sizeof(buffer)) { 2108 *offset = stop_offset; 2109 return ERROR_MALFORMED; 2110 } 2111 2112 if (mDataSource->readAt( 2113 data_offset, buffer, 4) < 4) { 2114 *offset = stop_offset; 2115 return ERROR_IO; 2116 } 2117 2118 if (U32_AT(buffer) != 0) { 2119 // Should be version 0, flags 0. 
2120 2121 // If it's not, let's assume this is one of those 2122 // apparently malformed chunks that don't have flags 2123 // and completely different semantics than what's 2124 // in the MPEG4 specs and skip it. 2125 *offset = stop_offset; 2126 return OK; 2127 } 2128 *offset += sizeof(buffer); 2129 } 2130 2131 while (*offset < stop_offset) { 2132 status_t err = parseChunk(offset, depth + 1); 2133 if (err != OK) { 2134 return err; 2135 } 2136 } 2137 2138 if (*offset != stop_offset) { 2139 return ERROR_MALFORMED; 2140 } 2141 break; 2142 } 2143 2144 case FOURCC('i', 'l', 'o', 'c'): 2145 case FOURCC('i', 'i', 'n', 'f'): 2146 case FOURCC('i', 'p', 'r', 'p'): 2147 case FOURCC('p', 'i', 't', 'm'): 2148 case FOURCC('i', 'd', 'a', 't'): 2149 case FOURCC('i', 'r', 'e', 'f'): 2150 case FOURCC('i', 'p', 'r', 'o'): 2151 { 2152 if (mIsHeif) { 2153 if (mItemTable == NULL) { 2154 mItemTable = new ItemTable(mDataSource); 2155 } 2156 status_t err = mItemTable->parse( 2157 chunk_type, data_offset, chunk_data_size); 2158 if (err != OK) { 2159 return err; 2160 } 2161 } 2162 *offset += chunk_size; 2163 break; 2164 } 2165 2166 case FOURCC('m', 'e', 'a', 'n'): 2167 case FOURCC('n', 'a', 'm', 'e'): 2168 case FOURCC('d', 'a', 't', 'a'): 2169 { 2170 *offset += chunk_size; 2171 2172 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 2173 status_t err = parseITunesMetaData(data_offset, chunk_data_size); 2174 2175 if (err != OK) { 2176 return err; 2177 } 2178 } 2179 2180 break; 2181 } 2182 2183 case FOURCC('m', 'v', 'h', 'd'): 2184 { 2185 *offset += chunk_size; 2186 2187 if (depth != 1) { 2188 ALOGE("mvhd: depth %d", depth); 2189 return ERROR_MALFORMED; 2190 } 2191 if (chunk_data_size < 32) { 2192 return ERROR_MALFORMED; 2193 } 2194 2195 uint8_t header[32]; 2196 if (mDataSource->readAt( 2197 data_offset, header, sizeof(header)) 2198 < (ssize_t)sizeof(header)) { 2199 return ERROR_IO; 2200 } 2201 2202 uint64_t creationTime; 2203 uint64_t duration = 0; 2204 if (header[0] == 1) { 2205 
creationTime = U64_AT(&header[4]); 2206 mHeaderTimescale = U32_AT(&header[20]); 2207 duration = U64_AT(&header[24]); 2208 if (duration == 0xffffffffffffffff) { 2209 duration = 0; 2210 } 2211 } else if (header[0] != 0) { 2212 return ERROR_MALFORMED; 2213 } else { 2214 creationTime = U32_AT(&header[4]); 2215 mHeaderTimescale = U32_AT(&header[12]); 2216 uint32_t d32 = U32_AT(&header[16]); 2217 if (d32 == 0xffffffff) { 2218 d32 = 0; 2219 } 2220 duration = d32; 2221 } 2222 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) { 2223 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 2224 } 2225 2226 String8 s; 2227 if (convertTimeToDate(creationTime, &s)) { 2228 mFileMetaData->setCString(kKeyDate, s.string()); 2229 } 2230 2231 2232 break; 2233 } 2234 2235 case FOURCC('m', 'e', 'h', 'd'): 2236 { 2237 *offset += chunk_size; 2238 2239 if (chunk_data_size < 8) { 2240 return ERROR_MALFORMED; 2241 } 2242 2243 uint8_t flags[4]; 2244 if (mDataSource->readAt( 2245 data_offset, flags, sizeof(flags)) 2246 < (ssize_t)sizeof(flags)) { 2247 return ERROR_IO; 2248 } 2249 2250 uint64_t duration = 0; 2251 if (flags[0] == 1) { 2252 // 64 bit 2253 if (chunk_data_size < 12) { 2254 return ERROR_MALFORMED; 2255 } 2256 mDataSource->getUInt64(data_offset + 4, &duration); 2257 if (duration == 0xffffffffffffffff) { 2258 duration = 0; 2259 } 2260 } else if (flags[0] == 0) { 2261 // 32 bit 2262 uint32_t d32; 2263 mDataSource->getUInt32(data_offset + 4, &d32); 2264 if (d32 == 0xffffffff) { 2265 d32 = 0; 2266 } 2267 duration = d32; 2268 } else { 2269 return ERROR_MALFORMED; 2270 } 2271 2272 if (duration != 0 && mHeaderTimescale != 0) { 2273 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 2274 } 2275 2276 break; 2277 } 2278 2279 case FOURCC('m', 'd', 'a', 't'): 2280 { 2281 ALOGV("mdat chunk, drm: %d", mIsDrm); 2282 2283 mMdatFound = true; 2284 2285 if (!mIsDrm) { 2286 *offset += chunk_size; 2287 break; 2288 } 2289 2290 
if (chunk_size < 8) { 2291 return ERROR_MALFORMED; 2292 } 2293 2294 return parseDrmSINF(offset, data_offset); 2295 } 2296 2297 case FOURCC('h', 'd', 'l', 'r'): 2298 { 2299 *offset += chunk_size; 2300 2301 if (underQTMetaPath(mPath, 3)) { 2302 break; 2303 } 2304 2305 uint32_t buffer; 2306 if (mDataSource->readAt( 2307 data_offset + 8, &buffer, 4) < 4) { 2308 return ERROR_IO; 2309 } 2310 2311 uint32_t type = ntohl(buffer); 2312 // For the 3GPP file format, the handler-type within the 'hdlr' box 2313 // shall be 'text'. We also want to support 'sbtl' handler type 2314 // for a practical reason as various MPEG4 containers use it. 2315 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 2316 if (mLastTrack != NULL) { 2317 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 2318 } 2319 } 2320 2321 break; 2322 } 2323 2324 case FOURCC('k', 'e', 'y', 's'): 2325 { 2326 *offset += chunk_size; 2327 2328 if (underQTMetaPath(mPath, 3)) { 2329 status_t err = parseQTMetaKey(data_offset, chunk_data_size); 2330 if (err != OK) { 2331 return err; 2332 } 2333 } 2334 break; 2335 } 2336 2337 case FOURCC('t', 'r', 'e', 'x'): 2338 { 2339 *offset += chunk_size; 2340 2341 if (chunk_data_size < 24) { 2342 return ERROR_IO; 2343 } 2344 Trex trex; 2345 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) || 2346 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) || 2347 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) || 2348 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) || 2349 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) { 2350 return ERROR_IO; 2351 } 2352 mTrex.add(trex); 2353 break; 2354 } 2355 2356 case FOURCC('t', 'x', '3', 'g'): 2357 { 2358 if (mLastTrack == NULL) 2359 return ERROR_MALFORMED; 2360 2361 uint32_t type; 2362 const void *data; 2363 size_t size = 0; 2364 if (!mLastTrack->meta->findData( 2365 kKeyTextFormatData, &type, 
&data, &size)) { 2366 size = 0; 2367 } 2368 2369 if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) { 2370 return ERROR_MALFORMED; 2371 } 2372 2373 uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size]; 2374 if (buffer == NULL) { 2375 return ERROR_MALFORMED; 2376 } 2377 2378 if (size > 0) { 2379 memcpy(buffer, data, size); 2380 } 2381 2382 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 2383 < chunk_size) { 2384 delete[] buffer; 2385 buffer = NULL; 2386 2387 // advance read pointer so we don't end up reading this again 2388 *offset += chunk_size; 2389 return ERROR_IO; 2390 } 2391 2392 mLastTrack->meta->setData( 2393 kKeyTextFormatData, 0, buffer, size + chunk_size); 2394 2395 delete[] buffer; 2396 2397 *offset += chunk_size; 2398 break; 2399 } 2400 2401 case FOURCC('c', 'o', 'v', 'r'): 2402 { 2403 *offset += chunk_size; 2404 2405 if (mFileMetaData != NULL) { 2406 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64, 2407 chunk_data_size, data_offset); 2408 2409 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) { 2410 return ERROR_MALFORMED; 2411 } 2412 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1); 2413 if (buffer->data() == NULL) { 2414 ALOGE("b/28471206"); 2415 return NO_MEMORY; 2416 } 2417 if (mDataSource->readAt( 2418 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) { 2419 return ERROR_IO; 2420 } 2421 const int kSkipBytesOfDataBox = 16; 2422 if (chunk_data_size <= kSkipBytesOfDataBox) { 2423 return ERROR_MALFORMED; 2424 } 2425 2426 mFileMetaData->setData( 2427 kKeyAlbumArt, MetaData::TYPE_NONE, 2428 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 2429 } 2430 2431 break; 2432 } 2433 2434 case FOURCC('c', 'o', 'l', 'r'): 2435 { 2436 *offset += chunk_size; 2437 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd') 2438 // ignore otherwise 2439 if (depth >= 2 && mPath[depth - 2] == 
FOURCC('s', 't', 's', 'd')) { 2440 status_t err = parseColorInfo(data_offset, chunk_data_size); 2441 if (err != OK) { 2442 return err; 2443 } 2444 } 2445 2446 break; 2447 } 2448 2449 case FOURCC('t', 'i', 't', 'l'): 2450 case FOURCC('p', 'e', 'r', 'f'): 2451 case FOURCC('a', 'u', 't', 'h'): 2452 case FOURCC('g', 'n', 'r', 'e'): 2453 case FOURCC('a', 'l', 'b', 'm'): 2454 case FOURCC('y', 'r', 'r', 'c'): 2455 { 2456 *offset += chunk_size; 2457 2458 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth); 2459 2460 if (err != OK) { 2461 return err; 2462 } 2463 2464 break; 2465 } 2466 2467 case FOURCC('I', 'D', '3', '2'): 2468 { 2469 *offset += chunk_size; 2470 2471 if (chunk_data_size < 6) { 2472 return ERROR_MALFORMED; 2473 } 2474 2475 parseID3v2MetaData(data_offset + 6); 2476 2477 break; 2478 } 2479 2480 case FOURCC('-', '-', '-', '-'): 2481 { 2482 mLastCommentMean.clear(); 2483 mLastCommentName.clear(); 2484 mLastCommentData.clear(); 2485 *offset += chunk_size; 2486 break; 2487 } 2488 2489 case FOURCC('s', 'i', 'd', 'x'): 2490 { 2491 status_t err = parseSegmentIndex(data_offset, chunk_data_size); 2492 if (err != OK) { 2493 return err; 2494 } 2495 *offset += chunk_size; 2496 return UNKNOWN_ERROR; // stop parsing after sidx 2497 } 2498 2499 case FOURCC('a', 'c', '-', '3'): 2500 { 2501 *offset += chunk_size; 2502 return parseAC3SampleEntry(data_offset); 2503 } 2504 2505 case FOURCC('f', 't', 'y', 'p'): 2506 { 2507 if (chunk_data_size < 8 || depth != 0) { 2508 return ERROR_MALFORMED; 2509 } 2510 2511 off64_t stop_offset = *offset + chunk_size; 2512 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4; 2513 std::set<uint32_t> brandSet; 2514 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 2515 if (i == 1) { 2516 // Skip this index, it refers to the minorVersion, 2517 // not a brand. 
2518 continue; 2519 } 2520 2521 uint32_t brand; 2522 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) { 2523 return ERROR_MALFORMED; 2524 } 2525 2526 brand = ntohl(brand); 2527 brandSet.insert(brand); 2528 } 2529 2530 if (brandSet.count(FOURCC('q', 't', ' ', ' ')) > 0) { 2531 mIsQT = true; 2532 } else { 2533 if (brandSet.count(FOURCC('m', 'i', 'f', '1')) > 0 2534 && brandSet.count(FOURCC('h', 'e', 'i', 'c')) > 0) { 2535 ALOGV("identified HEIF image"); 2536 2537 mIsHeif = true; 2538 brandSet.erase(FOURCC('m', 'i', 'f', '1')); 2539 brandSet.erase(FOURCC('h', 'e', 'i', 'c')); 2540 } 2541 2542 if (!brandSet.empty()) { 2543 // This means that the file should have moov box. 2544 // It could be any iso files (mp4, heifs, etc.) 2545 mHasMoovBox = true; 2546 ALOGV("identified HEIF image with other tracks"); 2547 } 2548 } 2549 2550 *offset = stop_offset; 2551 2552 break; 2553 } 2554 2555 default: 2556 { 2557 // check if we're parsing 'ilst' for meta keys 2558 // if so, treat type as a number (key-id). 
2559 if (underQTMetaPath(mPath, 3)) { 2560 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size); 2561 if (err != OK) { 2562 return err; 2563 } 2564 } 2565 2566 *offset += chunk_size; 2567 break; 2568 } 2569 } 2570 2571 return OK; 2572} 2573 2574status_t MPEG4Extractor::parseAC3SampleEntry(off64_t offset) { 2575 // skip 16 bytes: 2576 // + 6-byte reserved, 2577 // + 2-byte data reference index, 2578 // + 8-byte reserved 2579 offset += 16; 2580 uint16_t channelCount; 2581 if (!mDataSource->getUInt16(offset, &channelCount)) { 2582 return ERROR_MALFORMED; 2583 } 2584 // skip 8 bytes: 2585 // + 2-byte channelCount, 2586 // + 2-byte sample size, 2587 // + 4-byte reserved 2588 offset += 8; 2589 uint16_t sampleRate; 2590 if (!mDataSource->getUInt16(offset, &sampleRate)) { 2591 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read sample rate"); 2592 return ERROR_MALFORMED; 2593 } 2594 2595 // skip 4 bytes: 2596 // + 2-byte sampleRate, 2597 // + 2-byte reserved 2598 offset += 4; 2599 return parseAC3SpecificBox(offset, sampleRate); 2600} 2601 2602status_t MPEG4Extractor::parseAC3SpecificBox( 2603 off64_t offset, uint16_t sampleRate) { 2604 uint32_t size; 2605 // + 4-byte size 2606 // + 4-byte type 2607 // + 3-byte payload 2608 const uint32_t kAC3SpecificBoxSize = 11; 2609 if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) { 2610 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size"); 2611 return ERROR_MALFORMED; 2612 } 2613 2614 offset += 4; 2615 uint32_t type; 2616 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC('d', 'a', 'c', '3')) { 2617 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3"); 2618 return ERROR_MALFORMED; 2619 } 2620 2621 offset += 4; 2622 const uint32_t kAC3SpecificBoxPayloadSize = 3; 2623 uint8_t chunk[kAC3SpecificBoxPayloadSize]; 2624 if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) { 2625 
ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields"); 2626 return ERROR_MALFORMED; 2627 } 2628 2629 ABitReader br(chunk, sizeof(chunk)); 2630 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5}; 2631 static const unsigned sampleRateTable[] = {48000, 44100, 32000}; 2632 2633 unsigned fscod = br.getBits(2); 2634 if (fscod == 3) { 2635 ALOGE("Incorrect fscod (3) in AC3 header"); 2636 return ERROR_MALFORMED; 2637 } 2638 unsigned boxSampleRate = sampleRateTable[fscod]; 2639 if (boxSampleRate != sampleRate) { 2640 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d", 2641 boxSampleRate, sampleRate); 2642 return ERROR_MALFORMED; 2643 } 2644 2645 unsigned bsid = br.getBits(5); 2646 if (bsid > 8) { 2647 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?"); 2648 return ERROR_MALFORMED; 2649 } 2650 2651 // skip 2652 unsigned bsmod __unused = br.getBits(3); 2653 2654 unsigned acmod = br.getBits(3); 2655 unsigned lfeon = br.getBits(1); 2656 unsigned channelCount = channelCountTable[acmod] + lfeon; 2657 2658 if (mLastTrack == NULL) { 2659 return ERROR_MALFORMED; 2660 } 2661 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3); 2662 mLastTrack->meta->setInt32(kKeyChannelCount, channelCount); 2663 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 2664 return OK; 2665} 2666 2667status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 2668 ALOGV("MPEG4Extractor::parseSegmentIndex"); 2669 2670 if (size < 12) { 2671 return -EINVAL; 2672 } 2673 2674 uint32_t flags; 2675 if (!mDataSource->getUInt32(offset, &flags)) { 2676 return ERROR_MALFORMED; 2677 } 2678 2679 uint32_t version = flags >> 24; 2680 flags &= 0xffffff; 2681 2682 ALOGV("sidx version %d", version); 2683 2684 uint32_t referenceId; 2685 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 2686 return ERROR_MALFORMED; 2687 } 2688 2689 uint32_t timeScale; 2690 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 2691 
return ERROR_MALFORMED; 2692 } 2693 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 2694 if (timeScale == 0) 2695 return ERROR_MALFORMED; 2696 2697 uint64_t earliestPresentationTime; 2698 uint64_t firstOffset; 2699 2700 offset += 12; 2701 size -= 12; 2702 2703 if (version == 0) { 2704 if (size < 8) { 2705 return -EINVAL; 2706 } 2707 uint32_t tmp; 2708 if (!mDataSource->getUInt32(offset, &tmp)) { 2709 return ERROR_MALFORMED; 2710 } 2711 earliestPresentationTime = tmp; 2712 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 2713 return ERROR_MALFORMED; 2714 } 2715 firstOffset = tmp; 2716 offset += 8; 2717 size -= 8; 2718 } else { 2719 if (size < 16) { 2720 return -EINVAL; 2721 } 2722 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 2723 return ERROR_MALFORMED; 2724 } 2725 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 2726 return ERROR_MALFORMED; 2727 } 2728 offset += 16; 2729 size -= 16; 2730 } 2731 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset); 2732 2733 if (size < 4) { 2734 return -EINVAL; 2735 } 2736 2737 uint16_t referenceCount; 2738 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 2739 return ERROR_MALFORMED; 2740 } 2741 offset += 4; 2742 size -= 4; 2743 ALOGV("refcount: %d", referenceCount); 2744 2745 if (size < referenceCount * 12) { 2746 return -EINVAL; 2747 } 2748 2749 uint64_t total_duration = 0; 2750 for (unsigned int i = 0; i < referenceCount; i++) { 2751 uint32_t d1, d2, d3; 2752 2753 if (!mDataSource->getUInt32(offset, &d1) || // size 2754 !mDataSource->getUInt32(offset + 4, &d2) || // duration 2755 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 2756 return ERROR_MALFORMED; 2757 } 2758 2759 if (d1 & 0x80000000) { 2760 ALOGW("sub-sidx boxes not supported yet"); 2761 } 2762 bool sap = d3 & 0x80000000; 2763 uint32_t saptype = (d3 >> 28) & 7; 2764 if (!sap || (saptype != 1 && saptype != 2)) { 2765 // type 1 and 2 are sync samples 2766 ALOGW("not a stream access 
point, or unsupported type: %08x", d3); 2767 } 2768 total_duration += d2; 2769 offset += 12; 2770 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 2771 SidxEntry se; 2772 se.mSize = d1 & 0x7fffffff; 2773 se.mDurationUs = 1000000LL * d2 / timeScale; 2774 mSidxEntries.add(se); 2775 } 2776 2777 uint64_t sidxDuration = total_duration * 1000000 / timeScale; 2778 2779 if (mLastTrack == NULL) 2780 return ERROR_MALFORMED; 2781 2782 int64_t metaDuration; 2783 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 2784 mLastTrack->meta->setInt64(kKeyDuration, sidxDuration); 2785 } 2786 return OK; 2787} 2788 2789status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) { 2790 if (size < 8) { 2791 return ERROR_MALFORMED; 2792 } 2793 2794 uint32_t count; 2795 if (!mDataSource->getUInt32(offset + 4, &count)) { 2796 return ERROR_MALFORMED; 2797 } 2798 2799 if (mMetaKeyMap.size() > 0) { 2800 ALOGW("'keys' atom seen again, discarding existing entries"); 2801 mMetaKeyMap.clear(); 2802 } 2803 2804 off64_t keyOffset = offset + 8; 2805 off64_t stopOffset = offset + size; 2806 for (size_t i = 1; i <= count; i++) { 2807 if (keyOffset + 8 > stopOffset) { 2808 return ERROR_MALFORMED; 2809 } 2810 2811 uint32_t keySize; 2812 if (!mDataSource->getUInt32(keyOffset, &keySize) 2813 || keySize < 8 2814 || keyOffset + keySize > stopOffset) { 2815 return ERROR_MALFORMED; 2816 } 2817 2818 uint32_t type; 2819 if (!mDataSource->getUInt32(keyOffset + 4, &type) 2820 || type != FOURCC('m', 'd', 't', 'a')) { 2821 return ERROR_MALFORMED; 2822 } 2823 2824 keySize -= 8; 2825 keyOffset += 8; 2826 2827 sp<ABuffer> keyData = new ABuffer(keySize); 2828 if (keyData->data() == NULL) { 2829 return ERROR_MALFORMED; 2830 } 2831 if (mDataSource->readAt( 2832 keyOffset, keyData->data(), keySize) < (ssize_t) keySize) { 2833 return ERROR_MALFORMED; 2834 } 2835 2836 AString key((const char *)keyData->data(), keySize); 2837 mMetaKeyMap.add(i, key); 2838 2839 keyOffset += keySize; 
2840 } 2841 return OK; 2842} 2843 2844status_t MPEG4Extractor::parseQTMetaVal( 2845 int32_t keyId, off64_t offset, size_t size) { 2846 ssize_t index = mMetaKeyMap.indexOfKey(keyId); 2847 if (index < 0) { 2848 // corresponding key is not present, ignore 2849 return ERROR_MALFORMED; 2850 } 2851 2852 if (size <= 16) { 2853 return ERROR_MALFORMED; 2854 } 2855 uint32_t dataSize; 2856 if (!mDataSource->getUInt32(offset, &dataSize) 2857 || dataSize > size || dataSize <= 16) { 2858 return ERROR_MALFORMED; 2859 } 2860 uint32_t atomFourCC; 2861 if (!mDataSource->getUInt32(offset + 4, &atomFourCC) 2862 || atomFourCC != FOURCC('d', 'a', 't', 'a')) { 2863 return ERROR_MALFORMED; 2864 } 2865 uint32_t dataType; 2866 if (!mDataSource->getUInt32(offset + 8, &dataType) 2867 || ((dataType & 0xff000000) != 0)) { 2868 // not well-known type 2869 return ERROR_MALFORMED; 2870 } 2871 2872 dataSize -= 16; 2873 offset += 16; 2874 2875 if (dataType == 23 && dataSize >= 4) { 2876 // BE Float32 2877 uint32_t val; 2878 if (!mDataSource->getUInt32(offset, &val)) { 2879 return ERROR_MALFORMED; 2880 } 2881 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) { 2882 mFileMetaData->setFloat(kKeyCaptureFramerate, *(float *)&val); 2883 } 2884 } else if (dataType == 67 && dataSize >= 4) { 2885 // BE signed int32 2886 uint32_t val; 2887 if (!mDataSource->getUInt32(offset, &val)) { 2888 return ERROR_MALFORMED; 2889 } 2890 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) { 2891 mFileMetaData->setInt32(kKeyTemporalLayerCount, val); 2892 } 2893 } else { 2894 // add more keys if needed 2895 ALOGV("ignoring key: type %d, size %d", dataType, dataSize); 2896 } 2897 2898 return OK; 2899} 2900 2901status_t MPEG4Extractor::parseTrackHeader( 2902 off64_t data_offset, off64_t data_size) { 2903 if (data_size < 4) { 2904 return ERROR_MALFORMED; 2905 } 2906 2907 uint8_t version; 2908 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 2909 return ERROR_IO; 
2910 } 2911 2912 size_t dynSize = (version == 1) ? 36 : 24; 2913 2914 uint8_t buffer[36 + 60]; 2915 2916 if (data_size != (off64_t)dynSize + 60) { 2917 return ERROR_MALFORMED; 2918 } 2919 2920 if (mDataSource->readAt( 2921 data_offset, buffer, data_size) < (ssize_t)data_size) { 2922 return ERROR_IO; 2923 } 2924 2925 uint64_t ctime __unused, mtime __unused, duration __unused; 2926 int32_t id; 2927 2928 if (version == 1) { 2929 ctime = U64_AT(&buffer[4]); 2930 mtime = U64_AT(&buffer[12]); 2931 id = U32_AT(&buffer[20]); 2932 duration = U64_AT(&buffer[28]); 2933 } else if (version == 0) { 2934 ctime = U32_AT(&buffer[4]); 2935 mtime = U32_AT(&buffer[8]); 2936 id = U32_AT(&buffer[12]); 2937 duration = U32_AT(&buffer[20]); 2938 } else { 2939 return ERROR_UNSUPPORTED; 2940 } 2941 2942 if (mLastTrack == NULL) 2943 return ERROR_MALFORMED; 2944 2945 mLastTrack->meta->setInt32(kKeyTrackID, id); 2946 2947 size_t matrixOffset = dynSize + 16; 2948 int32_t a00 = U32_AT(&buffer[matrixOffset]); 2949 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 2950 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 2951 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 2952 2953#if 0 2954 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 2955 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 2956 2957 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 2958 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 2959 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 2960 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 2961#endif 2962 2963 uint32_t rotationDegrees; 2964 2965 static const int32_t kFixedOne = 0x10000; 2966 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 2967 // Identity, no rotation 2968 rotationDegrees = 0; 2969 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 2970 rotationDegrees = 90; 2971 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 2972 rotationDegrees = 270; 2973 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == 
-kFixedOne) { 2974 rotationDegrees = 180; 2975 } else { 2976 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 2977 rotationDegrees = 0; 2978 } 2979 2980 if (rotationDegrees != 0) { 2981 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); 2982 } 2983 2984 // Handle presentation display size, which could be different 2985 // from the image size indicated by kKeyWidth and kKeyHeight. 2986 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2987 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2988 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); 2989 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); 2990 2991 return OK; 2992} 2993 2994status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) { 2995 if (size == 0) { 2996 return OK; 2997 } 2998 2999 if (size < 4 || size == SIZE_MAX) { 3000 return ERROR_MALFORMED; 3001 } 3002 3003 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 3004 if (buffer == NULL) { 3005 return ERROR_MALFORMED; 3006 } 3007 if (mDataSource->readAt( 3008 offset, buffer, size) != (ssize_t)size) { 3009 delete[] buffer; 3010 buffer = NULL; 3011 3012 return ERROR_IO; 3013 } 3014 3015 uint32_t flags = U32_AT(buffer); 3016 3017 uint32_t metadataKey = 0; 3018 char chunk[5]; 3019 MakeFourCCString(mPath[4], chunk); 3020 ALOGV("meta: %s @ %lld", chunk, (long long)offset); 3021 switch ((int32_t)mPath[4]) { 3022 case FOURCC(0xa9, 'a', 'l', 'b'): 3023 { 3024 metadataKey = kKeyAlbum; 3025 break; 3026 } 3027 case FOURCC(0xa9, 'A', 'R', 'T'): 3028 { 3029 metadataKey = kKeyArtist; 3030 break; 3031 } 3032 case FOURCC('a', 'A', 'R', 'T'): 3033 { 3034 metadataKey = kKeyAlbumArtist; 3035 break; 3036 } 3037 case FOURCC(0xa9, 'd', 'a', 'y'): 3038 { 3039 metadataKey = kKeyYear; 3040 break; 3041 } 3042 case FOURCC(0xa9, 'n', 'a', 'm'): 3043 { 3044 metadataKey = kKeyTitle; 3045 break; 3046 } 3047 case FOURCC(0xa9, 'w', 'r', 't'): 3048 { 3049 metadataKey = kKeyWriter; 3050 break; 3051 } 3052 case FOURCC('c', 'o', 
'v', 'r'): 3053 { 3054 metadataKey = kKeyAlbumArt; 3055 break; 3056 } 3057 case FOURCC('g', 'n', 'r', 'e'): 3058 { 3059 metadataKey = kKeyGenre; 3060 break; 3061 } 3062 case FOURCC(0xa9, 'g', 'e', 'n'): 3063 { 3064 metadataKey = kKeyGenre; 3065 break; 3066 } 3067 case FOURCC('c', 'p', 'i', 'l'): 3068 { 3069 if (size == 9 && flags == 21) { 3070 char tmp[16]; 3071 sprintf(tmp, "%d", 3072 (int)buffer[size - 1]); 3073 3074 mFileMetaData->setCString(kKeyCompilation, tmp); 3075 } 3076 break; 3077 } 3078 case FOURCC('t', 'r', 'k', 'n'): 3079 { 3080 if (size == 16 && flags == 0) { 3081 char tmp[16]; 3082 uint16_t* pTrack = (uint16_t*)&buffer[10]; 3083 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 3084 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 3085 3086 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 3087 } 3088 break; 3089 } 3090 case FOURCC('d', 'i', 's', 'k'): 3091 { 3092 if ((size == 14 || size == 16) && flags == 0) { 3093 char tmp[16]; 3094 uint16_t* pDisc = (uint16_t*)&buffer[10]; 3095 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 3096 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 3097 3098 mFileMetaData->setCString(kKeyDiscNumber, tmp); 3099 } 3100 break; 3101 } 3102 case FOURCC('-', '-', '-', '-'): 3103 { 3104 buffer[size] = '\0'; 3105 switch (mPath[5]) { 3106 case FOURCC('m', 'e', 'a', 'n'): 3107 mLastCommentMean.setTo((const char *)buffer + 4); 3108 break; 3109 case FOURCC('n', 'a', 'm', 'e'): 3110 mLastCommentName.setTo((const char *)buffer + 4); 3111 break; 3112 case FOURCC('d', 'a', 't', 'a'): 3113 if (size < 8) { 3114 delete[] buffer; 3115 buffer = NULL; 3116 ALOGE("b/24346430"); 3117 return ERROR_MALFORMED; 3118 } 3119 mLastCommentData.setTo((const char *)buffer + 8); 3120 break; 3121 } 3122 3123 // Once we have a set of mean/name/data info, go ahead and process 3124 // it to see if its something we are interested in. 
Whether or not 3125 // were are interested in the specific tag, make sure to clear out 3126 // the set so we can be ready to process another tuple should one 3127 // show up later in the file. 3128 if ((mLastCommentMean.length() != 0) && 3129 (mLastCommentName.length() != 0) && 3130 (mLastCommentData.length() != 0)) { 3131 3132 if (mLastCommentMean == "com.apple.iTunes" 3133 && mLastCommentName == "iTunSMPB") { 3134 int32_t delay, padding; 3135 if (sscanf(mLastCommentData, 3136 " %*x %x %x %*x", &delay, &padding) == 2) { 3137 if (mLastTrack == NULL) { 3138 delete[] buffer; 3139 return ERROR_MALFORMED; 3140 } 3141 3142 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 3143 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); 3144 } 3145 } 3146 3147 mLastCommentMean.clear(); 3148 mLastCommentName.clear(); 3149 mLastCommentData.clear(); 3150 } 3151 break; 3152 } 3153 3154 default: 3155 break; 3156 } 3157 3158 if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) { 3159 if (metadataKey == kKeyAlbumArt) { 3160 mFileMetaData->setData( 3161 kKeyAlbumArt, MetaData::TYPE_NONE, 3162 buffer + 8, size - 8); 3163 } else if (metadataKey == kKeyGenre) { 3164 if (flags == 0) { 3165 // uint8_t genre code, iTunes genre codes are 3166 // the standard id3 codes, except they start 3167 // at 1 instead of 0 (e.g. Pop is 14, not 13) 3168 // We use standard id3 numbering, so subtract 1. 
3169 int genrecode = (int)buffer[size - 1]; 3170 genrecode--; 3171 if (genrecode < 0) { 3172 genrecode = 255; // reserved for 'unknown genre' 3173 } 3174 char genre[10]; 3175 sprintf(genre, "%d", genrecode); 3176 3177 mFileMetaData->setCString(metadataKey, genre); 3178 } else if (flags == 1) { 3179 // custom genre string 3180 buffer[size] = '\0'; 3181 3182 mFileMetaData->setCString( 3183 metadataKey, (const char *)buffer + 8); 3184 } 3185 } else { 3186 buffer[size] = '\0'; 3187 3188 mFileMetaData->setCString( 3189 metadataKey, (const char *)buffer + 8); 3190 } 3191 } 3192 3193 delete[] buffer; 3194 buffer = NULL; 3195 3196 return OK; 3197} 3198 3199status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) { 3200 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) { 3201 return ERROR_MALFORMED; 3202 } 3203 3204 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 3205 if (buffer == NULL) { 3206 return ERROR_MALFORMED; 3207 } 3208 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) { 3209 delete[] buffer; 3210 buffer = NULL; 3211 3212 return ERROR_IO; 3213 } 3214 3215 int32_t type = U32_AT(&buffer[0]); 3216 if ((type == FOURCC('n', 'c', 'l', 'x') && size >= 11) 3217 || (type == FOURCC('n', 'c', 'l', 'c') && size >= 10)) { 3218 int32_t primaries = U16_AT(&buffer[4]); 3219 int32_t transfer = U16_AT(&buffer[6]); 3220 int32_t coeffs = U16_AT(&buffer[8]); 3221 bool fullRange = (type == FOURCC('n', 'c', 'l', 'x')) && (buffer[10] & 128); 3222 3223 ColorAspects aspects; 3224 ColorUtils::convertIsoColorAspectsToCodecAspects( 3225 primaries, transfer, coeffs, fullRange, aspects); 3226 3227 // only store the first color specification 3228 if (!mLastTrack->meta->hasData(kKeyColorPrimaries)) { 3229 mLastTrack->meta->setInt32(kKeyColorPrimaries, aspects.mPrimaries); 3230 mLastTrack->meta->setInt32(kKeyTransferFunction, aspects.mTransfer); 3231 mLastTrack->meta->setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs); 3232 
mLastTrack->meta->setInt32(kKeyColorRange, aspects.mRange); 3233 } 3234 } 3235 3236 delete[] buffer; 3237 buffer = NULL; 3238 3239 return OK; 3240} 3241 3242status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) { 3243 if (size < 4 || size == SIZE_MAX) { 3244 return ERROR_MALFORMED; 3245 } 3246 3247 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 3248 if (buffer == NULL) { 3249 return ERROR_MALFORMED; 3250 } 3251 if (mDataSource->readAt( 3252 offset, buffer, size) != (ssize_t)size) { 3253 delete[] buffer; 3254 buffer = NULL; 3255 3256 return ERROR_IO; 3257 } 3258 3259 uint32_t metadataKey = 0; 3260 switch (mPath[depth]) { 3261 case FOURCC('t', 'i', 't', 'l'): 3262 { 3263 metadataKey = kKeyTitle; 3264 break; 3265 } 3266 case FOURCC('p', 'e', 'r', 'f'): 3267 { 3268 metadataKey = kKeyArtist; 3269 break; 3270 } 3271 case FOURCC('a', 'u', 't', 'h'): 3272 { 3273 metadataKey = kKeyWriter; 3274 break; 3275 } 3276 case FOURCC('g', 'n', 'r', 'e'): 3277 { 3278 metadataKey = kKeyGenre; 3279 break; 3280 } 3281 case FOURCC('a', 'l', 'b', 'm'): 3282 { 3283 if (buffer[size - 1] != '\0') { 3284 char tmp[4]; 3285 sprintf(tmp, "%u", buffer[size - 1]); 3286 3287 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 3288 } 3289 3290 metadataKey = kKeyAlbum; 3291 break; 3292 } 3293 case FOURCC('y', 'r', 'r', 'c'): 3294 { 3295 if (size < 6) { 3296 delete[] buffer; 3297 buffer = NULL; 3298 ALOGE("b/62133227"); 3299 android_errorWriteLog(0x534e4554, "62133227"); 3300 return ERROR_MALFORMED; 3301 } 3302 char tmp[5]; 3303 uint16_t year = U16_AT(&buffer[4]); 3304 3305 if (year < 10000) { 3306 sprintf(tmp, "%u", year); 3307 3308 mFileMetaData->setCString(kKeyYear, tmp); 3309 } 3310 break; 3311 } 3312 3313 default: 3314 break; 3315 } 3316 3317 if (metadataKey > 0) { 3318 bool isUTF8 = true; // Common case 3319 char16_t *framedata = NULL; 3320 int len16 = 0; // Number of UTF-16 characters 3321 3322 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 
0x00 3323 if (size < 6) { 3324 delete[] buffer; 3325 buffer = NULL; 3326 return ERROR_MALFORMED; 3327 } 3328 3329 if (size - 6 >= 4) { 3330 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator 3331 framedata = (char16_t *)(buffer + 6); 3332 if (0xfffe == *framedata) { 3333 // endianness marker (BOM) doesn't match host endianness 3334 for (int i = 0; i < len16; i++) { 3335 framedata[i] = bswap_16(framedata[i]); 3336 } 3337 // BOM is now swapped to 0xfeff, we will execute next block too 3338 } 3339 3340 if (0xfeff == *framedata) { 3341 // Remove the BOM 3342 framedata++; 3343 len16--; 3344 isUTF8 = false; 3345 } 3346 // else normal non-zero-length UTF-8 string 3347 // we can't handle UTF-16 without BOM as there is no other 3348 // indication of encoding. 3349 } 3350 3351 if (isUTF8) { 3352 buffer[size] = 0; 3353 mFileMetaData->setCString(metadataKey, (const char *)buffer + 6); 3354 } else { 3355 // Convert from UTF-16 string to UTF-8 string. 3356 String8 tmpUTF8str(framedata, len16); 3357 mFileMetaData->setCString(metadataKey, tmpUTF8str.string()); 3358 } 3359 } 3360 3361 delete[] buffer; 3362 buffer = NULL; 3363 3364 return OK; 3365} 3366 3367void MPEG4Extractor::parseID3v2MetaData(off64_t offset) { 3368 ID3 id3(mDataSource, true /* ignorev1 */, offset); 3369 3370 if (id3.isValid()) { 3371 struct Map { 3372 int key; 3373 const char *tag1; 3374 const char *tag2; 3375 }; 3376 static const Map kMap[] = { 3377 { kKeyAlbum, "TALB", "TAL" }, 3378 { kKeyArtist, "TPE1", "TP1" }, 3379 { kKeyAlbumArtist, "TPE2", "TP2" }, 3380 { kKeyComposer, "TCOM", "TCM" }, 3381 { kKeyGenre, "TCON", "TCO" }, 3382 { kKeyTitle, "TIT2", "TT2" }, 3383 { kKeyYear, "TYE", "TYER" }, 3384 { kKeyAuthor, "TXT", "TEXT" }, 3385 { kKeyCDTrackNumber, "TRK", "TRCK" }, 3386 { kKeyDiscNumber, "TPA", "TPOS" }, 3387 { kKeyCompilation, "TCP", "TCMP" }, 3388 }; 3389 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 3390 3391 for (size_t i = 0; i < kNumMapEntries; ++i) { 3392 if 
(!mFileMetaData->hasData(kMap[i].key)) { 3393 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 3394 if (it->done()) { 3395 delete it; 3396 it = new ID3::Iterator(id3, kMap[i].tag2); 3397 } 3398 3399 if (it->done()) { 3400 delete it; 3401 continue; 3402 } 3403 3404 String8 s; 3405 it->getString(&s); 3406 delete it; 3407 3408 mFileMetaData->setCString(kMap[i].key, s); 3409 } 3410 } 3411 3412 size_t dataSize; 3413 String8 mime; 3414 const void *data = id3.getAlbumArt(&dataSize, &mime); 3415 3416 if (data) { 3417 mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 3418 mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string()); 3419 } 3420 } 3421} 3422 3423sp<MediaSource> MPEG4Extractor::getTrack(size_t index) { 3424 status_t err; 3425 if ((err = readMetaData()) != OK) { 3426 return NULL; 3427 } 3428 3429 Track *track = mFirstTrack; 3430 while (index > 0) { 3431 if (track == NULL) { 3432 return NULL; 3433 } 3434 3435 track = track->next; 3436 --index; 3437 } 3438 3439 if (track == NULL) { 3440 return NULL; 3441 } 3442 3443 3444 Trex *trex = NULL; 3445 int32_t trackId; 3446 if (track->meta->findInt32(kKeyTrackID, &trackId)) { 3447 for (size_t i = 0; i < mTrex.size(); i++) { 3448 Trex *t = &mTrex.editItemAt(i); 3449 if (t->track_ID == (uint32_t) trackId) { 3450 trex = t; 3451 break; 3452 } 3453 } 3454 } else { 3455 ALOGE("b/21657957"); 3456 return NULL; 3457 } 3458 3459 ALOGV("getTrack called, pssh: %zu", mPssh.size()); 3460 3461 const char *mime; 3462 if (!track->meta->findCString(kKeyMIMEType, &mime)) { 3463 return NULL; 3464 } 3465 3466 sp<ItemTable> itemTable; 3467 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 3468 uint32_t type; 3469 const void *data; 3470 size_t size; 3471 if (!track->meta->findData(kKeyAVCC, &type, &data, &size)) { 3472 return NULL; 3473 } 3474 3475 const uint8_t *ptr = (const uint8_t *)data; 3476 3477 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1 3478 return NULL; 3479 } 3480 } else if 
(!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) 3481 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) { 3482 uint32_t type; 3483 const void *data; 3484 size_t size; 3485 if (!track->meta->findData(kKeyHVCC, &type, &data, &size)) { 3486 return NULL; 3487 } 3488 3489 const uint8_t *ptr = (const uint8_t *)data; 3490 3491 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1 3492 return NULL; 3493 } 3494 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) { 3495 itemTable = mItemTable; 3496 } 3497 } 3498 3499 sp<MPEG4Source> source = new MPEG4Source(this, 3500 track->meta, mDataSource, track->timescale, track->sampleTable, 3501 mSidxEntries, trex, mMoofOffset, itemTable); 3502 if (source->init() != OK) { 3503 return NULL; 3504 } 3505 return source; 3506} 3507 3508// static 3509status_t MPEG4Extractor::verifyTrack(Track *track) { 3510 const char *mime; 3511 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 3512 3513 uint32_t type; 3514 const void *data; 3515 size_t size; 3516 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 3517 if (!track->meta->findData(kKeyAVCC, &type, &data, &size) 3518 || type != kTypeAVCC) { 3519 return ERROR_MALFORMED; 3520 } 3521 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 3522 if (!track->meta->findData(kKeyHVCC, &type, &data, &size) 3523 || type != kTypeHVCC) { 3524 return ERROR_MALFORMED; 3525 } 3526 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 3527 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2) 3528 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 3529 if (!track->meta->findData(kKeyESDS, &type, &data, &size) 3530 || type != kTypeESDS) { 3531 return ERROR_MALFORMED; 3532 } 3533 } 3534 3535 if (track->sampleTable == NULL || !track->sampleTable->isValid()) { 3536 // Make sure we have all the metadata we need. 
3537 ALOGE("stbl atom missing/invalid."); 3538 return ERROR_MALFORMED; 3539 } 3540 3541 if (track->timescale == 0) { 3542 ALOGE("timescale invalid."); 3543 return ERROR_MALFORMED; 3544 } 3545 3546 return OK; 3547} 3548 3549typedef enum { 3550 //AOT_NONE = -1, 3551 //AOT_NULL_OBJECT = 0, 3552 //AOT_AAC_MAIN = 1, /**< Main profile */ 3553 AOT_AAC_LC = 2, /**< Low Complexity object */ 3554 //AOT_AAC_SSR = 3, 3555 //AOT_AAC_LTP = 4, 3556 AOT_SBR = 5, 3557 //AOT_AAC_SCAL = 6, 3558 //AOT_TWIN_VQ = 7, 3559 //AOT_CELP = 8, 3560 //AOT_HVXC = 9, 3561 //AOT_RSVD_10 = 10, /**< (reserved) */ 3562 //AOT_RSVD_11 = 11, /**< (reserved) */ 3563 //AOT_TTSI = 12, /**< TTSI Object */ 3564 //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */ 3565 //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */ 3566 //AOT_GEN_MIDI = 15, /**< General MIDI object */ 3567 //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */ 3568 AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */ 3569 //AOT_RSVD_18 = 18, /**< (reserved) */ 3570 //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */ 3571 AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */ 3572 //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */ 3573 AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */ 3574 AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */ 3575 //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */ 3576 //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */ 3577 //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */ 3578 //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */ 3579 //AOT_RSVD_28 = 28, /**< might become SSC */ 3580 AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */ 3581 //AOT_MPEGS = 30, /**< MPEG Surround */ 3582 3583 AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */ 3584 3585 //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */ 3586 //AOT_MP3ONMP4_L2 = 33, /**< 
MPEG-Layer2 in mp4 */ 3587 //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */ 3588 //AOT_RSVD_35 = 35, /**< might become DST */ 3589 //AOT_RSVD_36 = 36, /**< might become ALS */ 3590 //AOT_AAC_SLS = 37, /**< AAC + SLS */ 3591 //AOT_SLS = 38, /**< SLS */ 3592 //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */ 3593 3594 //AOT_USAC = 42, /**< USAC */ 3595 //AOT_SAOC = 43, /**< SAOC */ 3596 //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */ 3597 3598 //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */ 3599} AUDIO_OBJECT_TYPE; 3600 3601status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 3602 const void *esds_data, size_t esds_size) { 3603 ESDS esds(esds_data, esds_size); 3604 3605 uint8_t objectTypeIndication; 3606 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 3607 return ERROR_MALFORMED; 3608 } 3609 3610 if (objectTypeIndication == 0xe1) { 3611 // This isn't MPEG4 audio at all, it's QCELP 14k... 3612 if (mLastTrack == NULL) 3613 return ERROR_MALFORMED; 3614 3615 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 3616 return OK; 3617 } 3618 3619 if (objectTypeIndication == 0x6b) { 3620 // The media subtype is MP3 audio 3621 // Our software MP3 audio decoder may not be able to handle 3622 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 3623 ALOGE("MP3 track in MP4/3GPP file is not supported"); 3624 return ERROR_UNSUPPORTED; 3625 } 3626 3627 if (mLastTrack != NULL) { 3628 uint32_t maxBitrate = 0; 3629 uint32_t avgBitrate = 0; 3630 esds.getBitRate(&maxBitrate, &avgBitrate); 3631 if (maxBitrate > 0 && maxBitrate < INT32_MAX) { 3632 mLastTrack->meta->setInt32(kKeyMaxBitRate, (int32_t)maxBitrate); 3633 } 3634 if (avgBitrate > 0 && avgBitrate < INT32_MAX) { 3635 mLastTrack->meta->setInt32(kKeyBitRate, (int32_t)avgBitrate); 3636 } 3637 } 3638 3639 const uint8_t *csd; 3640 size_t csd_size; 3641 if (esds.getCodecSpecificInfo( 3642 (const void **)&csd, &csd_size) != OK) { 3643 return ERROR_MALFORMED; 
3644 } 3645 3646 if (kUseHexDump) { 3647 printf("ESD of size %zu\n", csd_size); 3648 hexdump(csd, csd_size); 3649 } 3650 3651 if (csd_size == 0) { 3652 // There's no further information, i.e. no codec specific data 3653 // Let's assume that the information provided in the mpeg4 headers 3654 // is accurate and hope for the best. 3655 3656 return OK; 3657 } 3658 3659 if (csd_size < 2) { 3660 return ERROR_MALFORMED; 3661 } 3662 3663 static uint32_t kSamplingRate[] = { 3664 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 3665 16000, 12000, 11025, 8000, 7350 3666 }; 3667 3668 ABitReader br(csd, csd_size); 3669 uint32_t objectType = br.getBits(5); 3670 3671 if (objectType == 31) { // AAC-ELD => additional 6 bits 3672 objectType = 32 + br.getBits(6); 3673 } 3674 3675 if (mLastTrack == NULL) 3676 return ERROR_MALFORMED; 3677 3678 //keep AOT type 3679 mLastTrack->meta->setInt32(kKeyAACAOT, objectType); 3680 3681 uint32_t freqIndex = br.getBits(4); 3682 3683 int32_t sampleRate = 0; 3684 int32_t numChannels = 0; 3685 if (freqIndex == 15) { 3686 if (br.numBitsLeft() < 28) return ERROR_MALFORMED; 3687 sampleRate = br.getBits(24); 3688 numChannels = br.getBits(4); 3689 } else { 3690 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3691 numChannels = br.getBits(4); 3692 3693 if (freqIndex == 13 || freqIndex == 14) { 3694 return ERROR_MALFORMED; 3695 } 3696 3697 sampleRate = kSamplingRate[freqIndex]; 3698 } 3699 3700 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 3701 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3702 uint32_t extFreqIndex = br.getBits(4); 3703 int32_t extSampleRate __unused; 3704 if (extFreqIndex == 15) { 3705 if (csd_size < 8) { 3706 return ERROR_MALFORMED; 3707 } 3708 if (br.numBitsLeft() < 24) return ERROR_MALFORMED; 3709 extSampleRate = br.getBits(24); 3710 } else { 3711 if (extFreqIndex == 13 || extFreqIndex == 14) { 3712 return ERROR_MALFORMED; 3713 } 3714 extSampleRate = 
kSamplingRate[extFreqIndex]; 3715 } 3716 //TODO: save the extension sampling rate value in meta data => 3717 // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate); 3718 } 3719 3720 switch (numChannels) { 3721 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration 3722 case 0: 3723 case 1:// FC 3724 case 2:// FL FR 3725 case 3:// FC, FL FR 3726 case 4:// FC, FL FR, RC 3727 case 5:// FC, FL FR, SL SR 3728 case 6:// FC, FL FR, SL SR, LFE 3729 //numChannels already contains the right value 3730 break; 3731 case 11:// FC, FL FR, SL SR, RC, LFE 3732 numChannels = 7; 3733 break; 3734 case 7: // FC, FCL FCR, FL FR, SL SR, LFE 3735 case 12:// FC, FL FR, SL SR, RL RR, LFE 3736 case 14:// FC, FL FR, SL SR, LFE, FHL FHR 3737 numChannels = 8; 3738 break; 3739 default: 3740 return ERROR_UNSUPPORTED; 3741 } 3742 3743 { 3744 if (objectType == AOT_SBR || objectType == AOT_PS) { 3745 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3746 objectType = br.getBits(5); 3747 3748 if (objectType == AOT_ESCAPE) { 3749 if (br.numBitsLeft() < 6) return ERROR_MALFORMED; 3750 objectType = 32 + br.getBits(6); 3751 } 3752 } 3753 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || 3754 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || 3755 objectType == AOT_ER_BSAC) { 3756 if (br.numBitsLeft() < 2) return ERROR_MALFORMED; 3757 const int32_t frameLengthFlag __unused = br.getBits(1); 3758 3759 const int32_t dependsOnCoreCoder = br.getBits(1); 3760 3761 if (dependsOnCoreCoder ) { 3762 if (br.numBitsLeft() < 14) return ERROR_MALFORMED; 3763 const int32_t coreCoderDelay __unused = br.getBits(14); 3764 } 3765 3766 int32_t extensionFlag = -1; 3767 if (br.numBitsLeft() > 0) { 3768 extensionFlag = br.getBits(1); 3769 } else { 3770 switch (objectType) { 3771 // 14496-3 4.5.1.1 extensionFlag 3772 case AOT_AAC_LC: 3773 extensionFlag = 0; 3774 break; 3775 case AOT_ER_AAC_LC: 3776 case AOT_ER_AAC_SCAL: 3777 case AOT_ER_BSAC: 3778 case AOT_ER_AAC_LD: 
3779 extensionFlag = 1; 3780 break; 3781 default: 3782 return ERROR_MALFORMED; 3783 break; 3784 } 3785 ALOGW("csd missing extension flag; assuming %d for object type %u.", 3786 extensionFlag, objectType); 3787 } 3788 3789 if (numChannels == 0) { 3790 int32_t channelsEffectiveNum = 0; 3791 int32_t channelsNum = 0; 3792 if (br.numBitsLeft() < 32) { 3793 return ERROR_MALFORMED; 3794 } 3795 const int32_t ElementInstanceTag __unused = br.getBits(4); 3796 const int32_t Profile __unused = br.getBits(2); 3797 const int32_t SamplingFrequencyIndex __unused = br.getBits(4); 3798 const int32_t NumFrontChannelElements = br.getBits(4); 3799 const int32_t NumSideChannelElements = br.getBits(4); 3800 const int32_t NumBackChannelElements = br.getBits(4); 3801 const int32_t NumLfeChannelElements = br.getBits(2); 3802 const int32_t NumAssocDataElements __unused = br.getBits(3); 3803 const int32_t NumValidCcElements __unused = br.getBits(4); 3804 3805 const int32_t MonoMixdownPresent = br.getBits(1); 3806 3807 if (MonoMixdownPresent != 0) { 3808 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3809 const int32_t MonoMixdownElementNumber __unused = br.getBits(4); 3810 } 3811 3812 if (br.numBitsLeft() < 1) return ERROR_MALFORMED; 3813 const int32_t StereoMixdownPresent = br.getBits(1); 3814 if (StereoMixdownPresent != 0) { 3815 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3816 const int32_t StereoMixdownElementNumber __unused = br.getBits(4); 3817 } 3818 3819 if (br.numBitsLeft() < 1) return ERROR_MALFORMED; 3820 const int32_t MatrixMixdownIndexPresent = br.getBits(1); 3821 if (MatrixMixdownIndexPresent != 0) { 3822 if (br.numBitsLeft() < 3) return ERROR_MALFORMED; 3823 const int32_t MatrixMixdownIndex __unused = br.getBits(2); 3824 const int32_t PseudoSurroundEnable __unused = br.getBits(1); 3825 } 3826 3827 int i; 3828 for (i=0; i < NumFrontChannelElements; i++) { 3829 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3830 const int32_t FrontElementIsCpe = br.getBits(1); 3831 
const int32_t FrontElementTagSelect __unused = br.getBits(4); 3832 channelsNum += FrontElementIsCpe ? 2 : 1; 3833 } 3834 3835 for (i=0; i < NumSideChannelElements; i++) { 3836 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3837 const int32_t SideElementIsCpe = br.getBits(1); 3838 const int32_t SideElementTagSelect __unused = br.getBits(4); 3839 channelsNum += SideElementIsCpe ? 2 : 1; 3840 } 3841 3842 for (i=0; i < NumBackChannelElements; i++) { 3843 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3844 const int32_t BackElementIsCpe = br.getBits(1); 3845 const int32_t BackElementTagSelect __unused = br.getBits(4); 3846 channelsNum += BackElementIsCpe ? 2 : 1; 3847 } 3848 channelsEffectiveNum = channelsNum; 3849 3850 for (i=0; i < NumLfeChannelElements; i++) { 3851 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3852 const int32_t LfeElementTagSelect __unused = br.getBits(4); 3853 channelsNum += 1; 3854 } 3855 ALOGV("mpeg4 audio channelsNum = %d", channelsNum); 3856 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); 3857 numChannels = channelsNum; 3858 } 3859 } 3860 } 3861 3862 if (numChannels == 0) { 3863 return ERROR_UNSUPPORTED; 3864 } 3865 3866 if (mLastTrack == NULL) 3867 return ERROR_MALFORMED; 3868 3869 int32_t prevSampleRate; 3870 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); 3871 3872 if (prevSampleRate != sampleRate) { 3873 ALOGV("mpeg4 audio sample rate different from previous setting. " 3874 "was: %d, now: %d", prevSampleRate, sampleRate); 3875 } 3876 3877 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 3878 3879 int32_t prevChannelCount; 3880 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); 3881 3882 if (prevChannelCount != numChannels) { 3883 ALOGV("mpeg4 audio channel count different from previous setting. 
" 3884 "was: %d, now: %d", prevChannelCount, numChannels); 3885 } 3886 3887 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); 3888 3889 return OK; 3890} 3891 3892//////////////////////////////////////////////////////////////////////////////// 3893 3894MPEG4Source::MPEG4Source( 3895 const sp<MPEG4Extractor> &owner, 3896 const sp<MetaData> &format, 3897 const sp<DataSource> &dataSource, 3898 int32_t timeScale, 3899 const sp<SampleTable> &sampleTable, 3900 Vector<SidxEntry> &sidx, 3901 const Trex *trex, 3902 off64_t firstMoofOffset, 3903 const sp<ItemTable> &itemTable) 3904 : mOwner(owner), 3905 mFormat(format), 3906 mDataSource(dataSource), 3907 mTimescale(timeScale), 3908 mSampleTable(sampleTable), 3909 mCurrentSampleIndex(0), 3910 mCurrentFragmentIndex(0), 3911 mSegments(sidx), 3912 mTrex(trex), 3913 mFirstMoofOffset(firstMoofOffset), 3914 mCurrentMoofOffset(firstMoofOffset), 3915 mNextMoofOffset(-1), 3916 mCurrentTime(0), 3917 mCurrentSampleInfoAllocSize(0), 3918 mCurrentSampleInfoSizes(NULL), 3919 mCurrentSampleInfoOffsetsAllocSize(0), 3920 mCurrentSampleInfoOffsets(NULL), 3921 mIsAVC(false), 3922 mIsHEVC(false), 3923 mNALLengthSize(0), 3924 mStarted(false), 3925 mGroup(NULL), 3926 mBuffer(NULL), 3927 mWantsNALFragments(false), 3928 mSrcBuffer(NULL), 3929 mIsHeif(itemTable != NULL), 3930 mItemTable(itemTable) { 3931 3932 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo)); 3933 3934 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode); 3935 mDefaultIVSize = 0; 3936 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 3937 uint32_t keytype; 3938 const void *key; 3939 size_t keysize; 3940 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 3941 CHECK(keysize <= 16); 3942 memset(mCryptoKey, 0, 16); 3943 memcpy(mCryptoKey, key, keysize); 3944 } 3945 3946 const char *mime; 3947 bool success = mFormat->findCString(kKeyMIMEType, &mime); 3948 CHECK(success); 3949 3950 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 
3951 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) || 3952 !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC); 3953 3954 if (mIsAVC) { 3955 uint32_t type; 3956 const void *data; 3957 size_t size; 3958 CHECK(format->findData(kKeyAVCC, &type, &data, &size)); 3959 3960 const uint8_t *ptr = (const uint8_t *)data; 3961 3962 CHECK(size >= 7); 3963 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3964 3965 // The number of bytes used to encode the length of a NAL unit. 3966 mNALLengthSize = 1 + (ptr[4] & 3); 3967 } else if (mIsHEVC) { 3968 uint32_t type; 3969 const void *data; 3970 size_t size; 3971 CHECK(format->findData(kKeyHVCC, &type, &data, &size)); 3972 3973 const uint8_t *ptr = (const uint8_t *)data; 3974 3975 CHECK(size >= 22); 3976 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3977 3978 mNALLengthSize = 1 + (ptr[14 + 7] & 3); 3979 } 3980 3981 CHECK(format->findInt32(kKeyTrackID, &mTrackId)); 3982 3983} 3984 3985status_t MPEG4Source::init() { 3986 if (mFirstMoofOffset != 0) { 3987 off64_t offset = mFirstMoofOffset; 3988 return parseChunk(&offset); 3989 } 3990 return OK; 3991} 3992 3993MPEG4Source::~MPEG4Source() { 3994 if (mStarted) { 3995 stop(); 3996 } 3997 free(mCurrentSampleInfoSizes); 3998 free(mCurrentSampleInfoOffsets); 3999} 4000 4001status_t MPEG4Source::start(MetaData *params) { 4002 Mutex::Autolock autoLock(mLock); 4003 4004 CHECK(!mStarted); 4005 4006 int32_t val; 4007 if (params && params->findInt32(kKeyWantsNALFragments, &val) 4008 && val != 0) { 4009 mWantsNALFragments = true; 4010 } else { 4011 mWantsNALFragments = false; 4012 } 4013 4014 int32_t tmp; 4015 CHECK(mFormat->findInt32(kKeyMaxInputSize, &tmp)); 4016 size_t max_size = tmp; 4017 4018 // A somewhat arbitrary limit that should be sufficient for 8k video frames 4019 // If you see the message below for a valid input stream: increase the limit 4020 const size_t kMaxBufferSize = 64 * 1024 * 1024; 4021 if (max_size > kMaxBufferSize) { 4022 ALOGE("bogus max 
input size: %zu > %zu", max_size, kMaxBufferSize); 4023 return ERROR_MALFORMED; 4024 } 4025 if (max_size == 0) { 4026 ALOGE("zero max input size"); 4027 return ERROR_MALFORMED; 4028 } 4029 4030 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize. 4031 const size_t kMaxBuffers = 8; 4032 const size_t buffers = min(kMaxBufferSize / max_size, kMaxBuffers); 4033 mGroup = new MediaBufferGroup(buffers, max_size); 4034 mSrcBuffer = new (std::nothrow) uint8_t[max_size]; 4035 if (mSrcBuffer == NULL) { 4036 // file probably specified a bad max size 4037 delete mGroup; 4038 mGroup = NULL; 4039 return ERROR_MALFORMED; 4040 } 4041 4042 mStarted = true; 4043 4044 return OK; 4045} 4046 4047status_t MPEG4Source::stop() { 4048 Mutex::Autolock autoLock(mLock); 4049 4050 CHECK(mStarted); 4051 4052 if (mBuffer != NULL) { 4053 mBuffer->release(); 4054 mBuffer = NULL; 4055 } 4056 4057 delete[] mSrcBuffer; 4058 mSrcBuffer = NULL; 4059 4060 delete mGroup; 4061 mGroup = NULL; 4062 4063 mStarted = false; 4064 mCurrentSampleIndex = 0; 4065 4066 return OK; 4067} 4068 4069status_t MPEG4Source::parseChunk(off64_t *offset) { 4070 uint32_t hdr[2]; 4071 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 4072 return ERROR_IO; 4073 } 4074 uint64_t chunk_size = ntohl(hdr[0]); 4075 uint32_t chunk_type = ntohl(hdr[1]); 4076 off64_t data_offset = *offset + 8; 4077 4078 if (chunk_size == 1) { 4079 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 4080 return ERROR_IO; 4081 } 4082 chunk_size = ntoh64(chunk_size); 4083 data_offset += 8; 4084 4085 if (chunk_size < 16) { 4086 // The smallest valid chunk is 16 bytes long in this case. 4087 return ERROR_MALFORMED; 4088 } 4089 } else if (chunk_size < 8) { 4090 // The smallest valid chunk is 8 bytes long. 
4091 return ERROR_MALFORMED; 4092 } 4093 4094 char chunk[5]; 4095 MakeFourCCString(chunk_type, chunk); 4096 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset); 4097 4098 off64_t chunk_data_size = *offset + chunk_size - data_offset; 4099 4100 switch(chunk_type) { 4101 4102 case FOURCC('t', 'r', 'a', 'f'): 4103 case FOURCC('m', 'o', 'o', 'f'): { 4104 off64_t stop_offset = *offset + chunk_size; 4105 *offset = data_offset; 4106 while (*offset < stop_offset) { 4107 status_t err = parseChunk(offset); 4108 if (err != OK) { 4109 return err; 4110 } 4111 } 4112 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 4113 // *offset points to the box following this moof. Find the next moof from there. 4114 4115 while (true) { 4116 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 4117 // no more box to the end of file. 4118 break; 4119 } 4120 chunk_size = ntohl(hdr[0]); 4121 chunk_type = ntohl(hdr[1]); 4122 if (chunk_size == 1) { 4123 // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box 4124 // which is defined in 4.2 Object Structure. 4125 // When chunk_size==1, 8 bytes follows as "largesize". 4126 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 4127 return ERROR_IO; 4128 } 4129 chunk_size = ntoh64(chunk_size); 4130 if (chunk_size < 16) { 4131 // The smallest valid chunk is 16 bytes long in this case. 4132 return ERROR_MALFORMED; 4133 } 4134 } else if (chunk_size == 0) { 4135 // next box extends to end of file. 4136 } else if (chunk_size < 8) { 4137 // The smallest valid chunk is 8 bytes long in this case. 
4138 return ERROR_MALFORMED; 4139 } 4140 4141 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 4142 mNextMoofOffset = *offset; 4143 break; 4144 } else if (chunk_size == 0) { 4145 break; 4146 } 4147 *offset += chunk_size; 4148 } 4149 } 4150 break; 4151 } 4152 4153 case FOURCC('t', 'f', 'h', 'd'): { 4154 status_t err; 4155 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 4156 return err; 4157 } 4158 *offset += chunk_size; 4159 break; 4160 } 4161 4162 case FOURCC('t', 'r', 'u', 'n'): { 4163 status_t err; 4164 if (mLastParsedTrackId == mTrackId) { 4165 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 4166 return err; 4167 } 4168 } 4169 4170 *offset += chunk_size; 4171 break; 4172 } 4173 4174 case FOURCC('s', 'a', 'i', 'z'): { 4175 status_t err; 4176 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 4177 return err; 4178 } 4179 *offset += chunk_size; 4180 break; 4181 } 4182 case FOURCC('s', 'a', 'i', 'o'): { 4183 status_t err; 4184 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 4185 return err; 4186 } 4187 *offset += chunk_size; 4188 break; 4189 } 4190 4191 case FOURCC('m', 'd', 'a', 't'): { 4192 // parse DRM info if present 4193 ALOGV("MPEG4Source::parseChunk mdat"); 4194 // if saiz/saoi was previously observed, do something with the sampleinfos 4195 *offset += chunk_size; 4196 break; 4197 } 4198 4199 default: { 4200 *offset += chunk_size; 4201 break; 4202 } 4203 } 4204 return OK; 4205} 4206 4207status_t MPEG4Source::parseSampleAuxiliaryInformationSizes( 4208 off64_t offset, off64_t /* size */) { 4209 ALOGV("parseSampleAuxiliaryInformationSizes"); 4210 // 14496-12 8.7.12 4211 uint8_t version; 4212 if (mDataSource->readAt( 4213 offset, &version, sizeof(version)) 4214 < (ssize_t)sizeof(version)) { 4215 return ERROR_IO; 4216 } 4217 4218 if (version != 0) { 4219 return ERROR_UNSUPPORTED; 4220 } 4221 offset++; 4222 4223 uint32_t flags; 4224 if 
(!mDataSource->getUInt24(offset, &flags)) { 4225 return ERROR_IO; 4226 } 4227 offset += 3; 4228 4229 if (flags & 1) { 4230 uint32_t tmp; 4231 if (!mDataSource->getUInt32(offset, &tmp)) { 4232 return ERROR_MALFORMED; 4233 } 4234 mCurrentAuxInfoType = tmp; 4235 offset += 4; 4236 if (!mDataSource->getUInt32(offset, &tmp)) { 4237 return ERROR_MALFORMED; 4238 } 4239 mCurrentAuxInfoTypeParameter = tmp; 4240 offset += 4; 4241 } 4242 4243 uint8_t defsize; 4244 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 4245 return ERROR_MALFORMED; 4246 } 4247 mCurrentDefaultSampleInfoSize = defsize; 4248 offset++; 4249 4250 uint32_t smplcnt; 4251 if (!mDataSource->getUInt32(offset, &smplcnt)) { 4252 return ERROR_MALFORMED; 4253 } 4254 mCurrentSampleInfoCount = smplcnt; 4255 offset += 4; 4256 4257 if (mCurrentDefaultSampleInfoSize != 0) { 4258 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 4259 return OK; 4260 } 4261 if (smplcnt > mCurrentSampleInfoAllocSize) { 4262 uint8_t * newPtr = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 4263 if (newPtr == NULL) { 4264 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt); 4265 return NO_MEMORY; 4266 } 4267 mCurrentSampleInfoSizes = newPtr; 4268 mCurrentSampleInfoAllocSize = smplcnt; 4269 } 4270 4271 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 4272 return OK; 4273} 4274 4275status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets( 4276 off64_t offset, off64_t /* size */) { 4277 ALOGV("parseSampleAuxiliaryInformationOffsets"); 4278 // 14496-12 8.7.13 4279 uint8_t version; 4280 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 4281 return ERROR_IO; 4282 } 4283 offset++; 4284 4285 uint32_t flags; 4286 if (!mDataSource->getUInt24(offset, &flags)) { 4287 return ERROR_IO; 4288 } 4289 offset += 3; 4290 4291 uint32_t entrycount; 4292 if (!mDataSource->getUInt32(offset, &entrycount)) { 4293 return ERROR_IO; 4294 } 4295 offset += 4; 4296 if 
(entrycount == 0) { 4297 return OK; 4298 } 4299 if (entrycount > UINT32_MAX / 8) { 4300 return ERROR_MALFORMED; 4301 } 4302 4303 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 4304 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8); 4305 if (newPtr == NULL) { 4306 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoOffsetsAllocSize, entrycount * 8); 4307 return NO_MEMORY; 4308 } 4309 mCurrentSampleInfoOffsets = newPtr; 4310 mCurrentSampleInfoOffsetsAllocSize = entrycount; 4311 } 4312 mCurrentSampleInfoOffsetCount = entrycount; 4313 4314 if (mCurrentSampleInfoOffsets == NULL) { 4315 return OK; 4316 } 4317 4318 for (size_t i = 0; i < entrycount; i++) { 4319 if (version == 0) { 4320 uint32_t tmp; 4321 if (!mDataSource->getUInt32(offset, &tmp)) { 4322 return ERROR_IO; 4323 } 4324 mCurrentSampleInfoOffsets[i] = tmp; 4325 offset += 4; 4326 } else { 4327 uint64_t tmp; 4328 if (!mDataSource->getUInt64(offset, &tmp)) { 4329 return ERROR_IO; 4330 } 4331 mCurrentSampleInfoOffsets[i] = tmp; 4332 offset += 8; 4333 } 4334 } 4335 4336 // parse clear/encrypted data 4337 4338 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 4339 4340 drmoffset += mCurrentMoofOffset; 4341 int ivlength; 4342 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 4343 4344 // only 0, 8 and 16 byte initialization vectors are supported 4345 if (ivlength != 0 && ivlength != 8 && ivlength != 16) { 4346 ALOGW("unsupported IV length: %d", ivlength); 4347 return ERROR_MALFORMED; 4348 } 4349 // read CencSampleAuxiliaryDataFormats 4350 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) { 4351 if (i >= mCurrentSamples.size()) { 4352 ALOGW("too few samples"); 4353 break; 4354 } 4355 Sample *smpl = &mCurrentSamples.editItemAt(i); 4356 4357 memset(smpl->iv, 0, 16); 4358 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) { 4359 return ERROR_IO; 4360 } 4361 4362 drmoffset += ivlength; 4363 4364 int32_t smplinfosize = 
mCurrentDefaultSampleInfoSize; 4365 if (smplinfosize == 0) { 4366 smplinfosize = mCurrentSampleInfoSizes[i]; 4367 } 4368 if (smplinfosize > ivlength) { 4369 uint16_t numsubsamples; 4370 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) { 4371 return ERROR_IO; 4372 } 4373 drmoffset += 2; 4374 for (size_t j = 0; j < numsubsamples; j++) { 4375 uint16_t numclear; 4376 uint32_t numencrypted; 4377 if (!mDataSource->getUInt16(drmoffset, &numclear)) { 4378 return ERROR_IO; 4379 } 4380 drmoffset += 2; 4381 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) { 4382 return ERROR_IO; 4383 } 4384 drmoffset += 4; 4385 smpl->clearsizes.add(numclear); 4386 smpl->encryptedsizes.add(numencrypted); 4387 } 4388 } else { 4389 smpl->clearsizes.add(0); 4390 smpl->encryptedsizes.add(smpl->size); 4391 } 4392 } 4393 4394 4395 return OK; 4396} 4397 4398status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 4399 4400 if (size < 8) { 4401 return -EINVAL; 4402 } 4403 4404 uint32_t flags; 4405 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 4406 return ERROR_MALFORMED; 4407 } 4408 4409 if (flags & 0xff000000) { 4410 return -EINVAL; 4411 } 4412 4413 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 4414 return ERROR_MALFORMED; 4415 } 4416 4417 if (mLastParsedTrackId != mTrackId) { 4418 // this is not the right track, skip it 4419 return OK; 4420 } 4421 4422 mTrackFragmentHeaderInfo.mFlags = flags; 4423 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 4424 offset += 8; 4425 size -= 8; 4426 4427 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 4428 4429 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 4430 if (size < 8) { 4431 return -EINVAL; 4432 } 4433 4434 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) { 4435 return ERROR_MALFORMED; 4436 } 4437 offset += 8; 4438 size -= 8; 4439 } 4440 4441 if (flags & 
TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 4442 if (size < 4) { 4443 return -EINVAL; 4444 } 4445 4446 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 4447 return ERROR_MALFORMED; 4448 } 4449 offset += 4; 4450 size -= 4; 4451 } 4452 4453 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 4454 if (size < 4) { 4455 return -EINVAL; 4456 } 4457 4458 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 4459 return ERROR_MALFORMED; 4460 } 4461 offset += 4; 4462 size -= 4; 4463 } 4464 4465 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 4466 if (size < 4) { 4467 return -EINVAL; 4468 } 4469 4470 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 4471 return ERROR_MALFORMED; 4472 } 4473 offset += 4; 4474 size -= 4; 4475 } 4476 4477 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 4478 if (size < 4) { 4479 return -EINVAL; 4480 } 4481 4482 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 4483 return ERROR_MALFORMED; 4484 } 4485 offset += 4; 4486 size -= 4; 4487 } 4488 4489 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 4490 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 4491 } 4492 4493 mTrackFragmentHeaderInfo.mDataOffset = 0; 4494 return OK; 4495} 4496 4497status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 4498 4499 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 4500 if (size < 8) { 4501 return -EINVAL; 4502 } 4503 4504 enum { 4505 kDataOffsetPresent = 0x01, 4506 kFirstSampleFlagsPresent = 0x04, 4507 kSampleDurationPresent = 0x100, 4508 kSampleSizePresent = 0x200, 4509 kSampleFlagsPresent = 0x400, 4510 kSampleCompositionTimeOffsetPresent = 0x800, 4511 }; 4512 4513 uint32_t flags; 4514 if (!mDataSource->getUInt32(offset, &flags)) { 4515 return ERROR_MALFORMED; 4516 } 4517 // |version| only affects 
SampleCompositionTimeOffset field. 4518 // If version == 0, SampleCompositionTimeOffset is uint32_t; 4519 // Otherwise, SampleCompositionTimeOffset is int32_t. 4520 // Sample.compositionOffset is defined as int32_t. 4521 uint8_t version = flags >> 24; 4522 flags &= 0xffffff; 4523 ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags); 4524 4525 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 4526 // These two shall not be used together. 4527 return -EINVAL; 4528 } 4529 4530 uint32_t sampleCount; 4531 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 4532 return ERROR_MALFORMED; 4533 } 4534 offset += 8; 4535 size -= 8; 4536 4537 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 4538 4539 uint32_t firstSampleFlags = 0; 4540 4541 if (flags & kDataOffsetPresent) { 4542 if (size < 4) { 4543 return -EINVAL; 4544 } 4545 4546 int32_t dataOffsetDelta; 4547 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 4548 return ERROR_MALFORMED; 4549 } 4550 4551 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 4552 4553 offset += 4; 4554 size -= 4; 4555 } 4556 4557 if (flags & kFirstSampleFlagsPresent) { 4558 if (size < 4) { 4559 return -EINVAL; 4560 } 4561 4562 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 4563 return ERROR_MALFORMED; 4564 } 4565 offset += 4; 4566 size -= 4; 4567 } 4568 4569 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 4570 sampleCtsOffset = 0; 4571 4572 size_t bytesPerSample = 0; 4573 if (flags & kSampleDurationPresent) { 4574 bytesPerSample += 4; 4575 } else if (mTrackFragmentHeaderInfo.mFlags 4576 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 4577 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 4578 } else if (mTrex) { 4579 sampleDuration = mTrex->default_sample_duration; 4580 } 4581 4582 if (flags & kSampleSizePresent) { 4583 bytesPerSample += 4; 4584 } else if (mTrackFragmentHeaderInfo.mFlags 4585 & 
TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 4586 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 4587 } else { 4588 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 4589 } 4590 4591 if (flags & kSampleFlagsPresent) { 4592 bytesPerSample += 4; 4593 } else if (mTrackFragmentHeaderInfo.mFlags 4594 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 4595 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 4596 } else { 4597 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 4598 } 4599 4600 if (flags & kSampleCompositionTimeOffsetPresent) { 4601 bytesPerSample += 4; 4602 } else { 4603 sampleCtsOffset = 0; 4604 } 4605 4606 if (size < (off64_t)(sampleCount * bytesPerSample)) { 4607 return -EINVAL; 4608 } 4609 4610 Sample tmp; 4611 for (uint32_t i = 0; i < sampleCount; ++i) { 4612 if (flags & kSampleDurationPresent) { 4613 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 4614 return ERROR_MALFORMED; 4615 } 4616 offset += 4; 4617 } 4618 4619 if (flags & kSampleSizePresent) { 4620 if (!mDataSource->getUInt32(offset, &sampleSize)) { 4621 return ERROR_MALFORMED; 4622 } 4623 offset += 4; 4624 } 4625 4626 if (flags & kSampleFlagsPresent) { 4627 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 4628 return ERROR_MALFORMED; 4629 } 4630 offset += 4; 4631 } 4632 4633 if (flags & kSampleCompositionTimeOffsetPresent) { 4634 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 4635 return ERROR_MALFORMED; 4636 } 4637 offset += 4; 4638 } 4639 4640 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, " 4641 " flags 0x%08x", i + 1, 4642 dataOffset, sampleSize, sampleDuration, 4643 (flags & kFirstSampleFlagsPresent) && i == 0 4644 ? 
firstSampleFlags : sampleFlags);
        tmp.offset = dataOffset;
        tmp.size = sampleSize;
        tmp.duration = sampleDuration;
        tmp.compositionOffset = sampleCtsOffset;
        mCurrentSamples.add(tmp);

        dataOffset += sampleSize;
    }

    mTrackFragmentHeaderInfo.mDataOffset = dataOffset;

    return OK;
}

// Returns the format metadata of this track (mFormat). mLock is held for
// the duration of the call.
sp<MetaData> MPEG4Source::getFormat() {
    Mutex::Autolock autoLock(mLock);

    return mFormat;
}

// Decodes the NAL unit length prefix located at |data|.
//
// The prefix is mNALLengthSize bytes wide (1..4). In the 3-byte case data[0]
// supplies bits 16..23, i.e. the most significant byte comes first; the other
// widths rely on U16_AT/U32_AT (presumably big-endian readers from
// ByteUtils.h -- confirm).
//
// No bounds checking is done here: the caller must guarantee that at least
// mNALLengthSize bytes are readable at |data|.
size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
    switch (mNALLengthSize) {
        case 1:
            return *data;
        case 2:
            return U16_AT(data);
        case 3:
            return ((size_t)data[0] << 16) | U16_AT(&data[1]);
        case 4:
            return U32_AT(data);
    }

    // This cannot happen, mNALLengthSize springs to life by adding 1 to
    // a 2-bit integer.
    CHECK(!"Should not be here.");

    return 0;
}

// Reads the next access unit (or, when mWantsNALFragments is set for AVC/HEVC,
// the next NAL unit) of a non-fragmented track into a buffer taken from mGroup.
//
// Outline:
//  1. Non-blocking requests return WOULD_BLOCK when mGroup has no free buffer.
//  2. If the file has movie fragments (mFirstMoofOffset > 0) the call is
//     forwarded to fragmentedRead().
//  3. An optional seek request repositions mCurrentSampleIndex (via mItemTable
//     for HEIF, via mSampleTable otherwise) and drops any partially consumed
//     buffer.
//  4. If no buffer is pending, the sample's offset/size/timing are looked up
//     and a buffer is acquired.
//  5. The payload is delivered in one of three shapes:
//       - non-AVC/HEVC: the raw sample;
//       - AVC/HEVC with mWantsNALFragments: one NAL unit per call, as a clone
//         of the sample buffer with its range narrowed to that NAL;
//       - AVC/HEVC otherwise: the whole sample with every length prefix
//         replaced by the 4-byte start code 00 00 00 01 (DRM content is passed
//         through unmodified).
//
// |out| receives the buffer on OK and is NULL/nullptr on the error paths
// visible here. The function holds mLock for its whole duration.
status_t MPEG4Source::read(
        MediaBuffer **out, const ReadOptions *options) {
    Mutex::Autolock autoLock(mLock);

    CHECK(mStarted);

    // Non-blocking callers must not wait inside acquire_buffer() below.
    if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) {
        *out = nullptr;
        return WOULD_BLOCK;
    }

    // Fragmented files are handled by a separate code path.
    if (mFirstMoofOffset > 0) {
        return fragmentedRead(out, options);
    }

    *out = NULL;

    // Stays negative unless a SEEK_CLOSEST / SEEK_FRAME_INDEX seek is handled;
    // when set it is attached to the output buffer as kKeyTargetTime.
    int64_t targetSampleTimeUs = -1;

    int64_t seekTimeUs;
    ReadOptions::SeekMode mode;
    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
        if (mIsHeif) {
            CHECK(mSampleTable == NULL);
            CHECK(mItemTable != NULL);
            int32_t imageIndex;
            if (!mFormat->findInt32(kKeyTrackID, &imageIndex)) {
                return ERROR_MALFORMED;
            }

            // For HEIF the sign of the seek time selects the item kind: a
            // non-negative time asks for the image item itself, a negative
            // time for its thumbnail.
            status_t err;
            if (seekTimeUs >= 0) {
                err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
            } else {
                err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
            }
            if (err != OK) {
                return err;
            }
        } else {
            // Translate the requested seek mode into SampleTable lookup flags.
            uint32_t findFlags = 0;
            switch (mode) {
                case ReadOptions::SEEK_PREVIOUS_SYNC:
                    findFlags = SampleTable::kFlagBefore;
                    break;
                case ReadOptions::SEEK_NEXT_SYNC:
                    findFlags = SampleTable::kFlagAfter;
                    break;
                case ReadOptions::SEEK_CLOSEST_SYNC:
                case ReadOptions::SEEK_CLOSEST:
                    findFlags = SampleTable::kFlagClosest;
                    break;
                case ReadOptions::SEEK_FRAME_INDEX:
                    findFlags = SampleTable::kFlagFrameIndex;
                    break;
                default:
                    CHECK(!"Should not be here.");
                    break;
            }

            uint32_t sampleIndex;
            status_t err = mSampleTable->findSampleAtTime(
                    seekTimeUs, 1000000, mTimescale,
                    &sampleIndex, findFlags);

            if (mode == ReadOptions::SEEK_CLOSEST
                    || mode == ReadOptions::SEEK_FRAME_INDEX) {
                // We found the closest sample already, now we want the sync
                // sample preceding it (or the sample itself of course), even
                // if the subsequent sync sample is closer.
                findFlags = SampleTable::kFlagBefore;
            }

            // syncSampleIndex and sampleTime are only written when the
            // preceding step succeeded; every failure returns below before
            // either value is used.
            uint32_t syncSampleIndex;
            if (err == OK) {
                err = mSampleTable->findSyncSampleNear(
                        sampleIndex, &syncSampleIndex, findFlags);
            }

            uint32_t sampleTime;
            if (err == OK) {
                err = mSampleTable->getMetaDataForSample(
                        sampleIndex, NULL, NULL, &sampleTime);
            }

            if (err != OK) {
                if (err == ERROR_OUT_OF_RANGE) {
                    // An attempt to seek past the end of the stream would
                    // normally cause this ERROR_OUT_OF_RANGE error. Propagating
                    // this all the way to the MediaPlayer would cause abnormal
                    // termination. Legacy behaviour appears to be to behave as if
                    // we had seeked to the end of stream, ending normally.
                    err = ERROR_END_OF_STREAM;
                }
                ALOGV("end of stream");
                return err;
            }

            if (mode == ReadOptions::SEEK_CLOSEST
                    || mode == ReadOptions::SEEK_FRAME_INDEX) {
                // Convert from track timescale units to microseconds; the
                // 1000000ll literal forces 64-bit arithmetic.
                targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
            }

#if 0
            uint32_t syncSampleTime;
            CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
                        syncSampleIndex, NULL, NULL, &syncSampleTime));

            ALOGI("seek to time %lld us => sample at time %lld us, "
                  "sync sample at time %lld us",
                  seekTimeUs,
                  sampleTime * 1000000ll / mTimescale,
                  syncSampleTime * 1000000ll / mTimescale);
#endif

            // Reading resumes at the sync sample, not at the sample that
            // matched the requested time.
            mCurrentSampleIndex = syncSampleIndex;
        }

        // A seek invalidates any partially delivered sample.
        if (mBuffer != NULL) {
            mBuffer->release();
            mBuffer = NULL;
        }

        // fall through
    }

    off64_t offset = 0;
    size_t size = 0;
    // cts, stts and isSyncSample are only assigned inside the
    // "mBuffer == NULL" block below, and only consumed on paths that either
    // require newBuffer or always start with mBuffer == NULL.
    uint32_t cts, stts;
    bool isSyncSample;
    bool newBuffer = false;
    if (mBuffer == NULL) {
        newBuffer = true;

        status_t err;
        if (!mIsHeif) {
            err = mSampleTable->getMetaDataForSample(
                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
        } else {
            // The item index is only passed when this read carries a seek
            // request; otherwise NULL is passed (meaning of NULL is defined by
            // ItemTable -- not visible here).
            err = mItemTable->getImageOffsetAndSize(
                    options && options->getSeekTo(&seekTimeUs, &mode) ?
                            &mCurrentSampleIndex : NULL, &offset, &size);

            cts = stts = 0;
            isSyncSample = 0;
            ALOGV("image offset %lld, size %zu", (long long)offset, size);
        }

        if (err != OK) {
            return err;
        }

        err = mGroup->acquire_buffer(&mBuffer);

        if (err != OK) {
            CHECK(mBuffer == NULL);
            return err;
        }
        // The sample must fit into the acquired buffer before anything is
        // read into it.
        if (size > mBuffer->size()) {
            ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
            mBuffer->release();
            mBuffer = NULL;
            return ERROR_BUFFER_TOO_SMALL;
        }
    }

    if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) {
        if (newBuffer) {
            // Fresh sample: read it straight into the output buffer and attach
            // timing metadata (timescale units converted to microseconds).
            ssize_t num_bytes_read =
                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);

            if (num_bytes_read < (ssize_t)size) {
                mBuffer->release();
                mBuffer = NULL;

                return ERROR_IO;
            }

            CHECK(mBuffer != NULL);
            mBuffer->set_range(0, size);
            mBuffer->meta_data()->clear();
            mBuffer->meta_data()->setInt64(
                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
            mBuffer->meta_data()->setInt64(
                    kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);

            if (targetSampleTimeUs >= 0) {
                mBuffer->meta_data()->setInt64(
                        kKeyTargetTime, targetSampleTimeUs);
            }

            if (isSyncSample) {
                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
            }

            ++mCurrentSampleIndex;
        }

        if (!mIsAVC && !mIsHEVC) {
            // Not a NAL-structured codec: hand the sample over unchanged and
            // give up our reference.
            *out = mBuffer;
            mBuffer = NULL;

            return OK;
        }

        // Each NAL unit is split up into its constituent fragments and
        // each one of them returned in its own buffer.

        // Aborts if fewer bytes than one length prefix remain in the buffer.
        CHECK(mBuffer->range_length() >= mNALLengthSize);

        const uint8_t *src =
            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();

        size_t nal_size = parseNALSize(src);
        // Log-only overflow marker; the subtraction-based comparison that
        // follows is what actually rejects an oversized nal_size.
        if (mNALLengthSize > SIZE_MAX - nal_size) {
            ALOGE("b/24441553, b/24445122");
        }
        if (mBuffer->range_length() - mNALLengthSize < nal_size) {
            ALOGE("incomplete NAL unit.");

            mBuffer->release();
            mBuffer = NULL;

            return ERROR_MALFORMED;
        }

        // The caller gets a clone whose range covers exactly this NAL payload
        // (length prefix skipped).
        MediaBuffer *clone = mBuffer->clone();
        CHECK(clone != NULL);
        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);

        // Advance our own buffer past the NAL just handed out; once it is
        // exhausted it is released so the next call fetches a new sample.
        CHECK(mBuffer != NULL);
        mBuffer->set_range(
                mBuffer->range_offset() + mNALLengthSize + nal_size,
                mBuffer->range_length() - mNALLengthSize - nal_size);

        if (mBuffer->range_length() == 0) {
            mBuffer->release();
            mBuffer = NULL;
        }

        *out = clone;

        return OK;
    } else {
        // Whole NAL units are returned but each fragment is prefixed by
        // the start code (0x00 00 00 01).
        ssize_t num_bytes_read = 0;
        int32_t drm = 0;
        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
        // DRM samples are read directly into the output buffer; everything
        // else is staged in mSrcBuffer so it can be rewritten below.
        // NOTE(review): |size| was checked against mBuffer->size() above but
        // not against the capacity of mSrcBuffer, which is allocated outside
        // this view -- confirm it is at least as large.
        if (usesDRM) {
            num_bytes_read =
                mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
        } else {
            num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
        }

        if (num_bytes_read < (ssize_t)size) {
            mBuffer->release();
            mBuffer = NULL;

            return ERROR_IO;
        }

        if (usesDRM) {
            CHECK(mBuffer != NULL);
            mBuffer->set_range(0, size);

        } else {
            uint8_t *dstData = (uint8_t *)mBuffer->data();
            size_t srcOffset = 0;
            size_t dstOffset = 0;

            // Walk the length-prefixed NAL units in mSrcBuffer and emit each
            // one into dstData behind a 4-byte start code.
            while (srcOffset < size) {
                // Both the length prefix and the payload it announces must lie
                // inside the |size| bytes that were read.
                bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
                size_t nalLength = 0;
                if (!isMalFormed) {
                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
                    srcOffset += mNALLengthSize;
                    isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
                }

                if (isMalFormed) {
                    ALOGE("Video is malformed");
                    mBuffer->release();
                    mBuffer = NULL;
                    return ERROR_MALFORMED;
                }

                // Empty NAL units produce no output; srcOffset has already
                // moved past the prefix, so the loop still makes progress.
                if (nalLength == 0) {
                    continue;
                }

                // Start code + payload must fit in the destination buffer;
                // the first two clauses guard the additions against wrap.
                if (dstOffset > SIZE_MAX - 4 ||
                        dstOffset + 4 > SIZE_MAX - nalLength ||
                        dstOffset + 4 + nalLength > mBuffer->size()) {
                    ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
                    android_errorWriteLog(0x534e4554, "27208621");
                    mBuffer->release();
                    mBuffer = NULL;
                    return ERROR_MALFORMED;
                }

                dstData[dstOffset++] = 0;
                dstData[dstOffset++] = 0;
                dstData[dstOffset++] = 0;
                dstData[dstOffset++] = 1;
                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
                srcOffset += nalLength;
                dstOffset += nalLength;
            }
            CHECK_EQ(srcOffset, size);
            CHECK(mBuffer != NULL);
            mBuffer->set_range(0, dstOffset);
        }

        mBuffer->meta_data()->clear();
        mBuffer->meta_data()->setInt64(
                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
        mBuffer->meta_data()->setInt64(
                kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);

        if (targetSampleTimeUs >= 0) {
            mBuffer->meta_data()->setInt64(
                    kKeyTargetTime, targetSampleTimeUs);
        }

        if (mIsAVC) {
            uint32_t layerId = FindAVCLayerId(
                    (const uint8_t *)mBuffer->data(), mBuffer->range_length());
            mBuffer->meta_data()->setInt32(kKeyTemporalLayerId, layerId);
        }

        if (isSyncSample) {
            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
        }

        ++mCurrentSampleIndex;

        *out = mBuffer;
        mBuffer = NULL;

        return OK;
    }
}

// Fragmented-file counterpart of read(): delivers the next sample (or NAL
// unit) from the samples of the current movie fragment (mCurrentSamples),
// parsing the next fragment via parseChunk() when the current one is used up.
//
// This function does not take mLock itself; the call site visible in this
// file, read(), already holds it.
//
// Seeking is segment-granular: with 'sidx' entries (mSegments) the fragment
// containing -- or, depending on the seek mode, following -- the requested
// time is selected; without them the only possible seek target is the first
// fragment.
//
// The three payload shapes are the same as in read(). Samples carrying
// encryption info additionally get the crypto metadata keys set.
status_t MPEG4Source::fragmentedRead(
        MediaBuffer **out, const ReadOptions *options) {

    ALOGV("MPEG4Source::fragmentedRead");

    CHECK(mStarted);

    *out = NULL;

    // Never reassigned in this function, so the kKeyTargetTime branches below
    // are not taken.
    int64_t targetSampleTimeUs = -1;

    int64_t seekTimeUs;
    ReadOptions::SeekMode mode;
    if (options && options->getSeekTo(&seekTimeUs, &mode)) {

        int numSidxEntries = mSegments.size();
        if (numSidxEntries != 0) {
            // Accumulate segment durations/sizes until the segment containing
            // seekTimeUs is found; totalOffset then points at its start.
            int64_t totalTime = 0;
            off64_t totalOffset = mFirstMoofOffset;
            for (int i = 0; i < numSidxEntries; i++) {
                const SidxEntry *se = &mSegments[i];
                if (totalTime + se->mDurationUs > seekTimeUs) {
                    // The requested time is somewhere in this segment
                    if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
                        (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
                        (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
                        // requested next sync, or closest sync and it was closer to the end of
                        // this segment
                        totalTime += se->mDurationUs;
                        totalOffset += se->mSize;
                    }
                    break;
                }
                totalTime += se->mDurationUs;
                totalOffset += se->mSize;
            }
            // Restart parsing at the selected fragment.
            mCurrentMoofOffset = totalOffset;
            mNextMoofOffset = -1;
            mCurrentSamples.clear();
            mCurrentSampleIndex = 0;
            status_t err = parseChunk(&totalOffset);
            if (err != OK) {
                return err;
            }
            // Microseconds back to track timescale units.
            mCurrentTime = totalTime * mTimescale / 1000000ll;
        } else {
            // without sidx boxes, we can only seek to 0
            mCurrentMoofOffset = mFirstMoofOffset;
            mNextMoofOffset = -1;
            mCurrentSamples.clear();
            mCurrentSampleIndex = 0;
            off64_t tmp = mCurrentMoofOffset;
            status_t err = parseChunk(&tmp);
            if (err != OK) {
                return err;
            }
            mCurrentTime = 0;
        }

        // A seek invalidates any partially delivered sample.
        if (mBuffer != NULL) {
            mBuffer->release();
            mBuffer = NULL;
        }

        // fall through
    }

    off64_t offset = 0;
    size_t size = 0;
    // NOTE(review): cts is 32 bits wide; if mCurrentTime (declared outside
    // this view) is wider, the sum assigned below is truncated -- confirm.
    uint32_t cts = 0;
    bool isSyncSample = false;
    bool newBuffer = false;
    if (mBuffer == NULL) {
        newBuffer = true;

        if (mCurrentSampleIndex >= mCurrentSamples.size()) {
            // move to next fragment if there is one
            if (mNextMoofOffset <= mCurrentMoofOffset) {
                return ERROR_END_OF_STREAM;
            }
            off64_t nextMoof = mNextMoofOffset;
            mCurrentMoofOffset = nextMoof;
            mCurrentSamples.clear();
            mCurrentSampleIndex = 0;
            status_t err = parseChunk(&nextMoof);
            if (err != OK) {
                return err;
            }
            // The newly parsed fragment may contain no samples.
            if (mCurrentSampleIndex >= mCurrentSamples.size()) {
                return ERROR_END_OF_STREAM;
            }
        }

        const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
        offset = smpl->offset;
        size = smpl->size;
        cts = mCurrentTime + smpl->compositionOffset;
        mCurrentTime += smpl->duration;
        // Only the first sample of a fragment is reported as a sync sample.
        isSyncSample = (mCurrentSampleIndex == 0); // XXX

        status_t err = mGroup->acquire_buffer(&mBuffer);

        if (err != OK) {
            CHECK(mBuffer == NULL);
            ALOGV("acquire_buffer returned %d", err);
            return err;
        }
        if (size > mBuffer->size()) {
            ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
            mBuffer->release();
            mBuffer = NULL;
            return ERROR_BUFFER_TOO_SMALL;
        }
    }

    // NOTE(review): when a buffer is carried over from a previous call (NAL
    // fragment mode), mCurrentSampleIndex was already incremented when that
    // sample was loaded, so |smpl| here refers to the following entry rather
    // than the sample backing mBuffer -- confirm this is intended and that the
    // index cannot be past the end of mCurrentSamples.
    const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
    const sp<MetaData> bufmeta = mBuffer->meta_data();
    bufmeta->clear();
    if (smpl->encryptedsizes.size()) {
        // store clear/encrypted lengths in metadata
        // (sizes are multiplied by 4: byte length of the 32-bit entries,
        // presumably -- element type is declared outside this view)
        bufmeta->setData(kKeyPlainSizes, 0,
                smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
        bufmeta->setData(kKeyEncryptedSizes, 0,
                smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
        bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size?
        bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
        bufmeta->setInt32(kKeyCryptoMode, mCryptoMode);
        bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16);
    }

    if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) {
        if (newBuffer) {
            if (!isInRange((size_t)0u, mBuffer->size(), size)) {
                mBuffer->release();
                mBuffer = NULL;

                ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
                return ERROR_MALFORMED;
            }

            ssize_t num_bytes_read =
                mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);

            if (num_bytes_read < (ssize_t)size) {
                mBuffer->release();
                mBuffer = NULL;

                ALOGE("i/o error");
                return ERROR_IO;
            }

            CHECK(mBuffer != NULL);
            mBuffer->set_range(0, size);
            mBuffer->meta_data()->setInt64(
                    kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
            mBuffer->meta_data()->setInt64(
                    kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);

            if (targetSampleTimeUs >= 0) {
                mBuffer->meta_data()->setInt64(
                        kKeyTargetTime, targetSampleTimeUs);
            }

            if (mIsAVC) {
                uint32_t layerId = FindAVCLayerId(
                        (const uint8_t *)mBuffer->data(), mBuffer->range_length());
                mBuffer->meta_data()->setInt32(kKeyTemporalLayerId, layerId);
            }

            if (isSyncSample) {
                mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
            }

            ++mCurrentSampleIndex;
        }

        if (!mIsAVC && !mIsHEVC) {
            // Not a NAL-structured codec: hand the sample over unchanged.
            *out = mBuffer;
            mBuffer = NULL;

            return OK;
        }

        // Each NAL unit is split up into its constituent fragments and
        // each one of them returned in its own buffer.

        // Aborts if fewer bytes than one length prefix remain in the buffer.
        CHECK(mBuffer->range_length() >= mNALLengthSize);

        const uint8_t *src =
            (const uint8_t *)mBuffer->data() + mBuffer->range_offset();

        size_t nal_size = parseNALSize(src);
        // Log-only overflow marker; the comparison below does the rejecting.
        if (mNALLengthSize > SIZE_MAX - nal_size) {
            ALOGE("b/24441553, b/24445122");
        }

        if (mBuffer->range_length() - mNALLengthSize < nal_size) {
            ALOGE("incomplete NAL unit.");

            mBuffer->release();
            mBuffer = NULL;

            return ERROR_MALFORMED;
        }

        // The caller gets a clone covering exactly this NAL payload.
        MediaBuffer *clone = mBuffer->clone();
        CHECK(clone != NULL);
        clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);

        // Advance past the NAL just handed out; release once exhausted.
        CHECK(mBuffer != NULL);
        mBuffer->set_range(
                mBuffer->range_offset() + mNALLengthSize + nal_size,
                mBuffer->range_length() - mNALLengthSize - nal_size);

        if (mBuffer->range_length() == 0) {
            mBuffer->release();
            mBuffer = NULL;
        }

        *out = clone;

        return OK;
    } else {
        ALOGV("whole NAL");
        // Whole NAL units are returned but each fragment is prefixed by
        // the start code (0x00 00 00 01).
        ssize_t num_bytes_read = 0;
        int32_t drm = 0;
        bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
        // |data| is the read destination: the output buffer for DRM content,
        // the staging buffer mSrcBuffer otherwise. In both cases |size| is
        // validated against the destination's capacity first.
        void *data = NULL;
        bool isMalFormed = false;
        if (usesDRM) {
            if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) {
                isMalFormed = true;
            } else {
                data = mBuffer->data();
            }
        } else {
            // NOTE(review): mFormat is compared against NULL here although it
            // was already dereferenced a few lines above.
            int32_t max_size;
            if (mFormat == NULL
                    || !mFormat->findInt32(kKeyMaxInputSize, &max_size)
                    || !isInRange((size_t)0u, (size_t)max_size, size)) {
                isMalFormed = true;
            } else {
                data = mSrcBuffer;
            }
        }

        if (isMalFormed || data == NULL) {
            ALOGE("isMalFormed size %zu", size);
            if (mBuffer != NULL) {
                mBuffer->release();
                mBuffer = NULL;
            }
            return ERROR_MALFORMED;
        }
        num_bytes_read = mDataSource->readAt(offset, data, size);

        if (num_bytes_read < (ssize_t)size) {
            mBuffer->release();
            mBuffer = NULL;

            ALOGE("i/o error");
            return ERROR_IO;
        }

        if (usesDRM) {
            CHECK(mBuffer != NULL);
            mBuffer->set_range(0, size);

        } else {
            uint8_t *dstData = (uint8_t *)mBuffer->data();
            size_t srcOffset = 0;
            size_t dstOffset = 0;

            // Walk the length-prefixed NAL units in mSrcBuffer and emit each
            // one into dstData behind a 4-byte start code.
            while (srcOffset < size) {
                isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
                size_t nalLength = 0;
                if (!isMalFormed) {
                    nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
                    srcOffset += mNALLengthSize;
                    // Source payload must lie inside the bytes read, and the
                    // start code plus payload must fit the destination.
                    isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
                            || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
                            || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
                }

                if (isMalFormed) {
                    ALOGE("Video is malformed; nalLength %zu", nalLength);
                    mBuffer->release();
                    mBuffer = NULL;
                    return ERROR_MALFORMED;
                }

                // Empty NAL units produce no output; srcOffset has already
                // moved past the prefix, so the loop still makes progress.
                if (nalLength == 0) {
                    continue;
                }

                // Second, explicit overflow/capacity check on the destination
                // (kept alongside the isInRange() checks above).
                if (dstOffset > SIZE_MAX - 4 ||
                        dstOffset + 4 > SIZE_MAX - nalLength ||
                        dstOffset + 4 + nalLength > mBuffer->size()) {
                    ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
                    android_errorWriteLog(0x534e4554, "26365349");
                    mBuffer->release();
                    mBuffer = NULL;
                    return ERROR_MALFORMED;
                }

                dstData[dstOffset++] = 0;
                dstData[dstOffset++] = 0;
                dstData[dstOffset++] = 0;
                dstData[dstOffset++] = 1;
                memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
                srcOffset += nalLength;
                dstOffset += nalLength;
            }
            CHECK_EQ(srcOffset, size);
            CHECK(mBuffer != NULL);
            mBuffer->set_range(0, dstOffset);
        }

        mBuffer->meta_data()->setInt64(
                kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
        mBuffer->meta_data()->setInt64(
                kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);

        if (targetSampleTimeUs >= 0) {
            mBuffer->meta_data()->setInt64(
                    kKeyTargetTime, targetSampleTimeUs);
        }

        if (isSyncSample) {
            mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
        }

        ++mCurrentSampleIndex;

        *out = mBuffer;
        mBuffer = NULL;

        return OK;
    }
}

// Returns the first track in the list starting at mFirstTrack whose MIME type
// begins with |mimePrefix| (compared case-insensitively), or NULL if no track
// matches. Tracks without metadata or without a MIME type are skipped.
MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
        const char *mimePrefix) {
    for (Track *track = mFirstTrack; track != NULL; track = track->next) {
        const char *mime;
        if (track->meta != NULL
                && track->meta->findCString(kKeyMIMEType, &mime)
                && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
            return track;
        }
    }

    return NULL;
}

// Fixed-signature sniffer: reads the 8 bytes at file offset 4 (where the type
// and major brand of a leading 'ftyp' box would be) and compares them against
// a hard-coded list. On a match sets |mimeType| to the MPEG4 container type
// and |confidence| to 0.4 and returns true; returns false on a short read or
// when nothing matches.
static bool LegacySniffMPEG4(
        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
    uint8_t header[8];

    ssize_t n = source->readAt(4, header, sizeof(header));
    if (n < (ssize_t)sizeof(header)) {
        return false;
    }

    // "ftyp3gp" is compared over 7 bytes only, so any eighth byte is accepted
    // for that brand family.
    if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
        || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
        || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
        || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
        || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
        || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
        || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
        || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)) {
        *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
        *confidence = 0.4;

        return true;
    }

    return false;
}

// Returns true if |fourcc| (a brand code already converted to host byte
// order) is one of the brands listed in kCompatibleBrands below.
static bool isCompatibleBrand(uint32_t fourcc) {
    static const uint32_t kCompatibleBrands[] = {
        FOURCC('i', 's', 'o', 'm'),
        FOURCC('i', 's', 'o', '2'),
        FOURCC('a', 'v', 'c', '1'),
        FOURCC('h', 'v', 'c', '1'),
        FOURCC('h', 'e', 'v', '1'),
        FOURCC('3', 'g', 'p', '4'),
        FOURCC('m', 'p', '4', '1'),
        FOURCC('m', 'p', '4', '2'),
        FOURCC('d', 'a', 's', 'h'),

        // Won't promise that the following file types can be played.
        // Just give these file types a chance.
        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP

        FOURCC('3', 'g', '2', 'a'),  // 3GPP2
        FOURCC('3', 'g', '2', 'b'),
        FOURCC('m', 'i', 'f', '1'),  // HEIF image
        FOURCC('h', 'e', 'i', 'c'),  // HEIF image
        FOURCC('m', 's', 'f', '1'),  // HEIF image sequence
        FOURCC('h', 'e', 'v', 'c'),  // HEIF image sequence
    };

    // Linear scan; the table is small.
    for (size_t i = 0;
         i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
         ++i) {
        if (kCompatibleBrands[i] == fourcc) {
            return true;
        }
    }

    return false;
}

// Attempt to actually parse the 'ftyp' atom and determine if a suitable
// compatible brand is present.
5450// Also try to identify where this file's metadata ends 5451// (end of the 'moov' atom) and report it to the caller as part of 5452// the metadata. 5453static bool BetterSniffMPEG4( 5454 const sp<DataSource> &source, String8 *mimeType, float *confidence, 5455 sp<AMessage> *meta) { 5456 // We scan up to 128 bytes to identify this file as an MP4. 5457 static const off64_t kMaxScanOffset = 128ll; 5458 5459 off64_t offset = 0ll; 5460 bool foundGoodFileType = false; 5461 off64_t moovAtomEndOffset = -1ll; 5462 bool done = false; 5463 5464 while (!done && offset < kMaxScanOffset) { 5465 uint32_t hdr[2]; 5466 if (source->readAt(offset, hdr, 8) < 8) { 5467 return false; 5468 } 5469 5470 uint64_t chunkSize = ntohl(hdr[0]); 5471 uint32_t chunkType = ntohl(hdr[1]); 5472 off64_t chunkDataOffset = offset + 8; 5473 5474 if (chunkSize == 1) { 5475 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 5476 return false; 5477 } 5478 5479 chunkSize = ntoh64(chunkSize); 5480 chunkDataOffset += 8; 5481 5482 if (chunkSize < 16) { 5483 // The smallest valid chunk is 16 bytes long in this case. 5484 return false; 5485 } 5486 5487 } else if (chunkSize < 8) { 5488 // The smallest valid chunk is 8 bytes long. 5489 return false; 5490 } 5491 5492 // (data_offset - offset) is either 8 or 16 5493 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset); 5494 if (chunkDataSize < 0) { 5495 ALOGE("b/23540914"); 5496 return false; 5497 } 5498 5499 char chunkstring[5]; 5500 MakeFourCCString(chunkType, chunkstring); 5501 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset); 5502 switch (chunkType) { 5503 case FOURCC('f', 't', 'y', 'p'): 5504 { 5505 if (chunkDataSize < 8) { 5506 return false; 5507 } 5508 5509 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 5510 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 5511 if (i == 1) { 5512 // Skip this index, it refers to the minorVersion, 5513 // not a brand. 
5514 continue; 5515 } 5516 5517 uint32_t brand; 5518 if (source->readAt( 5519 chunkDataOffset + 4 * i, &brand, 4) < 4) { 5520 return false; 5521 } 5522 5523 brand = ntohl(brand); 5524 5525 if (isCompatibleBrand(brand)) { 5526 foundGoodFileType = true; 5527 break; 5528 } 5529 } 5530 5531 if (!foundGoodFileType) { 5532 return false; 5533 } 5534 5535 break; 5536 } 5537 5538 case FOURCC('m', 'o', 'o', 'v'): 5539 { 5540 moovAtomEndOffset = offset + chunkSize; 5541 5542 done = true; 5543 break; 5544 } 5545 5546 default: 5547 break; 5548 } 5549 5550 offset += chunkSize; 5551 } 5552 5553 if (!foundGoodFileType) { 5554 return false; 5555 } 5556 5557 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 5558 *confidence = 0.4f; 5559 5560 if (moovAtomEndOffset >= 0) { 5561 *meta = new AMessage; 5562 (*meta)->setInt64("meta-data-size", moovAtomEndOffset); 5563 5564 ALOGV("found metadata size: %lld", (long long)moovAtomEndOffset); 5565 } 5566 5567 return true; 5568} 5569 5570static MediaExtractor* CreateExtractor( 5571 const sp<DataSource> &source, 5572 const sp<AMessage>& meta __unused) { 5573 return new MPEG4Extractor(source); 5574} 5575 5576static MediaExtractor::CreatorFunc Sniff( 5577 const sp<DataSource> &source, 5578 String8 *mimeType, 5579 float *confidence, 5580 sp<AMessage> *meta) { 5581 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) { 5582 return CreateExtractor; 5583 } 5584 5585 if (LegacySniffMPEG4(source, mimeType, confidence)) { 5586 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 5587 return CreateExtractor; 5588 } 5589 5590 return NULL; 5591} 5592 5593extern "C" { 5594// This is the only symbol that needs to be exported 5595__attribute__ ((visibility ("default"))) 5596MediaExtractor::ExtractorDef GETEXTRACTORDEF() { 5597 return { 5598 MediaExtractor::EXTRACTORDEF_VERSION, 5599 UUID("27575c67-4417-4c54-8d3d-8e626985a164"), 5600 1, // version 5601 "MP4 Extractor", 5602 Sniff 5603 }; 5604} 5605 5606} // extern "C" 5607 5608} // namespace android 
5609