MPEG4Extractor.cpp revision cbaad4b189c8566318bdb148cf905fe2b333b8ed
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//#define LOG_NDEBUG 0
#define LOG_TAG "MPEG4Extractor"

#include <ctype.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <utils/Log.h>

#include "include/MPEG4Extractor.h"
#include "include/SampleTable.h"
#include "include/ESDS.h"

#include <media/stagefright/foundation/ABitReader.h>
#include <media/stagefright/foundation/ABuffer.h>
#include <media/stagefright/foundation/ADebug.h>
#include <media/stagefright/foundation/AMessage.h>
#include <media/stagefright/foundation/AUtils.h>
#include <media/stagefright/MediaBuffer.h>
#include <media/stagefright/MediaBufferGroup.h>
#include <media/stagefright/MediaDefs.h>
#include <media/stagefright/MediaSource.h>
#include <media/stagefright/MetaData.h>
#include <utils/String8.h>

#include <byteswap.h>
#include "include/ID3.h"

// Fallback for toolchains whose <stdint.h> does not expose UINT32_MAX.
#ifndef UINT32_MAX
#define UINT32_MAX       (4294967295U)
#endif

namespace android {

enum {
    // maximum size of an atom. Some atoms can be bigger according to the spec,
    // but we only allow up to this size.
    kMaxAtomSize = 64 * 1024 * 1024,
};

// MediaSource implementation declared for use by MPEG4Extractor. It exposes
// both a regular read() and a fragmentedRead() entry point; the method
// definitions are not part of this section of the file.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    // NOTE(review): "sidx" is taken by non-const reference and stored in the
    // reference member mSegments, so the vector presumably has to outlive this
    // object -- confirm against the constructor definition.
    MPEG4Source(const sp<MPEG4Extractor> &owner,
                const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset);

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    // Protected destructor: instances are not meant to be deleted directly by
    // callers (the class is handled through sp<> elsewhere in this file).
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    // keep the MPEG4Extractor around, since we're referencing its data
    sp<MPEG4Extractor> mOwner;
    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Reference to the caller-supplied sidx entry vector (not a copy).
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    // Bookkeeping for sample auxiliary information; presumably filled by the
    // parseSampleAuxiliaryInformation*() helpers declared below -- confirm.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // Values read from a track fragment header.
    // NOTE(review): the Flags constants look like the tf_flags bits of the
    // 'tfhd' box in ISO/IEC 14496-12 -- confirm against the spec.
    struct TrackFragmentHeaderInfo {
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record kept in mCurrentSamples: location, size and timing of
    // one sample plus its IV and clear/encrypted subsample size lists.
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Declared but not defined here: copying is disallowed.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};

// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache while forwarding
// all remaining requests to the wrapped datasource.
// This is used to cache the full sampletable metadata for a single track,
// possibly wrapping multiple times to cover all tracks, i.e.
// Each MPEG4DataSource caches the sampletable metadata for a single track.
179 180struct MPEG4DataSource : public DataSource { 181 MPEG4DataSource(const sp<DataSource> &source); 182 183 virtual status_t initCheck() const; 184 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 185 virtual status_t getSize(off64_t *size); 186 virtual uint32_t flags(); 187 188 status_t setCachedRange(off64_t offset, size_t size); 189 190protected: 191 virtual ~MPEG4DataSource(); 192 193private: 194 Mutex mLock; 195 196 sp<DataSource> mSource; 197 off64_t mCachedOffset; 198 size_t mCachedSize; 199 uint8_t *mCache; 200 201 void clearCache(); 202 203 MPEG4DataSource(const MPEG4DataSource &); 204 MPEG4DataSource &operator=(const MPEG4DataSource &); 205}; 206 207MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source) 208 : mSource(source), 209 mCachedOffset(0), 210 mCachedSize(0), 211 mCache(NULL) { 212} 213 214MPEG4DataSource::~MPEG4DataSource() { 215 clearCache(); 216} 217 218void MPEG4DataSource::clearCache() { 219 if (mCache) { 220 free(mCache); 221 mCache = NULL; 222 } 223 224 mCachedOffset = 0; 225 mCachedSize = 0; 226} 227 228status_t MPEG4DataSource::initCheck() const { 229 return mSource->initCheck(); 230} 231 232ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { 233 Mutex::Autolock autoLock(mLock); 234 235 if (isInRange(mCachedOffset, mCachedSize, offset, size)) { 236 memcpy(data, &mCache[offset - mCachedOffset], size); 237 return size; 238 } 239 240 return mSource->readAt(offset, data, size); 241} 242 243status_t MPEG4DataSource::getSize(off64_t *size) { 244 return mSource->getSize(size); 245} 246 247uint32_t MPEG4DataSource::flags() { 248 return mSource->flags(); 249} 250 251status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { 252 Mutex::Autolock autoLock(mLock); 253 254 clearCache(); 255 256 mCache = (uint8_t *)malloc(size); 257 258 if (mCache == NULL) { 259 return -ENOMEM; 260 } 261 262 mCachedOffset = offset; 263 mCachedSize = size; 264 265 ssize_t err = mSource->readAt(mCachedOffset, 
mCache, mCachedSize); 266 267 if (err < (ssize_t)size) { 268 clearCache(); 269 270 return ERROR_IO; 271 } 272 273 return OK; 274} 275 276//////////////////////////////////////////////////////////////////////////////// 277 278static void hexdump(const void *_data, size_t size) { 279 const uint8_t *data = (const uint8_t *)_data; 280 size_t offset = 0; 281 while (offset < size) { 282 printf("0x%04zx ", offset); 283 284 size_t n = size - offset; 285 if (n > 16) { 286 n = 16; 287 } 288 289 for (size_t i = 0; i < 16; ++i) { 290 if (i == 8) { 291 printf(" "); 292 } 293 294 if (offset + i < size) { 295 printf("%02x ", data[offset + i]); 296 } else { 297 printf(" "); 298 } 299 } 300 301 printf(" "); 302 303 for (size_t i = 0; i < n; ++i) { 304 if (isprint(data[offset + i])) { 305 printf("%c", data[offset + i]); 306 } else { 307 printf("."); 308 } 309 } 310 311 printf("\n"); 312 313 offset += 16; 314 } 315} 316 317static const char *FourCC2MIME(uint32_t fourcc) { 318 switch (fourcc) { 319 case FOURCC('m', 'p', '4', 'a'): 320 return MEDIA_MIMETYPE_AUDIO_AAC; 321 322 case FOURCC('s', 'a', 'm', 'r'): 323 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 324 325 case FOURCC('s', 'a', 'w', 'b'): 326 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 327 328 case FOURCC('m', 'p', '4', 'v'): 329 return MEDIA_MIMETYPE_VIDEO_MPEG4; 330 331 case FOURCC('s', '2', '6', '3'): 332 case FOURCC('h', '2', '6', '3'): 333 case FOURCC('H', '2', '6', '3'): 334 return MEDIA_MIMETYPE_VIDEO_H263; 335 336 case FOURCC('a', 'v', 'c', '1'): 337 return MEDIA_MIMETYPE_VIDEO_AVC; 338 339 case FOURCC('h', 'v', 'c', '1'): 340 case FOURCC('h', 'e', 'v', '1'): 341 return MEDIA_MIMETYPE_VIDEO_HEVC; 342 default: 343 CHECK(!"should not be here."); 344 return NULL; 345 } 346} 347 348static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 349 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 350 // AMR NB audio is always mono, 8kHz 351 *channels = 1; 352 *rate = 8000; 353 return true; 354 } 
else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 355 // AMR WB audio is always mono, 16kHz 356 *channels = 1; 357 *rate = 16000; 358 return true; 359 } 360 return false; 361} 362 363MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source) 364 : mMoofOffset(0), 365 mDataSource(source), 366 mInitCheck(NO_INIT), 367 mHasVideo(false), 368 mHeaderTimescale(0), 369 mFirstTrack(NULL), 370 mLastTrack(NULL), 371 mFileMetaData(new MetaData), 372 mFirstSINF(NULL), 373 mIsDrm(false) { 374} 375 376MPEG4Extractor::~MPEG4Extractor() { 377 Track *track = mFirstTrack; 378 while (track) { 379 Track *next = track->next; 380 381 delete track; 382 track = next; 383 } 384 mFirstTrack = mLastTrack = NULL; 385 386 SINF *sinf = mFirstSINF; 387 while (sinf) { 388 SINF *next = sinf->next; 389 delete[] sinf->IPMPData; 390 delete sinf; 391 sinf = next; 392 } 393 mFirstSINF = NULL; 394 395 for (size_t i = 0; i < mPssh.size(); i++) { 396 delete [] mPssh[i].data; 397 } 398} 399 400uint32_t MPEG4Extractor::flags() const { 401 return CAN_PAUSE | 402 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 
403 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 404} 405 406sp<MetaData> MPEG4Extractor::getMetaData() { 407 status_t err; 408 if ((err = readMetaData()) != OK) { 409 return new MetaData; 410 } 411 412 return mFileMetaData; 413} 414 415size_t MPEG4Extractor::countTracks() { 416 status_t err; 417 if ((err = readMetaData()) != OK) { 418 ALOGV("MPEG4Extractor::countTracks: no tracks"); 419 return 0; 420 } 421 422 size_t n = 0; 423 Track *track = mFirstTrack; 424 while (track) { 425 ++n; 426 track = track->next; 427 } 428 429 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n); 430 return n; 431} 432 433sp<MetaData> MPEG4Extractor::getTrackMetaData( 434 size_t index, uint32_t flags) { 435 status_t err; 436 if ((err = readMetaData()) != OK) { 437 return NULL; 438 } 439 440 Track *track = mFirstTrack; 441 while (index > 0) { 442 if (track == NULL) { 443 return NULL; 444 } 445 446 track = track->next; 447 --index; 448 } 449 450 if (track == NULL) { 451 return NULL; 452 } 453 454 if ((flags & kIncludeExtensiveMetaData) 455 && !track->includes_expensive_metadata) { 456 track->includes_expensive_metadata = true; 457 458 const char *mime; 459 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 460 if (!strncasecmp("video/", mime, 6)) { 461 if (mMoofOffset > 0) { 462 int64_t duration; 463 if (track->meta->findInt64(kKeyDuration, &duration)) { 464 // nothing fancy, just pick a frame near 1/4th of the duration 465 track->meta->setInt64( 466 kKeyThumbnailTime, duration / 4); 467 } 468 } else { 469 uint32_t sampleIndex; 470 uint32_t sampleTime; 471 if (track->timescale != 0 && 472 track->sampleTable->findThumbnailSample(&sampleIndex) == OK 473 && track->sampleTable->getMetaDataForSample( 474 sampleIndex, NULL /* offset */, NULL /* size */, 475 &sampleTime) == OK) { 476 track->meta->setInt64( 477 kKeyThumbnailTime, 478 ((int64_t)sampleTime * 1000000) / track->timescale); 479 } 480 } 481 } 482 } 483 484 return track->meta; 485} 486 487static void 
MakeFourCCString(uint32_t x, char *s) { 488 s[0] = x >> 24; 489 s[1] = (x >> 16) & 0xff; 490 s[2] = (x >> 8) & 0xff; 491 s[3] = x & 0xff; 492 s[4] = '\0'; 493} 494 495status_t MPEG4Extractor::readMetaData() { 496 if (mInitCheck != NO_INIT) { 497 return mInitCheck; 498 } 499 500 off64_t offset = 0; 501 status_t err; 502 while (true) { 503 off64_t orig_offset = offset; 504 err = parseChunk(&offset, 0); 505 506 if (err != OK && err != UNKNOWN_ERROR) { 507 break; 508 } else if (offset <= orig_offset) { 509 // only continue parsing if the offset was advanced, 510 // otherwise we might end up in an infinite loop 511 ALOGE("did not advance: 0x%lld->0x%lld", orig_offset, offset); 512 err = ERROR_MALFORMED; 513 break; 514 } else if (err == OK) { 515 continue; 516 } 517 518 uint32_t hdr[2]; 519 if (mDataSource->readAt(offset, hdr, 8) < 8) { 520 break; 521 } 522 uint32_t chunk_type = ntohl(hdr[1]); 523 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 524 // store the offset of the first segment 525 mMoofOffset = offset; 526 } else if (chunk_type != FOURCC('m', 'd', 'a', 't')) { 527 // keep parsing until we get to the data 528 continue; 529 } 530 break; 531 } 532 533 if (mInitCheck == OK) { 534 if (mHasVideo) { 535 mFileMetaData->setCString( 536 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 537 } else { 538 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); 539 } 540 } else { 541 mInitCheck = err; 542 } 543 544 CHECK_NE(err, (status_t)NO_INIT); 545 546 // copy pssh data into file metadata 547 int psshsize = 0; 548 for (size_t i = 0; i < mPssh.size(); i++) { 549 psshsize += 20 + mPssh[i].datalen; 550 } 551 if (psshsize) { 552 char *buf = (char*)malloc(psshsize); 553 if (!buf) { 554 ALOGE("b/28471206"); 555 return NO_MEMORY; 556 } 557 char *ptr = buf; 558 for (size_t i = 0; i < mPssh.size(); i++) { 559 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 560 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 561 ptr += (20 + mPssh[i].datalen); 562 } 563 
mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize); 564 free(buf); 565 } 566 return mInitCheck; 567} 568 569char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { 570 if (mFirstSINF == NULL) { 571 return NULL; 572 } 573 574 SINF *sinf = mFirstSINF; 575 while (sinf && (trackID != sinf->trackID)) { 576 sinf = sinf->next; 577 } 578 579 if (sinf == NULL) { 580 return NULL; 581 } 582 583 *len = sinf->len; 584 return sinf->IPMPData; 585} 586 587// Reads an encoded integer 7 bits at a time until it encounters the high bit clear. 588static int32_t readSize(off64_t offset, 589 const sp<DataSource> DataSource, uint8_t *numOfBytes) { 590 uint32_t size = 0; 591 uint8_t data; 592 bool moreData = true; 593 *numOfBytes = 0; 594 595 while (moreData) { 596 if (DataSource->readAt(offset, &data, 1) < 1) { 597 return -1; 598 } 599 offset ++; 600 moreData = (data >= 128) ? true : false; 601 size = (size << 7) | (data & 0x7f); // Take last 7 bits 602 (*numOfBytes) ++; 603 } 604 605 return size; 606} 607 608status_t MPEG4Extractor::parseDrmSINF( 609 off64_t * /* offset */, off64_t data_offset) { 610 uint8_t updateIdTag; 611 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 612 return ERROR_IO; 613 } 614 data_offset ++; 615 616 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 617 return ERROR_MALFORMED; 618 } 619 620 uint8_t numOfBytes; 621 int32_t size = readSize(data_offset, mDataSource, &numOfBytes); 622 if (size < 0) { 623 return ERROR_IO; 624 } 625 int32_t classSize = size; 626 data_offset += numOfBytes; 627 628 while(size >= 11 ) { 629 uint8_t descriptorTag; 630 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { 631 return ERROR_IO; 632 } 633 data_offset ++; 634 635 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { 636 return ERROR_MALFORMED; 637 } 638 639 uint8_t buffer[8]; 640 //ObjectDescriptorID and ObjectDescriptor url flag 641 if (mDataSource->readAt(data_offset, buffer, 2) < 2) { 642 return ERROR_IO; 643 } 644 
data_offset += 2; 645 646 if ((buffer[1] >> 5) & 0x0001) { //url flag is set 647 return ERROR_MALFORMED; 648 } 649 650 if (mDataSource->readAt(data_offset, buffer, 8) < 8) { 651 return ERROR_IO; 652 } 653 data_offset += 8; 654 655 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) 656 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { 657 return ERROR_MALFORMED; 658 } 659 660 SINF *sinf = new SINF; 661 sinf->trackID = U16_AT(&buffer[3]); 662 sinf->IPMPDescriptorID = buffer[7]; 663 sinf->next = mFirstSINF; 664 mFirstSINF = sinf; 665 666 size -= (8 + 2 + 1); 667 } 668 669 if (size != 0) { 670 return ERROR_MALFORMED; 671 } 672 673 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 674 return ERROR_IO; 675 } 676 data_offset ++; 677 678 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 679 return ERROR_MALFORMED; 680 } 681 682 size = readSize(data_offset, mDataSource, &numOfBytes); 683 if (size < 0) { 684 return ERROR_IO; 685 } 686 classSize = size; 687 data_offset += numOfBytes; 688 689 while (size > 0) { 690 uint8_t tag; 691 int32_t dataLen; 692 if (mDataSource->readAt(data_offset, &tag, 1) < 1) { 693 return ERROR_IO; 694 } 695 data_offset ++; 696 697 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { 698 uint8_t id; 699 dataLen = readSize(data_offset, mDataSource, &numOfBytes); 700 if (dataLen < 0) { 701 return ERROR_IO; 702 } else if (dataLen < 4) { 703 return ERROR_MALFORMED; 704 } 705 data_offset += numOfBytes; 706 707 if (mDataSource->readAt(data_offset, &id, 1) < 1) { 708 return ERROR_IO; 709 } 710 data_offset ++; 711 712 SINF *sinf = mFirstSINF; 713 while (sinf && (sinf->IPMPDescriptorID != id)) { 714 sinf = sinf->next; 715 } 716 if (sinf == NULL) { 717 return ERROR_MALFORMED; 718 } 719 sinf->len = dataLen - 3; 720 sinf->IPMPData = new (std::nothrow) char[sinf->len]; 721 if (sinf->IPMPData == NULL) { 722 return ERROR_MALFORMED; 723 } 724 data_offset += 2; 725 726 if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) { 727 
return ERROR_IO; 728 } 729 data_offset += sinf->len; 730 731 size -= (dataLen + numOfBytes + 1); 732 } 733 } 734 735 if (size != 0) { 736 return ERROR_MALFORMED; 737 } 738 739 return UNKNOWN_ERROR; // Return a dummy error. 740} 741 742struct PathAdder { 743 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 744 : mPath(path) { 745 mPath->push(chunkType); 746 } 747 748 ~PathAdder() { 749 mPath->pop(); 750 } 751 752private: 753 Vector<uint32_t> *mPath; 754 755 PathAdder(const PathAdder &); 756 PathAdder &operator=(const PathAdder &); 757}; 758 759static bool underMetaDataPath(const Vector<uint32_t> &path) { 760 return path.size() >= 5 761 && path[0] == FOURCC('m', 'o', 'o', 'v') 762 && path[1] == FOURCC('u', 'd', 't', 'a') 763 && path[2] == FOURCC('m', 'e', 't', 'a') 764 && path[3] == FOURCC('i', 'l', 's', 't'); 765} 766 767// Given a time in seconds since Jan 1 1904, produce a human-readable string. 768static void convertTimeToDate(int64_t time_1904, String8 *s) { 769 time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600); 770 771 char tmp[32]; 772 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970)); 773 774 s->setTo(tmp); 775} 776 777status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 778 ALOGV("entering parseChunk %lld/%d", *offset, depth); 779 uint32_t hdr[2]; 780 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 781 return ERROR_IO; 782 } 783 uint64_t chunk_size = ntohl(hdr[0]); 784 uint32_t chunk_type = ntohl(hdr[1]); 785 off64_t data_offset = *offset + 8; 786 787 if (chunk_size == 1) { 788 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 789 return ERROR_IO; 790 } 791 chunk_size = ntoh64(chunk_size); 792 data_offset += 8; 793 794 if (chunk_size < 16) { 795 // The smallest valid chunk is 16 bytes long in this case. 
796 return ERROR_MALFORMED; 797 } 798 } else if (chunk_size == 0) { 799 if (depth == 0) { 800 // atom extends to end of file 801 off64_t sourceSize; 802 if (mDataSource->getSize(&sourceSize) == OK) { 803 chunk_size = (sourceSize - *offset); 804 } else { 805 // XXX could we just pick a "sufficiently large" value here? 806 ALOGE("atom size is 0, and data source has no size"); 807 return ERROR_MALFORMED; 808 } 809 } else { 810 // not allowed for non-toplevel atoms, skip it 811 *offset += 4; 812 return OK; 813 } 814 } else if (chunk_size < 8) { 815 // The smallest valid chunk is 8 bytes long. 816 ALOGE("invalid chunk size: %" PRIu64, chunk_size); 817 return ERROR_MALFORMED; 818 } 819 820 char chunk[5]; 821 MakeFourCCString(chunk_type, chunk); 822 ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth); 823 824#if 0 825 static const char kWhitespace[] = " "; 826 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 827 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size); 828 829 char buffer[256]; 830 size_t n = chunk_size; 831 if (n > sizeof(buffer)) { 832 n = sizeof(buffer); 833 } 834 if (mDataSource->readAt(*offset, buffer, n) 835 < (ssize_t)n) { 836 return ERROR_IO; 837 } 838 839 hexdump(buffer, n); 840#endif 841 842 PathAdder autoAdder(&mPath, chunk_type); 843 844 off64_t chunk_data_size = *offset + chunk_size - data_offset; 845 if (chunk_type != FOURCC('m', 'd', 'a', 't') && chunk_data_size > kMaxAtomSize) { 846 char errMsg[100]; 847 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size); 848 ALOGE("%s (b/28615448)", errMsg); 849 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg)); 850 return ERROR_MALFORMED; 851 } 852 853 if (chunk_type != FOURCC('c', 'p', 'r', 't') 854 && chunk_type != FOURCC('c', 'o', 'v', 'r') 855 && mPath.size() == 5 && underMetaDataPath(mPath)) { 856 off64_t stop_offset = *offset + chunk_size; 857 *offset = data_offset; 858 while (*offset < stop_offset) { 859 
status_t err = parseChunk(offset, depth + 1); 860 if (err != OK) { 861 return err; 862 } 863 } 864 865 if (*offset != stop_offset) { 866 return ERROR_MALFORMED; 867 } 868 869 return OK; 870 } 871 872 switch(chunk_type) { 873 case FOURCC('m', 'o', 'o', 'v'): 874 case FOURCC('t', 'r', 'a', 'k'): 875 case FOURCC('m', 'd', 'i', 'a'): 876 case FOURCC('m', 'i', 'n', 'f'): 877 case FOURCC('d', 'i', 'n', 'f'): 878 case FOURCC('s', 't', 'b', 'l'): 879 case FOURCC('m', 'v', 'e', 'x'): 880 case FOURCC('m', 'o', 'o', 'f'): 881 case FOURCC('t', 'r', 'a', 'f'): 882 case FOURCC('m', 'f', 'r', 'a'): 883 case FOURCC('u', 'd', 't', 'a'): 884 case FOURCC('i', 'l', 's', 't'): 885 case FOURCC('s', 'i', 'n', 'f'): 886 case FOURCC('s', 'c', 'h', 'i'): 887 case FOURCC('e', 'd', 't', 's'): 888 { 889 if (chunk_type == FOURCC('m', 'o', 'o', 'v') && depth != 0) { 890 ALOGE("moov: depth %d", depth); 891 return ERROR_MALFORMED; 892 } 893 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 894 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size); 895 896 if (mDataSource->flags() 897 & (DataSource::kWantsPrefetching 898 | DataSource::kIsCachingDataSource)) { 899 sp<MPEG4DataSource> cachedSource = 900 new MPEG4DataSource(mDataSource); 901 902 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { 903 mDataSource = cachedSource; 904 } 905 } 906 907 if (mLastTrack == NULL) { 908 return ERROR_MALFORMED; 909 } 910 mLastTrack->sampleTable = new SampleTable(mDataSource); 911 } 912 913 bool isTrack = false; 914 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 915 if (depth != 1) { 916 ALOGE("trak: depth %d", depth); 917 return ERROR_MALFORMED; 918 } 919 isTrack = true; 920 921 Track *track = new Track; 922 track->next = NULL; 923 if (mLastTrack) { 924 mLastTrack->next = track; 925 } else { 926 mFirstTrack = track; 927 } 928 mLastTrack = track; 929 930 track->meta = new MetaData; 931 track->includes_expensive_metadata = false; 932 track->skipTrack = false; 933 track->timescale = 0; 934 
track->meta->setCString(kKeyMIMEType, "application/octet-stream"); 935 } 936 937 off64_t stop_offset = *offset + chunk_size; 938 *offset = data_offset; 939 while (*offset < stop_offset) { 940 status_t err = parseChunk(offset, depth + 1); 941 if (err != OK) { 942 if (isTrack) { 943 mLastTrack->skipTrack = true; 944 break; 945 } 946 return err; 947 } 948 } 949 950 if (*offset != stop_offset) { 951 return ERROR_MALFORMED; 952 } 953 954 if (isTrack) { 955 int32_t trackId; 956 // There must be exact one track header per track. 957 if (!mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) { 958 mLastTrack->skipTrack = true; 959 } 960 if (mLastTrack->skipTrack) { 961 Track *cur = mFirstTrack; 962 963 if (cur == mLastTrack) { 964 delete cur; 965 mFirstTrack = mLastTrack = NULL; 966 } else { 967 while (cur && cur->next != mLastTrack) { 968 cur = cur->next; 969 } 970 cur->next = NULL; 971 delete mLastTrack; 972 mLastTrack = cur; 973 } 974 975 return OK; 976 } 977 978 status_t err = verifyTrack(mLastTrack); 979 980 if (err != OK) { 981 return err; 982 } 983 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 984 mInitCheck = OK; 985 986 if (!mIsDrm) { 987 return UNKNOWN_ERROR; // Return a dummy error. 
988 } else { 989 return OK; 990 } 991 } 992 break; 993 } 994 995 case FOURCC('e', 'l', 's', 't'): 996 { 997 *offset += chunk_size; 998 999 // See 14496-12 8.6.6 1000 uint8_t version; 1001 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 1002 return ERROR_IO; 1003 } 1004 1005 uint32_t entry_count; 1006 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 1007 return ERROR_IO; 1008 } 1009 1010 if (entry_count != 1) { 1011 // we only support a single entry at the moment, for gapless playback 1012 ALOGW("ignoring edit list with %d entries", entry_count); 1013 } else if (mHeaderTimescale == 0) { 1014 ALOGW("ignoring edit list because timescale is 0"); 1015 } else if (mLastTrack == NULL) { 1016 return ERROR_MALFORMED; 1017 } else { 1018 off64_t entriesoffset = data_offset + 8; 1019 uint64_t segment_duration; 1020 int64_t media_time; 1021 1022 if (version == 1) { 1023 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 1024 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 1025 return ERROR_IO; 1026 } 1027 } else if (version == 0) { 1028 uint32_t sd; 1029 int32_t mt; 1030 if (!mDataSource->getUInt32(entriesoffset, &sd) || 1031 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 1032 return ERROR_IO; 1033 } 1034 segment_duration = sd; 1035 media_time = mt; 1036 } else { 1037 return ERROR_IO; 1038 } 1039 1040 uint64_t halfscale = mHeaderTimescale / 2; 1041 segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale; 1042 media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale; 1043 1044 int64_t duration; 1045 int32_t samplerate; 1046 if (mLastTrack->meta->findInt64(kKeyDuration, &duration) && 1047 mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) { 1048 1049 int64_t delay = (media_time * samplerate + 500000) / 1000000; 1050 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 1051 1052 int64_t paddingus = duration - (segment_duration + media_time); 1053 if (paddingus < 0) { 1054 
// track duration from media header (which is what kKeyDuration is) might 1055 // be slightly shorter than the segment duration, which would make the 1056 // padding negative. Clamp to zero. 1057 paddingus = 0; 1058 } 1059 int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000; 1060 mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples); 1061 } 1062 } 1063 break; 1064 } 1065 1066 case FOURCC('f', 'r', 'm', 'a'): 1067 { 1068 *offset += chunk_size; 1069 if (mLastTrack == NULL) { 1070 return ERROR_MALFORMED; 1071 } 1072 1073 uint32_t original_fourcc; 1074 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 1075 return ERROR_IO; 1076 } 1077 original_fourcc = ntohl(original_fourcc); 1078 ALOGV("read original format: %d", original_fourcc); 1079 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 1080 uint32_t num_channels = 0; 1081 uint32_t sample_rate = 0; 1082 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 1083 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1084 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1085 } 1086 break; 1087 } 1088 1089 case FOURCC('t', 'e', 'n', 'c'): 1090 { 1091 *offset += chunk_size; 1092 if (mLastTrack == NULL) { 1093 return ERROR_MALFORMED; 1094 } 1095 1096 if (chunk_size < 32) { 1097 return ERROR_MALFORMED; 1098 } 1099 1100 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1101 // default IV size, 16 bytes default KeyID 1102 // (ISO 23001-7) 1103 char buf[4]; 1104 memset(buf, 0, 4); 1105 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1106 return ERROR_IO; 1107 } 1108 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1109 if (defaultAlgorithmId > 1) { 1110 // only 0 (clear) and 1 (AES-128) are valid 1111 return ERROR_MALFORMED; 1112 } 1113 1114 memset(buf, 0, 4); 1115 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1116 return ERROR_IO; 1117 } 1118 uint32_t 
defaultIVSize = ntohl(*((int32_t*)buf)); 1119 1120 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 1121 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 1122 // only unencrypted data must have 0 IV size 1123 return ERROR_MALFORMED; 1124 } else if (defaultIVSize != 0 && 1125 defaultIVSize != 8 && 1126 defaultIVSize != 16) { 1127 // only supported sizes are 0, 8 and 16 1128 return ERROR_MALFORMED; 1129 } 1130 1131 uint8_t defaultKeyId[16]; 1132 1133 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1134 return ERROR_IO; 1135 } 1136 1137 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId); 1138 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1139 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1140 break; 1141 } 1142 1143 case FOURCC('t', 'k', 'h', 'd'): 1144 { 1145 *offset += chunk_size; 1146 if (mLastTrack == NULL) { 1147 return ERROR_MALFORMED; 1148 } 1149 1150 status_t err; 1151 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1152 return err; 1153 } 1154 1155 break; 1156 } 1157 1158 case FOURCC('p', 's', 's', 'h'): 1159 { 1160 *offset += chunk_size; 1161 1162 PsshInfo pssh; 1163 1164 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1165 return ERROR_IO; 1166 } 1167 1168 uint32_t psshdatalen = 0; 1169 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1170 return ERROR_IO; 1171 } 1172 pssh.datalen = ntohl(psshdatalen); 1173 ALOGV("pssh data size: %d", pssh.datalen); 1174 if (pssh.datalen + 20 > chunk_size) { 1175 // pssh data length exceeds size of containing box 1176 return ERROR_MALFORMED; 1177 } 1178 1179 pssh.data = new (std::nothrow) uint8_t[pssh.datalen]; 1180 if (pssh.data == NULL) { 1181 return ERROR_MALFORMED; 1182 } 1183 ALOGV("allocated pssh @ %p", pssh.data); 1184 ssize_t requested = (ssize_t) pssh.datalen; 1185 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1186 delete[] pssh.data; 1187 return 
ERROR_IO; 1188 } 1189 mPssh.push_back(pssh); 1190 1191 break; 1192 } 1193 1194 case FOURCC('m', 'd', 'h', 'd'): 1195 { 1196 *offset += chunk_size; 1197 if (mLastTrack == NULL) { 1198 return ERROR_MALFORMED; 1199 } 1200 1201 if (chunk_data_size < 4) { 1202 return ERROR_MALFORMED; 1203 } 1204 1205 uint8_t version; 1206 if (mDataSource->readAt( 1207 data_offset, &version, sizeof(version)) 1208 < (ssize_t)sizeof(version)) { 1209 return ERROR_IO; 1210 } 1211 1212 off64_t timescale_offset; 1213 1214 if (version == 1) { 1215 timescale_offset = data_offset + 4 + 16; 1216 } else if (version == 0) { 1217 timescale_offset = data_offset + 4 + 8; 1218 } else { 1219 return ERROR_IO; 1220 } 1221 1222 uint32_t timescale; 1223 if (mDataSource->readAt( 1224 timescale_offset, ×cale, sizeof(timescale)) 1225 < (ssize_t)sizeof(timescale)) { 1226 return ERROR_IO; 1227 } 1228 1229 mLastTrack->timescale = ntohl(timescale); 1230 1231 // 14496-12 says all ones means indeterminate, but some files seem to use 1232 // 0 instead. We treat both the same. 
1233 int64_t duration = 0; 1234 if (version == 1) { 1235 if (mDataSource->readAt( 1236 timescale_offset + 4, &duration, sizeof(duration)) 1237 < (ssize_t)sizeof(duration)) { 1238 return ERROR_IO; 1239 } 1240 if (duration != -1) { 1241 duration = ntoh64(duration); 1242 } 1243 } else { 1244 uint32_t duration32; 1245 if (mDataSource->readAt( 1246 timescale_offset + 4, &duration32, sizeof(duration32)) 1247 < (ssize_t)sizeof(duration32)) { 1248 return ERROR_IO; 1249 } 1250 if (duration32 != 0xffffffff) { 1251 duration = ntohl(duration32); 1252 } 1253 } 1254 if (duration != 0) { 1255 mLastTrack->meta->setInt64( 1256 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1257 } 1258 1259 uint8_t lang[2]; 1260 off64_t lang_offset; 1261 if (version == 1) { 1262 lang_offset = timescale_offset + 4 + 8; 1263 } else if (version == 0) { 1264 lang_offset = timescale_offset + 4 + 4; 1265 } else { 1266 return ERROR_IO; 1267 } 1268 1269 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1270 < (ssize_t)sizeof(lang)) { 1271 return ERROR_IO; 1272 } 1273 1274 // To get the ISO-639-2/T three character language code 1275 // 1 bit pad followed by 3 5-bits characters. Each character 1276 // is packed as the difference between its ASCII value and 0x60. 1277 char lang_code[4]; 1278 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1279 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1280 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1281 lang_code[3] = '\0'; 1282 1283 mLastTrack->meta->setCString( 1284 kKeyMediaLanguage, lang_code); 1285 1286 break; 1287 } 1288 1289 case FOURCC('s', 't', 's', 'd'): 1290 { 1291 if (mLastTrack == NULL) { 1292 return ERROR_MALFORMED; 1293 } 1294 1295 uint8_t buffer[8]; 1296 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1297 return ERROR_MALFORMED; 1298 } 1299 1300 if (mDataSource->readAt( 1301 data_offset, buffer, 8) < 8) { 1302 return ERROR_IO; 1303 } 1304 1305 if (U32_AT(buffer) != 0) { 1306 // Should be version 0, flags 0. 
1307 return ERROR_MALFORMED; 1308 } 1309 1310 uint32_t entry_count = U32_AT(&buffer[4]); 1311 1312 if (entry_count > 1) { 1313 // For 3GPP timed text, there could be multiple tx3g boxes contain 1314 // multiple text display formats. These formats will be used to 1315 // display the timed text. 1316 // For encrypted files, there may also be more than one entry. 1317 const char *mime; 1318 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1319 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1320 strcasecmp(mime, "application/octet-stream")) { 1321 // For now we only support a single type of media per track. 1322 mLastTrack->skipTrack = true; 1323 *offset += chunk_size; 1324 break; 1325 } 1326 } 1327 off64_t stop_offset = *offset + chunk_size; 1328 *offset = data_offset + 8; 1329 for (uint32_t i = 0; i < entry_count; ++i) { 1330 status_t err = parseChunk(offset, depth + 1); 1331 if (err != OK) { 1332 return err; 1333 } 1334 } 1335 1336 if (*offset != stop_offset) { 1337 return ERROR_MALFORMED; 1338 } 1339 break; 1340 } 1341 1342 case FOURCC('m', 'p', '4', 'a'): 1343 case FOURCC('e', 'n', 'c', 'a'): 1344 case FOURCC('s', 'a', 'm', 'r'): 1345 case FOURCC('s', 'a', 'w', 'b'): 1346 { 1347 if (mLastTrack == NULL) { 1348 return ERROR_MALFORMED; 1349 } 1350 uint8_t buffer[8 + 20]; 1351 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1352 // Basic AudioSampleEntry size. 
1353 return ERROR_MALFORMED; 1354 } 1355 1356 if (mDataSource->readAt( 1357 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1358 return ERROR_IO; 1359 } 1360 1361 uint16_t data_ref_index = U16_AT(&buffer[6]); 1362 uint32_t num_channels = U16_AT(&buffer[16]); 1363 1364 uint16_t sample_size = U16_AT(&buffer[18]); 1365 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1366 1367 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1368 // if the chunk type is enca, we'll get the type from the sinf/frma box later 1369 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1370 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1371 } 1372 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1373 chunk, num_channels, sample_size, sample_rate); 1374 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1375 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1376 1377 off64_t stop_offset = *offset + chunk_size; 1378 *offset = data_offset + sizeof(buffer); 1379 while (*offset < stop_offset) { 1380 status_t err = parseChunk(offset, depth + 1); 1381 if (err != OK) { 1382 return err; 1383 } 1384 } 1385 1386 if (*offset != stop_offset) { 1387 return ERROR_MALFORMED; 1388 } 1389 break; 1390 } 1391 1392 case FOURCC('m', 'p', '4', 'v'): 1393 case FOURCC('e', 'n', 'c', 'v'): 1394 case FOURCC('s', '2', '6', '3'): 1395 case FOURCC('H', '2', '6', '3'): 1396 case FOURCC('h', '2', '6', '3'): 1397 case FOURCC('a', 'v', 'c', '1'): 1398 case FOURCC('h', 'v', 'c', '1'): 1399 case FOURCC('h', 'e', 'v', '1'): 1400 { 1401 mHasVideo = true; 1402 1403 uint8_t buffer[78]; 1404 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1405 // Basic VideoSampleEntry size. 
1406 return ERROR_MALFORMED; 1407 } 1408 1409 if (mDataSource->readAt( 1410 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1411 return ERROR_IO; 1412 } 1413 1414 uint16_t data_ref_index = U16_AT(&buffer[6]); 1415 uint16_t width = U16_AT(&buffer[6 + 18]); 1416 uint16_t height = U16_AT(&buffer[6 + 20]); 1417 1418 // The video sample is not standard-compliant if it has invalid dimension. 1419 // Use some default width and height value, and 1420 // let the decoder figure out the actual width and height (and thus 1421 // be prepared for INFO_FOMRAT_CHANGED event). 1422 if (width == 0) width = 352; 1423 if (height == 0) height = 288; 1424 1425 // printf("*** coding='%s' width=%d height=%d\n", 1426 // chunk, width, height); 1427 1428 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1429 // if the chunk type is encv, we'll get the type from the sinf/frma box later 1430 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1431 } 1432 mLastTrack->meta->setInt32(kKeyWidth, width); 1433 mLastTrack->meta->setInt32(kKeyHeight, height); 1434 1435 off64_t stop_offset = *offset + chunk_size; 1436 *offset = data_offset + sizeof(buffer); 1437 while (*offset < stop_offset) { 1438 status_t err = parseChunk(offset, depth + 1); 1439 if (err != OK) { 1440 return err; 1441 } 1442 } 1443 1444 if (*offset != stop_offset) { 1445 return ERROR_MALFORMED; 1446 } 1447 break; 1448 } 1449 1450 case FOURCC('s', 't', 'c', 'o'): 1451 case FOURCC('c', 'o', '6', '4'): 1452 { 1453 if (mLastTrack == NULL) { 1454 return ERROR_MALFORMED; 1455 } 1456 status_t err = 1457 mLastTrack->sampleTable->setChunkOffsetParams( 1458 chunk_type, data_offset, chunk_data_size); 1459 1460 *offset += chunk_size; 1461 1462 if (err != OK) { 1463 return err; 1464 } 1465 1466 break; 1467 } 1468 1469 case FOURCC('s', 't', 's', 'c'): 1470 { 1471 if (mLastTrack == NULL) { 1472 return ERROR_MALFORMED; 1473 } 1474 status_t err = 1475 mLastTrack->sampleTable->setSampleToChunkParams( 1476 data_offset, 
chunk_data_size); 1477 1478 *offset += chunk_size; 1479 1480 if (err != OK) { 1481 return err; 1482 } 1483 1484 break; 1485 } 1486 1487 case FOURCC('s', 't', 's', 'z'): 1488 case FOURCC('s', 't', 'z', '2'): 1489 { 1490 if (mLastTrack == NULL) { 1491 return ERROR_MALFORMED; 1492 } 1493 status_t err = 1494 mLastTrack->sampleTable->setSampleSizeParams( 1495 chunk_type, data_offset, chunk_data_size); 1496 1497 *offset += chunk_size; 1498 1499 if (err != OK) { 1500 return err; 1501 } 1502 1503 size_t max_size; 1504 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1505 1506 if (err != OK) { 1507 return err; 1508 } 1509 1510 if (max_size != 0) { 1511 // Assume that a given buffer only contains at most 10 chunks, 1512 // each chunk originally prefixed with a 2 byte length will 1513 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1514 // and thus will grow by 2 bytes per chunk. 1515 if (max_size > SIZE_MAX - 10 * 2) { 1516 ALOGE("max sample size too big: %zu", max_size); 1517 return ERROR_MALFORMED; 1518 } 1519 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1520 } else { 1521 // No size was specified. Pick a conservatively large size. 1522 uint32_t width, height; 1523 if (!mLastTrack->meta->findInt32(kKeyWidth, (int32_t*)&width) || 1524 !mLastTrack->meta->findInt32(kKeyHeight,(int32_t*) &height)) { 1525 ALOGE("No width or height, assuming worst case 1080p"); 1526 width = 1920; 1527 height = 1080; 1528 } else { 1529 // A resolution was specified, check that it's not too big. The values below 1530 // were chosen so that the calculations below don't cause overflows, they're 1531 // not indicating that resolutions up to 32kx32k are actually supported. 
1532 if (width > 32768 || height > 32768) { 1533 ALOGE("can't support %u x %u video", width, height); 1534 return ERROR_MALFORMED; 1535 } 1536 } 1537 1538 const char *mime; 1539 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1540 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 1541 // AVC requires compression ratio of at least 2, and uses 1542 // macroblocks 1543 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1544 } else { 1545 // For all other formats there is no minimum compression 1546 // ratio. Use compression ratio of 1. 1547 max_size = width * height * 3 / 2; 1548 } 1549 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size); 1550 } 1551 1552 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1553 // mimetype) previously obtained, so don't cache them. 1554 const char *mime; 1555 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1556 // Calculate average frame rate. 1557 if (!strncasecmp("video/", mime, 6)) { 1558 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1559 int64_t durationUs; 1560 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { 1561 if (durationUs > 0) { 1562 int32_t frameRate = (nSamples * 1000000LL + 1563 (durationUs >> 1)) / durationUs; 1564 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1565 } 1566 } 1567 } 1568 1569 break; 1570 } 1571 1572 case FOURCC('s', 't', 't', 's'): 1573 { 1574 *offset += chunk_size; 1575 1576 if (mLastTrack == NULL) { 1577 return ERROR_MALFORMED; 1578 } 1579 status_t err = 1580 mLastTrack->sampleTable->setTimeToSampleParams( 1581 data_offset, chunk_data_size); 1582 1583 if (err != OK) { 1584 return err; 1585 } 1586 1587 break; 1588 } 1589 1590 case FOURCC('c', 't', 't', 's'): 1591 { 1592 *offset += chunk_size; 1593 1594 if (mLastTrack == NULL) { 1595 return ERROR_MALFORMED; 1596 } 1597 status_t err = 1598 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1599 data_offset, chunk_data_size); 1600 1601 if (err != OK) { 
1602 return err; 1603 } 1604 1605 break; 1606 } 1607 1608 case FOURCC('s', 't', 's', 's'): 1609 { 1610 *offset += chunk_size; 1611 1612 if (mLastTrack == NULL) { 1613 return ERROR_MALFORMED; 1614 } 1615 status_t err = 1616 mLastTrack->sampleTable->setSyncSampleParams( 1617 data_offset, chunk_data_size); 1618 1619 if (err != OK) { 1620 return err; 1621 } 1622 1623 break; 1624 } 1625 1626 // @xyz 1627 case FOURCC('\xA9', 'x', 'y', 'z'): 1628 { 1629 *offset += chunk_size; 1630 1631 // Best case the total data length inside "@xyz" box 1632 // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/", 1633 // where "\x00\x04" is the text string length with value = 4, 1634 // "\0x15\xc7" is the language code = en, and "0+0" is a 1635 // location (string) value with longitude = 0 and latitude = 0. 1636 if (chunk_data_size < 8) { 1637 return ERROR_MALFORMED; 1638 } 1639 1640 // Worst case the location string length would be 18, 1641 // for instance +90.0000-180.0000, without the trailing "/" and 1642 // the string length + language code. 1643 char buffer[18]; 1644 1645 // Substracting 5 from the data size is because the text string length + 1646 // language code takes 4 bytes, and the trailing slash "/" takes 1 byte. 
1647 off64_t location_length = chunk_data_size - 5; 1648 if (location_length >= (off64_t) sizeof(buffer)) { 1649 return ERROR_MALFORMED; 1650 } 1651 1652 if (mDataSource->readAt( 1653 data_offset + 4, buffer, location_length) < location_length) { 1654 return ERROR_IO; 1655 } 1656 1657 buffer[location_length] = '\0'; 1658 mFileMetaData->setCString(kKeyLocation, buffer); 1659 break; 1660 } 1661 1662 case FOURCC('e', 's', 'd', 's'): 1663 { 1664 *offset += chunk_size; 1665 if (mLastTrack == NULL) { 1666 return ERROR_MALFORMED; 1667 } 1668 1669 if (chunk_data_size < 4) { 1670 return ERROR_MALFORMED; 1671 } 1672 1673 uint8_t buffer[256]; 1674 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1675 return ERROR_BUFFER_TOO_SMALL; 1676 } 1677 1678 if (mDataSource->readAt( 1679 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1680 return ERROR_IO; 1681 } 1682 1683 if (U32_AT(buffer) != 0) { 1684 // Should be version 0, flags 0. 1685 return ERROR_MALFORMED; 1686 } 1687 1688 mLastTrack->meta->setData( 1689 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1690 1691 if (mPath.size() >= 2 1692 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1693 // Information from the ESDS must be relied on for proper 1694 // setup of sample rate and channel count for MPEG4 Audio. 1695 // The generic header appears to only contain generic 1696 // information... 
1697 1698 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1699 &buffer[4], chunk_data_size - 4); 1700 1701 if (err != OK) { 1702 return err; 1703 } 1704 } 1705 1706 break; 1707 } 1708 1709 case FOURCC('a', 'v', 'c', 'C'): 1710 { 1711 *offset += chunk_size; 1712 if (mLastTrack == NULL) { 1713 return ERROR_MALFORMED; 1714 } 1715 1716 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1717 1718 if (buffer->data() == NULL) { 1719 ALOGE("b/28471206"); 1720 return NO_MEMORY; 1721 } 1722 1723 if (mDataSource->readAt( 1724 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1725 return ERROR_IO; 1726 } 1727 1728 mLastTrack->meta->setData( 1729 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size); 1730 1731 break; 1732 } 1733 case FOURCC('h', 'v', 'c', 'C'): 1734 { 1735 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1736 if (mLastTrack == NULL) { 1737 return ERROR_MALFORMED; 1738 } 1739 1740 if (buffer->data() == NULL) { 1741 ALOGE("b/28471206"); 1742 return NO_MEMORY; 1743 } 1744 1745 if (mDataSource->readAt( 1746 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1747 return ERROR_IO; 1748 } 1749 1750 mLastTrack->meta->setData( 1751 kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size); 1752 1753 *offset += chunk_size; 1754 break; 1755 } 1756 1757 case FOURCC('d', '2', '6', '3'): 1758 { 1759 *offset += chunk_size; 1760 if (mLastTrack == NULL) { 1761 return ERROR_MALFORMED; 1762 } 1763 /* 1764 * d263 contains a fixed 7 bytes part: 1765 * vendor - 4 bytes 1766 * version - 1 byte 1767 * level - 1 byte 1768 * profile - 1 byte 1769 * optionally, "d263" box itself may contain a 16-byte 1770 * bit rate box (bitr) 1771 * average bit rate - 4 bytes 1772 * max bit rate - 4 bytes 1773 */ 1774 char buffer[23]; 1775 if (chunk_data_size != 7 && 1776 chunk_data_size != 23) { 1777 ALOGE("Incorrect D263 box size %lld", chunk_data_size); 1778 return ERROR_MALFORMED; 1779 } 1780 1781 if (mDataSource->readAt( 1782 data_offset, buffer, 
chunk_data_size) < chunk_data_size) { 1783 return ERROR_IO; 1784 } 1785 1786 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 1787 1788 break; 1789 } 1790 1791 case FOURCC('m', 'e', 't', 'a'): 1792 { 1793 uint8_t buffer[4]; 1794 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1795 *offset += chunk_size; 1796 return ERROR_MALFORMED; 1797 } 1798 1799 if (mDataSource->readAt( 1800 data_offset, buffer, 4) < 4) { 1801 *offset += chunk_size; 1802 return ERROR_IO; 1803 } 1804 1805 if (U32_AT(buffer) != 0) { 1806 // Should be version 0, flags 0. 1807 1808 // If it's not, let's assume this is one of those 1809 // apparently malformed chunks that don't have flags 1810 // and completely different semantics than what's 1811 // in the MPEG4 specs and skip it. 1812 *offset += chunk_size; 1813 return OK; 1814 } 1815 1816 off64_t stop_offset = *offset + chunk_size; 1817 *offset = data_offset + sizeof(buffer); 1818 while (*offset < stop_offset) { 1819 status_t err = parseChunk(offset, depth + 1); 1820 if (err != OK) { 1821 return err; 1822 } 1823 } 1824 1825 if (*offset != stop_offset) { 1826 return ERROR_MALFORMED; 1827 } 1828 break; 1829 } 1830 1831 case FOURCC('m', 'e', 'a', 'n'): 1832 case FOURCC('n', 'a', 'm', 'e'): 1833 case FOURCC('d', 'a', 't', 'a'): 1834 { 1835 *offset += chunk_size; 1836 1837 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 1838 status_t err = parseITunesMetaData(data_offset, chunk_data_size); 1839 1840 if (err != OK) { 1841 return err; 1842 } 1843 } 1844 1845 break; 1846 } 1847 1848 case FOURCC('m', 'v', 'h', 'd'): 1849 { 1850 *offset += chunk_size; 1851 1852 if (depth != 1) { 1853 ALOGE("mvhd: depth %d", depth); 1854 return ERROR_MALFORMED; 1855 } 1856 if (chunk_data_size < 32) { 1857 return ERROR_MALFORMED; 1858 } 1859 1860 uint8_t header[32]; 1861 if (mDataSource->readAt( 1862 data_offset, header, sizeof(header)) 1863 < (ssize_t)sizeof(header)) { 1864 return ERROR_IO; 1865 } 1866 1867 uint64_t creationTime; 1868 uint64_t 
duration = 0; 1869 if (header[0] == 1) { 1870 creationTime = U64_AT(&header[4]); 1871 mHeaderTimescale = U32_AT(&header[20]); 1872 duration = U64_AT(&header[24]); 1873 if (duration == 0xffffffffffffffff) { 1874 duration = 0; 1875 } 1876 } else if (header[0] != 0) { 1877 return ERROR_MALFORMED; 1878 } else { 1879 creationTime = U32_AT(&header[4]); 1880 mHeaderTimescale = U32_AT(&header[12]); 1881 uint32_t d32 = U32_AT(&header[16]); 1882 if (d32 == 0xffffffff) { 1883 d32 = 0; 1884 } 1885 duration = d32; 1886 } 1887 if (duration != 0) { 1888 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 1889 } 1890 1891 String8 s; 1892 convertTimeToDate(creationTime, &s); 1893 1894 mFileMetaData->setCString(kKeyDate, s.string()); 1895 1896 break; 1897 } 1898 1899 case FOURCC('m', 'e', 'h', 'd'): 1900 { 1901 *offset += chunk_size; 1902 1903 if (chunk_data_size < 8) { 1904 return ERROR_MALFORMED; 1905 } 1906 1907 uint8_t flags[4]; 1908 if (mDataSource->readAt( 1909 data_offset, flags, sizeof(flags)) 1910 < (ssize_t)sizeof(flags)) { 1911 return ERROR_IO; 1912 } 1913 1914 uint64_t duration = 0; 1915 if (flags[0] == 1) { 1916 // 64 bit 1917 if (chunk_data_size < 12) { 1918 return ERROR_MALFORMED; 1919 } 1920 mDataSource->getUInt64(data_offset + 4, &duration); 1921 if (duration == 0xffffffffffffffff) { 1922 duration = 0; 1923 } 1924 } else if (flags[0] == 0) { 1925 // 32 bit 1926 uint32_t d32; 1927 mDataSource->getUInt32(data_offset + 4, &d32); 1928 if (d32 == 0xffffffff) { 1929 d32 = 0; 1930 } 1931 duration = d32; 1932 } else { 1933 return ERROR_MALFORMED; 1934 } 1935 1936 if (duration != 0) { 1937 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 1938 } 1939 1940 break; 1941 } 1942 1943 case FOURCC('m', 'd', 'a', 't'): 1944 { 1945 ALOGV("mdat chunk, drm: %d", mIsDrm); 1946 if (!mIsDrm) { 1947 *offset += chunk_size; 1948 break; 1949 } 1950 1951 if (chunk_size < 8) { 1952 return ERROR_MALFORMED; 1953 } 1954 1955 return 
parseDrmSINF(offset, data_offset); 1956 } 1957 1958 case FOURCC('h', 'd', 'l', 'r'): 1959 { 1960 *offset += chunk_size; 1961 1962 uint32_t buffer; 1963 if (mDataSource->readAt( 1964 data_offset + 8, &buffer, 4) < 4) { 1965 return ERROR_IO; 1966 } 1967 1968 uint32_t type = ntohl(buffer); 1969 // For the 3GPP file format, the handler-type within the 'hdlr' box 1970 // shall be 'text'. We also want to support 'sbtl' handler type 1971 // for a practical reason as various MPEG4 containers use it. 1972 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 1973 if (mLastTrack != NULL) { 1974 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 1975 } 1976 } 1977 1978 break; 1979 } 1980 1981 case FOURCC('t', 'r', 'e', 'x'): 1982 { 1983 *offset += chunk_size; 1984 1985 if (chunk_data_size < 24) { 1986 return ERROR_IO; 1987 } 1988 uint32_t duration; 1989 Trex trex; 1990 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) || 1991 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) || 1992 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) || 1993 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) || 1994 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) { 1995 return ERROR_IO; 1996 } 1997 mTrex.add(trex); 1998 break; 1999 } 2000 2001 case FOURCC('t', 'x', '3', 'g'): 2002 { 2003 if (mLastTrack == NULL) { 2004 return ERROR_MALFORMED; 2005 } 2006 uint32_t type; 2007 const void *data; 2008 size_t size = 0; 2009 if (!mLastTrack->meta->findData( 2010 kKeyTextFormatData, &type, &data, &size)) { 2011 size = 0; 2012 } 2013 2014 if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) { 2015 return ERROR_MALFORMED; 2016 } 2017 2018 uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size]; 2019 if (buffer == NULL) { 2020 return ERROR_MALFORMED; 2021 } 2022 2023 if (size > 0) { 2024 memcpy(buffer, data, size); 2025 } 2026 2027 if 
((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 2028 < chunk_size) { 2029 delete[] buffer; 2030 buffer = NULL; 2031 2032 // advance read pointer so we don't end up reading this again 2033 *offset += chunk_size; 2034 return ERROR_IO; 2035 } 2036 2037 mLastTrack->meta->setData( 2038 kKeyTextFormatData, 0, buffer, size + chunk_size); 2039 2040 delete[] buffer; 2041 2042 *offset += chunk_size; 2043 break; 2044 } 2045 2046 case FOURCC('c', 'o', 'v', 'r'): 2047 { 2048 *offset += chunk_size; 2049 2050 if (mFileMetaData != NULL) { 2051 ALOGV("chunk_data_size = %lld and data_offset = %lld", 2052 chunk_data_size, data_offset); 2053 2054 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) { 2055 return ERROR_MALFORMED; 2056 } 2057 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1); 2058 if (buffer->data() == NULL) { 2059 ALOGE("b/28471206"); 2060 return NO_MEMORY; 2061 } 2062 if (mDataSource->readAt( 2063 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) { 2064 return ERROR_IO; 2065 } 2066 const int kSkipBytesOfDataBox = 16; 2067 if (chunk_data_size <= kSkipBytesOfDataBox) { 2068 return ERROR_MALFORMED; 2069 } 2070 2071 mFileMetaData->setData( 2072 kKeyAlbumArt, MetaData::TYPE_NONE, 2073 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 2074 } 2075 2076 break; 2077 } 2078 2079 case FOURCC('t', 'i', 't', 'l'): 2080 case FOURCC('p', 'e', 'r', 'f'): 2081 case FOURCC('a', 'u', 't', 'h'): 2082 case FOURCC('g', 'n', 'r', 'e'): 2083 case FOURCC('a', 'l', 'b', 'm'): 2084 case FOURCC('y', 'r', 'r', 'c'): 2085 { 2086 *offset += chunk_size; 2087 2088 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth); 2089 2090 if (err != OK) { 2091 return err; 2092 } 2093 2094 break; 2095 } 2096 2097 case FOURCC('I', 'D', '3', '2'): 2098 { 2099 *offset += chunk_size; 2100 2101 if (chunk_data_size < 6) { 2102 return ERROR_MALFORMED; 2103 } 2104 2105 parseID3v2MetaData(data_offset 
+ 6); 2106 2107 break; 2108 } 2109 2110 case FOURCC('-', '-', '-', '-'): 2111 { 2112 mLastCommentMean.clear(); 2113 mLastCommentName.clear(); 2114 mLastCommentData.clear(); 2115 *offset += chunk_size; 2116 break; 2117 } 2118 2119 case FOURCC('s', 'i', 'd', 'x'): 2120 { 2121 if (mLastTrack == NULL) { 2122 return ERROR_MALFORMED; 2123 } 2124 parseSegmentIndex(data_offset, chunk_data_size); 2125 *offset += chunk_size; 2126 return UNKNOWN_ERROR; // stop parsing after sidx 2127 } 2128 2129 default: 2130 { 2131 *offset += chunk_size; 2132 break; 2133 } 2134 } 2135 2136 return OK; 2137} 2138 2139status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 2140 ALOGV("MPEG4Extractor::parseSegmentIndex"); 2141 2142 if (size < 12) { 2143 return -EINVAL; 2144 } 2145 2146 uint32_t flags; 2147 if (!mDataSource->getUInt32(offset, &flags)) { 2148 return ERROR_MALFORMED; 2149 } 2150 2151 uint32_t version = flags >> 24; 2152 flags &= 0xffffff; 2153 2154 ALOGV("sidx version %d", version); 2155 2156 uint32_t referenceId; 2157 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 2158 return ERROR_MALFORMED; 2159 } 2160 2161 uint32_t timeScale; 2162 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 2163 return ERROR_MALFORMED; 2164 } 2165 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 2166 2167 uint64_t earliestPresentationTime; 2168 uint64_t firstOffset; 2169 2170 offset += 12; 2171 size -= 12; 2172 2173 if (version == 0) { 2174 if (size < 8) { 2175 return -EINVAL; 2176 } 2177 uint32_t tmp; 2178 if (!mDataSource->getUInt32(offset, &tmp)) { 2179 return ERROR_MALFORMED; 2180 } 2181 earliestPresentationTime = tmp; 2182 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 2183 return ERROR_MALFORMED; 2184 } 2185 firstOffset = tmp; 2186 offset += 8; 2187 size -= 8; 2188 } else { 2189 if (size < 16) { 2190 return -EINVAL; 2191 } 2192 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 2193 return ERROR_MALFORMED; 2194 } 2195 if 
(!mDataSource->getUInt64(offset + 8, &firstOffset)) { 2196 return ERROR_MALFORMED; 2197 } 2198 offset += 16; 2199 size -= 16; 2200 } 2201 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset); 2202 2203 if (size < 4) { 2204 return -EINVAL; 2205 } 2206 2207 uint16_t referenceCount; 2208 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 2209 return ERROR_MALFORMED; 2210 } 2211 offset += 4; 2212 size -= 4; 2213 ALOGV("refcount: %d", referenceCount); 2214 2215 if (size < referenceCount * 12) { 2216 return -EINVAL; 2217 } 2218 2219 uint64_t total_duration = 0; 2220 for (unsigned int i = 0; i < referenceCount; i++) { 2221 uint32_t d1, d2, d3; 2222 2223 if (!mDataSource->getUInt32(offset, &d1) || // size 2224 !mDataSource->getUInt32(offset + 4, &d2) || // duration 2225 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 2226 return ERROR_MALFORMED; 2227 } 2228 2229 if (d1 & 0x80000000) { 2230 ALOGW("sub-sidx boxes not supported yet"); 2231 } 2232 bool sap = d3 & 0x80000000; 2233 uint32_t saptype = (d3 >> 28) & 7; 2234 if (!sap || (saptype != 1 && saptype != 2)) { 2235 // type 1 and 2 are sync samples 2236 ALOGW("not a stream access point, or unsupported type: %08x", d3); 2237 } 2238 total_duration += d2; 2239 offset += 12; 2240 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 2241 SidxEntry se; 2242 se.mSize = d1 & 0x7fffffff; 2243 se.mDurationUs = 1000000LL * d2 / timeScale; 2244 mSidxEntries.add(se); 2245 } 2246 2247 uint64_t sidxDuration = total_duration * 1000000 / timeScale; 2248 2249 int64_t metaDuration; 2250 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 2251 mLastTrack->meta->setInt64(kKeyDuration, sidxDuration); 2252 } 2253 return OK; 2254} 2255 2256 2257 2258status_t MPEG4Extractor::parseTrackHeader( 2259 off64_t data_offset, off64_t data_size) { 2260 if (data_size < 4) { 2261 return ERROR_MALFORMED; 2262 } 2263 2264 uint8_t version; 2265 if (mDataSource->readAt(data_offset, 
&version, 1) < 1) { 2266 return ERROR_IO; 2267 } 2268 2269 size_t dynSize = (version == 1) ? 36 : 24; 2270 2271 uint8_t buffer[36 + 60]; 2272 2273 if (data_size != (off64_t)dynSize + 60) { 2274 return ERROR_MALFORMED; 2275 } 2276 2277 if (mDataSource->readAt( 2278 data_offset, buffer, data_size) < (ssize_t)data_size) { 2279 return ERROR_IO; 2280 } 2281 2282 uint64_t ctime, mtime, duration; 2283 int32_t id; 2284 2285 if (version == 1) { 2286 ctime = U64_AT(&buffer[4]); 2287 mtime = U64_AT(&buffer[12]); 2288 id = U32_AT(&buffer[20]); 2289 duration = U64_AT(&buffer[28]); 2290 } else if (version == 0) { 2291 ctime = U32_AT(&buffer[4]); 2292 mtime = U32_AT(&buffer[8]); 2293 id = U32_AT(&buffer[12]); 2294 duration = U32_AT(&buffer[20]); 2295 } else { 2296 return ERROR_UNSUPPORTED; 2297 } 2298 2299 mLastTrack->meta->setInt32(kKeyTrackID, id); 2300 2301 size_t matrixOffset = dynSize + 16; 2302 int32_t a00 = U32_AT(&buffer[matrixOffset]); 2303 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 2304 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 2305 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 2306 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 2307 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 2308 2309#if 0 2310 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 2311 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 2312 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 2313 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 2314#endif 2315 2316 uint32_t rotationDegrees; 2317 2318 static const int32_t kFixedOne = 0x10000; 2319 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 2320 // Identity, no rotation 2321 rotationDegrees = 0; 2322 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 2323 rotationDegrees = 90; 2324 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 2325 rotationDegrees = 270; 2326 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { 2327 rotationDegrees = 180; 2328 } else { 
2329 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 2330 rotationDegrees = 0; 2331 } 2332 2333 if (rotationDegrees != 0) { 2334 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); 2335 } 2336 2337 // Handle presentation display size, which could be different 2338 // from the image size indicated by kKeyWidth and kKeyHeight. 2339 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2340 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2341 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); 2342 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); 2343 2344 return OK; 2345} 2346 2347status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) { 2348 if (size < 4 || size == SIZE_MAX) { 2349 return ERROR_MALFORMED; 2350 } 2351 2352 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2353 if (buffer == NULL) { 2354 return ERROR_MALFORMED; 2355 } 2356 if (mDataSource->readAt( 2357 offset, buffer, size) != (ssize_t)size) { 2358 delete[] buffer; 2359 buffer = NULL; 2360 2361 return ERROR_IO; 2362 } 2363 2364 uint32_t flags = U32_AT(buffer); 2365 2366 uint32_t metadataKey = 0; 2367 char chunk[5]; 2368 MakeFourCCString(mPath[4], chunk); 2369 ALOGV("meta: %s @ %lld", chunk, offset); 2370 switch (mPath[4]) { 2371 case FOURCC(0xa9, 'a', 'l', 'b'): 2372 { 2373 metadataKey = kKeyAlbum; 2374 break; 2375 } 2376 case FOURCC(0xa9, 'A', 'R', 'T'): 2377 { 2378 metadataKey = kKeyArtist; 2379 break; 2380 } 2381 case FOURCC('a', 'A', 'R', 'T'): 2382 { 2383 metadataKey = kKeyAlbumArtist; 2384 break; 2385 } 2386 case FOURCC(0xa9, 'd', 'a', 'y'): 2387 { 2388 metadataKey = kKeyYear; 2389 break; 2390 } 2391 case FOURCC(0xa9, 'n', 'a', 'm'): 2392 { 2393 metadataKey = kKeyTitle; 2394 break; 2395 } 2396 case FOURCC(0xa9, 'w', 'r', 't'): 2397 { 2398 metadataKey = kKeyWriter; 2399 break; 2400 } 2401 case FOURCC('c', 'o', 'v', 'r'): 2402 { 2403 metadataKey = kKeyAlbumArt; 2404 break; 2405 } 2406 case FOURCC('g', 'n', 'r', 'e'): 2407 { 2408 
metadataKey = kKeyGenre; 2409 break; 2410 } 2411 case FOURCC(0xa9, 'g', 'e', 'n'): 2412 { 2413 metadataKey = kKeyGenre; 2414 break; 2415 } 2416 case FOURCC('c', 'p', 'i', 'l'): 2417 { 2418 if (size == 9 && flags == 21) { 2419 char tmp[16]; 2420 sprintf(tmp, "%d", 2421 (int)buffer[size - 1]); 2422 2423 mFileMetaData->setCString(kKeyCompilation, tmp); 2424 } 2425 break; 2426 } 2427 case FOURCC('t', 'r', 'k', 'n'): 2428 { 2429 if (size == 16 && flags == 0) { 2430 char tmp[16]; 2431 uint16_t* pTrack = (uint16_t*)&buffer[10]; 2432 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 2433 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 2434 2435 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2436 } 2437 break; 2438 } 2439 case FOURCC('d', 'i', 's', 'k'): 2440 { 2441 if ((size == 14 || size == 16) && flags == 0) { 2442 char tmp[16]; 2443 uint16_t* pDisc = (uint16_t*)&buffer[10]; 2444 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 2445 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 2446 2447 mFileMetaData->setCString(kKeyDiscNumber, tmp); 2448 } 2449 break; 2450 } 2451 case FOURCC('-', '-', '-', '-'): 2452 { 2453 buffer[size] = '\0'; 2454 switch (mPath[5]) { 2455 case FOURCC('m', 'e', 'a', 'n'): 2456 mLastCommentMean.setTo((const char *)buffer + 4); 2457 break; 2458 case FOURCC('n', 'a', 'm', 'e'): 2459 mLastCommentName.setTo((const char *)buffer + 4); 2460 break; 2461 case FOURCC('d', 'a', 't', 'a'): 2462 if (size < 8) { 2463 delete[] buffer; 2464 buffer = NULL; 2465 ALOGE("b/24346430"); 2466 return ERROR_MALFORMED; 2467 } 2468 mLastCommentData.setTo((const char *)buffer + 8); 2469 break; 2470 } 2471 2472 // Once we have a set of mean/name/data info, go ahead and process 2473 // it to see if its something we are interested in. Whether or not 2474 // were are interested in the specific tag, make sure to clear out 2475 // the set so we can be ready to process another tuple should one 2476 // show up later in the file. 
2477 if ((mLastCommentMean.length() != 0) && 2478 (mLastCommentName.length() != 0) && 2479 (mLastCommentData.length() != 0)) { 2480 2481 if (mLastCommentMean == "com.apple.iTunes" 2482 && mLastCommentName == "iTunSMPB") { 2483 int32_t delay, padding; 2484 if (sscanf(mLastCommentData, 2485 " %*x %x %x %*x", &delay, &padding) == 2) { 2486 if (mLastTrack == NULL) { 2487 delete[] buffer; 2488 return ERROR_MALFORMED; 2489 } 2490 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 2491 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); 2492 } 2493 } 2494 2495 mLastCommentMean.clear(); 2496 mLastCommentName.clear(); 2497 mLastCommentData.clear(); 2498 } 2499 break; 2500 } 2501 2502 default: 2503 break; 2504 } 2505 2506 if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) { 2507 if (metadataKey == kKeyAlbumArt) { 2508 mFileMetaData->setData( 2509 kKeyAlbumArt, MetaData::TYPE_NONE, 2510 buffer + 8, size - 8); 2511 } else if (metadataKey == kKeyGenre) { 2512 if (flags == 0) { 2513 // uint8_t genre code, iTunes genre codes are 2514 // the standard id3 codes, except they start 2515 // at 1 instead of 0 (e.g. Pop is 14, not 13) 2516 // We use standard id3 numbering, so subtract 1. 
2517 int genrecode = (int)buffer[size - 1]; 2518 genrecode--; 2519 if (genrecode < 0) { 2520 genrecode = 255; // reserved for 'unknown genre' 2521 } 2522 char genre[10]; 2523 sprintf(genre, "%d", genrecode); 2524 2525 mFileMetaData->setCString(metadataKey, genre); 2526 } else if (flags == 1) { 2527 // custom genre string 2528 buffer[size] = '\0'; 2529 2530 mFileMetaData->setCString( 2531 metadataKey, (const char *)buffer + 8); 2532 } 2533 } else { 2534 buffer[size] = '\0'; 2535 2536 mFileMetaData->setCString( 2537 metadataKey, (const char *)buffer + 8); 2538 } 2539 } 2540 2541 delete[] buffer; 2542 buffer = NULL; 2543 2544 return OK; 2545} 2546 2547status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) { 2548 if (size < 4 || size == SIZE_MAX) { 2549 return ERROR_MALFORMED; 2550 } 2551 2552 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2553 if (buffer == NULL) { 2554 return ERROR_MALFORMED; 2555 } 2556 if (mDataSource->readAt( 2557 offset, buffer, size) != (ssize_t)size) { 2558 delete[] buffer; 2559 buffer = NULL; 2560 2561 return ERROR_IO; 2562 } 2563 2564 uint32_t metadataKey = 0; 2565 switch (mPath[depth]) { 2566 case FOURCC('t', 'i', 't', 'l'): 2567 { 2568 metadataKey = kKeyTitle; 2569 break; 2570 } 2571 case FOURCC('p', 'e', 'r', 'f'): 2572 { 2573 metadataKey = kKeyArtist; 2574 break; 2575 } 2576 case FOURCC('a', 'u', 't', 'h'): 2577 { 2578 metadataKey = kKeyWriter; 2579 break; 2580 } 2581 case FOURCC('g', 'n', 'r', 'e'): 2582 { 2583 metadataKey = kKeyGenre; 2584 break; 2585 } 2586 case FOURCC('a', 'l', 'b', 'm'): 2587 { 2588 if (buffer[size - 1] != '\0') { 2589 char tmp[4]; 2590 sprintf(tmp, "%u", buffer[size - 1]); 2591 2592 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2593 } 2594 2595 metadataKey = kKeyAlbum; 2596 break; 2597 } 2598 case FOURCC('y', 'r', 'r', 'c'): 2599 { 2600 char tmp[5]; 2601 uint16_t year = U16_AT(&buffer[4]); 2602 2603 if (year < 10000) { 2604 sprintf(tmp, "%u", year); 2605 2606 
mFileMetaData->setCString(kKeyYear, tmp); 2607 } 2608 break; 2609 } 2610 2611 default: 2612 break; 2613 } 2614 2615 if (metadataKey > 0) { 2616 bool isUTF8 = true; // Common case 2617 char16_t *framedata = NULL; 2618 int len16 = 0; // Number of UTF-16 characters 2619 2620 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00 2621 if (size < 6) { 2622 return ERROR_MALFORMED; 2623 } 2624 2625 if (size - 6 >= 4) { 2626 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator 2627 framedata = (char16_t *)(buffer + 6); 2628 if (0xfffe == *framedata) { 2629 // endianness marker (BOM) doesn't match host endianness 2630 for (int i = 0; i < len16; i++) { 2631 framedata[i] = bswap_16(framedata[i]); 2632 } 2633 // BOM is now swapped to 0xfeff, we will execute next block too 2634 } 2635 2636 if (0xfeff == *framedata) { 2637 // Remove the BOM 2638 framedata++; 2639 len16--; 2640 isUTF8 = false; 2641 } 2642 // else normal non-zero-length UTF-8 string 2643 // we can't handle UTF-16 without BOM as there is no other 2644 // indication of encoding. 2645 } 2646 2647 if (isUTF8) { 2648 buffer[size] = 0; 2649 mFileMetaData->setCString(metadataKey, (const char *)buffer + 6); 2650 } else { 2651 // Convert from UTF-16 string to UTF-8 string. 
2652 String8 tmpUTF8str(framedata, len16); 2653 mFileMetaData->setCString(metadataKey, tmpUTF8str.string()); 2654 } 2655 } 2656 2657 delete[] buffer; 2658 buffer = NULL; 2659 2660 return OK; 2661} 2662 2663void MPEG4Extractor::parseID3v2MetaData(off64_t offset) { 2664 ID3 id3(mDataSource, true /* ignorev1 */, offset); 2665 2666 if (id3.isValid()) { 2667 struct Map { 2668 int key; 2669 const char *tag1; 2670 const char *tag2; 2671 }; 2672 static const Map kMap[] = { 2673 { kKeyAlbum, "TALB", "TAL" }, 2674 { kKeyArtist, "TPE1", "TP1" }, 2675 { kKeyAlbumArtist, "TPE2", "TP2" }, 2676 { kKeyComposer, "TCOM", "TCM" }, 2677 { kKeyGenre, "TCON", "TCO" }, 2678 { kKeyTitle, "TIT2", "TT2" }, 2679 { kKeyYear, "TYE", "TYER" }, 2680 { kKeyAuthor, "TXT", "TEXT" }, 2681 { kKeyCDTrackNumber, "TRK", "TRCK" }, 2682 { kKeyDiscNumber, "TPA", "TPOS" }, 2683 { kKeyCompilation, "TCP", "TCMP" }, 2684 }; 2685 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 2686 2687 for (size_t i = 0; i < kNumMapEntries; ++i) { 2688 if (!mFileMetaData->hasData(kMap[i].key)) { 2689 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 2690 if (it->done()) { 2691 delete it; 2692 it = new ID3::Iterator(id3, kMap[i].tag2); 2693 } 2694 2695 if (it->done()) { 2696 delete it; 2697 continue; 2698 } 2699 2700 String8 s; 2701 it->getString(&s); 2702 delete it; 2703 2704 mFileMetaData->setCString(kMap[i].key, s); 2705 } 2706 } 2707 2708 size_t dataSize; 2709 String8 mime; 2710 const void *data = id3.getAlbumArt(&dataSize, &mime); 2711 2712 if (data) { 2713 mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 2714 mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string()); 2715 } 2716 } 2717} 2718 2719sp<MediaSource> MPEG4Extractor::getTrack(size_t index) { 2720 status_t err; 2721 if ((err = readMetaData()) != OK) { 2722 return NULL; 2723 } 2724 2725 Track *track = mFirstTrack; 2726 while (index > 0) { 2727 if (track == NULL) { 2728 return NULL; 2729 } 2730 2731 track = 
track->next; 2732 --index; 2733 } 2734 2735 if (track == NULL) { 2736 return NULL; 2737 } 2738 2739 2740 Trex *trex = NULL; 2741 int32_t trackId; 2742 if (track->meta->findInt32(kKeyTrackID, &trackId)) { 2743 for (size_t i = 0; i < mTrex.size(); i++) { 2744 Trex *t = &mTrex.editItemAt(index); 2745 if (t->track_ID == (uint32_t) trackId) { 2746 trex = t; 2747 break; 2748 } 2749 } 2750 } else { 2751 ALOGE("b/21657957"); 2752 return NULL; 2753 } 2754 2755 ALOGV("getTrack called, pssh: %zu", mPssh.size()); 2756 2757 return new MPEG4Source(this, 2758 track->meta, mDataSource, track->timescale, track->sampleTable, 2759 mSidxEntries, trex, mMoofOffset); 2760} 2761 2762// static 2763status_t MPEG4Extractor::verifyTrack(Track *track) { 2764 const char *mime; 2765 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 2766 2767 uint32_t type; 2768 const void *data; 2769 size_t size; 2770 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 2771 if (!track->meta->findData(kKeyAVCC, &type, &data, &size) 2772 || type != kTypeAVCC) { 2773 return ERROR_MALFORMED; 2774 } 2775 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 2776 if (!track->meta->findData(kKeyHVCC, &type, &data, &size) 2777 || type != kTypeHVCC) { 2778 return ERROR_MALFORMED; 2779 } 2780 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 2781 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 2782 if (!track->meta->findData(kKeyESDS, &type, &data, &size) 2783 || type != kTypeESDS) { 2784 return ERROR_MALFORMED; 2785 } 2786 } 2787 2788 if (track->sampleTable == NULL || !track->sampleTable->isValid()) { 2789 // Make sure we have all the metadata we need. 
2790 ALOGE("stbl atom missing/invalid."); 2791 return ERROR_MALFORMED; 2792 } 2793 2794 return OK; 2795} 2796 2797typedef enum { 2798 //AOT_NONE = -1, 2799 //AOT_NULL_OBJECT = 0, 2800 //AOT_AAC_MAIN = 1, /**< Main profile */ 2801 AOT_AAC_LC = 2, /**< Low Complexity object */ 2802 //AOT_AAC_SSR = 3, 2803 //AOT_AAC_LTP = 4, 2804 AOT_SBR = 5, 2805 //AOT_AAC_SCAL = 6, 2806 //AOT_TWIN_VQ = 7, 2807 //AOT_CELP = 8, 2808 //AOT_HVXC = 9, 2809 //AOT_RSVD_10 = 10, /**< (reserved) */ 2810 //AOT_RSVD_11 = 11, /**< (reserved) */ 2811 //AOT_TTSI = 12, /**< TTSI Object */ 2812 //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */ 2813 //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */ 2814 //AOT_GEN_MIDI = 15, /**< General MIDI object */ 2815 //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */ 2816 AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */ 2817 //AOT_RSVD_18 = 18, /**< (reserved) */ 2818 //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */ 2819 AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */ 2820 //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */ 2821 AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */ 2822 AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */ 2823 //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */ 2824 //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */ 2825 //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */ 2826 //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */ 2827 //AOT_RSVD_28 = 28, /**< might become SSC */ 2828 AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */ 2829 //AOT_MPEGS = 30, /**< MPEG Surround */ 2830 2831 AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */ 2832 2833 //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */ 2834 //AOT_MP3ONMP4_L2 = 33, /**< MPEG-Layer2 in mp4 */ 2835 //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */ 2836 //AOT_RSVD_35 = 35, /**< might 
become DST */ 2837 //AOT_RSVD_36 = 36, /**< might become ALS */ 2838 //AOT_AAC_SLS = 37, /**< AAC + SLS */ 2839 //AOT_SLS = 38, /**< SLS */ 2840 //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */ 2841 2842 //AOT_USAC = 42, /**< USAC */ 2843 //AOT_SAOC = 43, /**< SAOC */ 2844 //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */ 2845 2846 //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */ 2847} AUDIO_OBJECT_TYPE; 2848 2849status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 2850 const void *esds_data, size_t esds_size) { 2851 ESDS esds(esds_data, esds_size); 2852 2853 uint8_t objectTypeIndication; 2854 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 2855 return ERROR_MALFORMED; 2856 } 2857 2858 if (objectTypeIndication == 0xe1) { 2859 // This isn't MPEG4 audio at all, it's QCELP 14k... 2860 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 2861 return OK; 2862 } 2863 2864 if (objectTypeIndication == 0x6b) { 2865 // The media subtype is MP3 audio 2866 // Our software MP3 audio decoder may not be able to handle 2867 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 2868 ALOGE("MP3 track in MP4/3GPP file is not supported"); 2869 return ERROR_UNSUPPORTED; 2870 } 2871 2872 const uint8_t *csd; 2873 size_t csd_size; 2874 if (esds.getCodecSpecificInfo( 2875 (const void **)&csd, &csd_size) != OK) { 2876 return ERROR_MALFORMED; 2877 } 2878 2879#if 0 2880 printf("ESD of size %d\n", csd_size); 2881 hexdump(csd, csd_size); 2882#endif 2883 2884 if (csd_size == 0) { 2885 // There's no further information, i.e. no codec specific data 2886 // Let's assume that the information provided in the mpeg4 headers 2887 // is accurate and hope for the best. 
2888 2889 return OK; 2890 } 2891 2892 if (csd_size < 2) { 2893 return ERROR_MALFORMED; 2894 } 2895 2896 static uint32_t kSamplingRate[] = { 2897 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 2898 16000, 12000, 11025, 8000, 7350 2899 }; 2900 2901 ABitReader br(csd, csd_size); 2902 uint32_t objectType = br.getBits(5); 2903 2904 if (objectType == 31) { // AAC-ELD => additional 6 bits 2905 objectType = 32 + br.getBits(6); 2906 } 2907 2908 //keep AOT type 2909 mLastTrack->meta->setInt32(kKeyAACAOT, objectType); 2910 2911 uint32_t freqIndex = br.getBits(4); 2912 2913 int32_t sampleRate = 0; 2914 int32_t numChannels = 0; 2915 if (freqIndex == 15) { 2916 if (csd_size < 5) { 2917 return ERROR_MALFORMED; 2918 } 2919 sampleRate = br.getBits(24); 2920 numChannels = br.getBits(4); 2921 } else { 2922 numChannels = br.getBits(4); 2923 2924 if (freqIndex == 13 || freqIndex == 14) { 2925 return ERROR_MALFORMED; 2926 } 2927 2928 sampleRate = kSamplingRate[freqIndex]; 2929 } 2930 2931 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 2932 uint32_t extFreqIndex = br.getBits(4); 2933 int32_t extSampleRate; 2934 if (extFreqIndex == 15) { 2935 if (csd_size < 8) { 2936 return ERROR_MALFORMED; 2937 } 2938 extSampleRate = br.getBits(24); 2939 } else { 2940 if (extFreqIndex == 13 || extFreqIndex == 14) { 2941 return ERROR_MALFORMED; 2942 } 2943 extSampleRate = kSamplingRate[extFreqIndex]; 2944 } 2945 //TODO: save the extension sampling rate value in meta data => 2946 // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate); 2947 } 2948 2949 switch (numChannels) { 2950 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration 2951 case 0: 2952 case 1:// FC 2953 case 2:// FL FR 2954 case 3:// FC, FL FR 2955 case 4:// FC, FL FR, RC 2956 case 5:// FC, FL FR, SL SR 2957 case 6:// FC, FL FR, SL SR, LFE 2958 //numChannels already contains the right value 2959 break; 2960 case 11:// FC, FL FR, SL SR, RC, LFE 2961 
numChannels = 7; 2962 break; 2963 case 7: // FC, FCL FCR, FL FR, SL SR, LFE 2964 case 12:// FC, FL FR, SL SR, RL RR, LFE 2965 case 14:// FC, FL FR, SL SR, LFE, FHL FHR 2966 numChannels = 8; 2967 break; 2968 default: 2969 return ERROR_UNSUPPORTED; 2970 } 2971 2972 { 2973 if (objectType == AOT_SBR || objectType == AOT_PS) { 2974 objectType = br.getBits(5); 2975 2976 if (objectType == AOT_ESCAPE) { 2977 objectType = 32 + br.getBits(6); 2978 } 2979 } 2980 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || 2981 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || 2982 objectType == AOT_ER_BSAC) { 2983 const int32_t frameLengthFlag = br.getBits(1); 2984 2985 const int32_t dependsOnCoreCoder = br.getBits(1); 2986 2987 if (dependsOnCoreCoder ) { 2988 const int32_t coreCoderDelay = br.getBits(14); 2989 } 2990 2991 int32_t extensionFlag = -1; 2992 if (br.numBitsLeft() > 0) { 2993 extensionFlag = br.getBits(1); 2994 } else { 2995 switch (objectType) { 2996 // 14496-3 4.5.1.1 extensionFlag 2997 case AOT_AAC_LC: 2998 extensionFlag = 0; 2999 break; 3000 case AOT_ER_AAC_LC: 3001 case AOT_ER_AAC_SCAL: 3002 case AOT_ER_BSAC: 3003 case AOT_ER_AAC_LD: 3004 extensionFlag = 1; 3005 break; 3006 default: 3007 TRESPASS(); 3008 break; 3009 } 3010 ALOGW("csd missing extension flag; assuming %d for object type %u.", 3011 extensionFlag, objectType); 3012 } 3013 3014 if (numChannels == 0) { 3015 int32_t channelsEffectiveNum = 0; 3016 int32_t channelsNum = 0; 3017 const int32_t ElementInstanceTag = br.getBits(4); 3018 const int32_t Profile = br.getBits(2); 3019 const int32_t SamplingFrequencyIndex = br.getBits(4); 3020 const int32_t NumFrontChannelElements = br.getBits(4); 3021 const int32_t NumSideChannelElements = br.getBits(4); 3022 const int32_t NumBackChannelElements = br.getBits(4); 3023 const int32_t NumLfeChannelElements = br.getBits(2); 3024 const int32_t NumAssocDataElements = br.getBits(3); 3025 const int32_t NumValidCcElements = br.getBits(4); 3026 3027 const 
int32_t MonoMixdownPresent = br.getBits(1); 3028 if (MonoMixdownPresent != 0) { 3029 const int32_t MonoMixdownElementNumber = br.getBits(4); 3030 } 3031 3032 const int32_t StereoMixdownPresent = br.getBits(1); 3033 if (StereoMixdownPresent != 0) { 3034 const int32_t StereoMixdownElementNumber = br.getBits(4); 3035 } 3036 3037 const int32_t MatrixMixdownIndexPresent = br.getBits(1); 3038 if (MatrixMixdownIndexPresent != 0) { 3039 const int32_t MatrixMixdownIndex = br.getBits(2); 3040 const int32_t PseudoSurroundEnable = br.getBits(1); 3041 } 3042 3043 int i; 3044 for (i=0; i < NumFrontChannelElements; i++) { 3045 const int32_t FrontElementIsCpe = br.getBits(1); 3046 const int32_t FrontElementTagSelect = br.getBits(4); 3047 channelsNum += FrontElementIsCpe ? 2 : 1; 3048 } 3049 3050 for (i=0; i < NumSideChannelElements; i++) { 3051 const int32_t SideElementIsCpe = br.getBits(1); 3052 const int32_t SideElementTagSelect = br.getBits(4); 3053 channelsNum += SideElementIsCpe ? 2 : 1; 3054 } 3055 3056 for (i=0; i < NumBackChannelElements; i++) { 3057 const int32_t BackElementIsCpe = br.getBits(1); 3058 const int32_t BackElementTagSelect = br.getBits(4); 3059 channelsNum += BackElementIsCpe ? 2 : 1; 3060 } 3061 channelsEffectiveNum = channelsNum; 3062 3063 for (i=0; i < NumLfeChannelElements; i++) { 3064 const int32_t LfeElementTagSelect = br.getBits(4); 3065 channelsNum += 1; 3066 } 3067 ALOGV("mpeg4 audio channelsNum = %d", channelsNum); 3068 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); 3069 numChannels = channelsNum; 3070 } 3071 } 3072 } 3073 3074 if (numChannels == 0) { 3075 return ERROR_UNSUPPORTED; 3076 } 3077 3078 int32_t prevSampleRate; 3079 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); 3080 3081 if (prevSampleRate != sampleRate) { 3082 ALOGV("mpeg4 audio sample rate different from previous setting. 
" 3083 "was: %d, now: %d", prevSampleRate, sampleRate); 3084 } 3085 3086 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 3087 3088 int32_t prevChannelCount; 3089 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); 3090 3091 if (prevChannelCount != numChannels) { 3092 ALOGV("mpeg4 audio channel count different from previous setting. " 3093 "was: %d, now: %d", prevChannelCount, numChannels); 3094 } 3095 3096 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); 3097 3098 return OK; 3099} 3100 3101//////////////////////////////////////////////////////////////////////////////// 3102 3103MPEG4Source::MPEG4Source( 3104 const sp<MPEG4Extractor> &owner, 3105 const sp<MetaData> &format, 3106 const sp<DataSource> &dataSource, 3107 int32_t timeScale, 3108 const sp<SampleTable> &sampleTable, 3109 Vector<SidxEntry> &sidx, 3110 const Trex *trex, 3111 off64_t firstMoofOffset) 3112 : mOwner(owner), 3113 mFormat(format), 3114 mDataSource(dataSource), 3115 mTimescale(timeScale), 3116 mSampleTable(sampleTable), 3117 mCurrentSampleIndex(0), 3118 mCurrentFragmentIndex(0), 3119 mSegments(sidx), 3120 mTrex(trex), 3121 mFirstMoofOffset(firstMoofOffset), 3122 mCurrentMoofOffset(firstMoofOffset), 3123 mCurrentTime(0), 3124 mCurrentSampleInfoAllocSize(0), 3125 mCurrentSampleInfoSizes(NULL), 3126 mCurrentSampleInfoOffsetsAllocSize(0), 3127 mCurrentSampleInfoOffsets(NULL), 3128 mIsAVC(false), 3129 mIsHEVC(false), 3130 mNALLengthSize(0), 3131 mStarted(false), 3132 mGroup(NULL), 3133 mBuffer(NULL), 3134 mWantsNALFragments(false), 3135 mSrcBuffer(NULL) { 3136 3137 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo)); 3138 3139 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode); 3140 mDefaultIVSize = 0; 3141 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 3142 uint32_t keytype; 3143 const void *key; 3144 size_t keysize; 3145 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 3146 CHECK(keysize <= 16); 3147 
memset(mCryptoKey, 0, 16); 3148 memcpy(mCryptoKey, key, keysize); 3149 } 3150 3151 const char *mime; 3152 bool success = mFormat->findCString(kKeyMIMEType, &mime); 3153 CHECK(success); 3154 3155 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 3156 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC); 3157 3158 if (mIsAVC) { 3159 uint32_t type; 3160 const void *data; 3161 size_t size; 3162 CHECK(format->findData(kKeyAVCC, &type, &data, &size)); 3163 3164 const uint8_t *ptr = (const uint8_t *)data; 3165 3166 CHECK(size >= 7); 3167 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3168 3169 // The number of bytes used to encode the length of a NAL unit. 3170 mNALLengthSize = 1 + (ptr[4] & 3); 3171 } else if (mIsHEVC) { 3172 uint32_t type; 3173 const void *data; 3174 size_t size; 3175 CHECK(format->findData(kKeyHVCC, &type, &data, &size)); 3176 3177 const uint8_t *ptr = (const uint8_t *)data; 3178 3179 CHECK(size >= 7); 3180 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3181 3182 mNALLengthSize = 1 + (ptr[14 + 7] & 3); 3183 } 3184 3185 CHECK(format->findInt32(kKeyTrackID, &mTrackId)); 3186 3187 if (mFirstMoofOffset != 0) { 3188 off64_t offset = mFirstMoofOffset; 3189 parseChunk(&offset); 3190 } 3191} 3192 3193MPEG4Source::~MPEG4Source() { 3194 if (mStarted) { 3195 stop(); 3196 } 3197 free(mCurrentSampleInfoSizes); 3198 free(mCurrentSampleInfoOffsets); 3199} 3200 3201status_t MPEG4Source::start(MetaData *params) { 3202 Mutex::Autolock autoLock(mLock); 3203 3204 CHECK(!mStarted); 3205 3206 int32_t val; 3207 if (params && params->findInt32(kKeyWantsNALFragments, &val) 3208 && val != 0) { 3209 mWantsNALFragments = true; 3210 } else { 3211 mWantsNALFragments = false; 3212 } 3213 3214 int32_t tmp; 3215 CHECK(mFormat->findInt32(kKeyMaxInputSize, &tmp)); 3216 size_t max_size = tmp; 3217 3218 // A somewhat arbitrary limit that should be sufficient for 8k video frames 3219 // If you see the message below for a valid input stream: increase the 
limit 3220 if (max_size > 64 * 1024 * 1024) { 3221 ALOGE("bogus max input size: %zu", max_size); 3222 return ERROR_MALFORMED; 3223 } 3224 mGroup = new MediaBufferGroup; 3225 mGroup->add_buffer(new MediaBuffer(max_size)); 3226 3227 mSrcBuffer = new (std::nothrow) uint8_t[max_size]; 3228 if (mSrcBuffer == NULL) { 3229 // file probably specified a bad max size 3230 delete mGroup; 3231 mGroup = NULL; 3232 return ERROR_MALFORMED; 3233 } 3234 3235 mStarted = true; 3236 3237 return OK; 3238} 3239 3240status_t MPEG4Source::stop() { 3241 Mutex::Autolock autoLock(mLock); 3242 3243 CHECK(mStarted); 3244 3245 if (mBuffer != NULL) { 3246 mBuffer->release(); 3247 mBuffer = NULL; 3248 } 3249 3250 delete[] mSrcBuffer; 3251 mSrcBuffer = NULL; 3252 3253 delete mGroup; 3254 mGroup = NULL; 3255 3256 mStarted = false; 3257 mCurrentSampleIndex = 0; 3258 3259 return OK; 3260} 3261 3262status_t MPEG4Source::parseChunk(off64_t *offset) { 3263 uint32_t hdr[2]; 3264 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3265 return ERROR_IO; 3266 } 3267 uint64_t chunk_size = ntohl(hdr[0]); 3268 uint32_t chunk_type = ntohl(hdr[1]); 3269 off64_t data_offset = *offset + 8; 3270 3271 if (chunk_size == 1) { 3272 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 3273 return ERROR_IO; 3274 } 3275 chunk_size = ntoh64(chunk_size); 3276 data_offset += 8; 3277 3278 if (chunk_size < 16) { 3279 // The smallest valid chunk is 16 bytes long in this case. 3280 return ERROR_MALFORMED; 3281 } 3282 } else if (chunk_size < 8) { 3283 // The smallest valid chunk is 8 bytes long. 
3284 return ERROR_MALFORMED; 3285 } 3286 3287 char chunk[5]; 3288 MakeFourCCString(chunk_type, chunk); 3289 ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset); 3290 3291 off64_t chunk_data_size = *offset + chunk_size - data_offset; 3292 3293 switch(chunk_type) { 3294 3295 case FOURCC('t', 'r', 'a', 'f'): 3296 case FOURCC('m', 'o', 'o', 'f'): { 3297 off64_t stop_offset = *offset + chunk_size; 3298 *offset = data_offset; 3299 while (*offset < stop_offset) { 3300 status_t err = parseChunk(offset); 3301 if (err != OK) { 3302 return err; 3303 } 3304 } 3305 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3306 // *offset points to the box following this moof. Find the next moof from there. 3307 3308 while (true) { 3309 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3310 return ERROR_END_OF_STREAM; 3311 } 3312 chunk_size = ntohl(hdr[0]); 3313 chunk_type = ntohl(hdr[1]); 3314 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3315 mNextMoofOffset = *offset; 3316 break; 3317 } 3318 *offset += chunk_size; 3319 } 3320 } 3321 break; 3322 } 3323 3324 case FOURCC('t', 'f', 'h', 'd'): { 3325 status_t err; 3326 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 3327 return err; 3328 } 3329 *offset += chunk_size; 3330 break; 3331 } 3332 3333 case FOURCC('t', 'r', 'u', 'n'): { 3334 status_t err; 3335 if (mLastParsedTrackId == mTrackId) { 3336 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 3337 return err; 3338 } 3339 } 3340 3341 *offset += chunk_size; 3342 break; 3343 } 3344 3345 case FOURCC('s', 'a', 'i', 'z'): { 3346 status_t err; 3347 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 3348 return err; 3349 } 3350 *offset += chunk_size; 3351 break; 3352 } 3353 case FOURCC('s', 'a', 'i', 'o'): { 3354 status_t err; 3355 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 3356 return err; 3357 } 3358 *offset += chunk_size; 3359 break; 3360 } 3361 3362 case 
FOURCC('m', 'd', 'a', 't'): { 3363 // parse DRM info if present 3364 ALOGV("MPEG4Source::parseChunk mdat"); 3365 // if saiz/saoi was previously observed, do something with the sampleinfos 3366 *offset += chunk_size; 3367 break; 3368 } 3369 3370 default: { 3371 *offset += chunk_size; 3372 break; 3373 } 3374 } 3375 return OK; 3376} 3377

// Parses a 'saiz' (sample auxiliary information sizes) box, 14496-12 8.7.12.
//
// offset - file offset of the box payload (version byte first).
// size   - payload size; currently unused.
//
// On success the default per-sample info size and the sample count are stored
// in mCurrentDefaultSampleInfoSize / mCurrentSampleInfoCount. When the default
// size is 0, the per-sample size table is loaded into mCurrentSampleInfoSizes.
//
// Returns OK, ERROR_IO / ERROR_MALFORMED on a short or failed read,
// ERROR_UNSUPPORTED for a non-zero box version, or NO_MEMORY if the size
// table cannot be grown.
status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
        off64_t offset, off64_t /* size */) {
    ALOGV("parseSampleAuxiliaryInformationSizes");
    // 14496-12 8.7.12
    uint8_t version;
    if (mDataSource->readAt(
            offset, &version, sizeof(version))
            < (ssize_t)sizeof(version)) {
        return ERROR_IO;
    }

    if (version != 0) {
        return ERROR_UNSUPPORTED;
    }
    offset++;

    uint32_t flags;
    if (!mDataSource->getUInt24(offset, &flags)) {
        return ERROR_IO;
    }
    offset += 3;

    // Bit 0 of the flags announces two extra 32-bit fields.
    if (flags & 1) {
        uint32_t tmp;
        if (!mDataSource->getUInt32(offset, &tmp)) {
            return ERROR_MALFORMED;
        }
        mCurrentAuxInfoType = tmp;
        offset += 4;
        if (!mDataSource->getUInt32(offset, &tmp)) {
            return ERROR_MALFORMED;
        }
        mCurrentAuxInfoTypeParameter = tmp;
        offset += 4;
    }

    uint8_t defsize;
    if (mDataSource->readAt(offset, &defsize, 1) != 1) {
        return ERROR_MALFORMED;
    }
    mCurrentDefaultSampleInfoSize = defsize;
    offset++;

    uint32_t smplcnt;
    if (!mDataSource->getUInt32(offset, &smplcnt)) {
        return ERROR_MALFORMED;
    }
    offset += 4;

    if (mCurrentDefaultSampleInfoSize != 0) {
        // Every sample uses the default size; no table follows.
        mCurrentSampleInfoCount = smplcnt;
        ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
        return OK;
    }

    // A per-sample table follows. Publish the new count only once the table
    // has been fully loaded, so that a failure below can never leave a count
    // that is larger than the storage backing it.
    mCurrentSampleInfoCount = 0;

    if (smplcnt > mCurrentSampleInfoAllocSize) {
        // Keep the old pointer until realloc() is known to have succeeded;
        // on failure the old block is still valid and still owned by us.
        uint8_t *newPtr = (uint8_t *)realloc(mCurrentSampleInfoSizes, smplcnt);
        if (newPtr == NULL) {
            ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
            return NO_MEMORY;
        }
        mCurrentSampleInfoSizes = newPtr;
        mCurrentSampleInfoAllocSize = smplcnt;
    }

    if (smplcnt > 0) {
        ssize_t numRead = mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
        if (numRead < 0 || (size_t)numRead < smplcnt) {
            // Truncated box: do not expose a partially filled table.
            return ERROR_IO;
        }
    }

    mCurrentSampleInfoCount = smplcnt;
    return OK;
}

// Parses a 'saio' (sample auxiliary information offsets) box, 14496-12 8.7.13,
// and then reads the per-sample encryption info (IV and clear/encrypted
// subsample sizes) that the first offset points at into mCurrentSamples.
//
// offset - file offset of the box payload (version byte first).
// size   - payload size; currently unused.
//
// Returns OK, ERROR_IO on a short or failed read, ERROR_MALFORMED for an
// implausible entry count, a missing/unsupported IV size or a missing size
// table, or NO_MEMORY if the offset table cannot be grown.
status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
        off64_t offset, off64_t /* size */) {
    ALOGV("parseSampleAuxiliaryInformationOffsets");
    // 14496-12 8.7.13
    uint8_t version;
    if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
        return ERROR_IO;
    }
    offset++;

    uint32_t flags;
    if (!mDataSource->getUInt24(offset, &flags)) {
        return ERROR_IO;
    }
    offset += 3;

    uint32_t entrycount;
    if (!mDataSource->getUInt32(offset, &entrycount)) {
        return ERROR_IO;
    }
    offset += 4;
    if (entrycount == 0) {
        return OK;
    }
    // Guard the "entrycount * 8" allocation size below against overflow.
    if (entrycount > UINT32_MAX / 8) {
        return ERROR_MALFORMED;
    }

    if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
        uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
        if (newPtr == NULL) {
            return NO_MEMORY;
        }
        mCurrentSampleInfoOffsets = newPtr;
        mCurrentSampleInfoOffsetsAllocSize = entrycount;
    }
    mCurrentSampleInfoOffsetCount = entrycount;

    if (mCurrentSampleInfoOffsets == NULL) {
        return OK;
    }

    // Version 0 stores 32-bit offsets, any other version 64-bit offsets.
    for (size_t i = 0; i < entrycount; i++) {
        if (version == 0) {
            uint32_t tmp;
            if (!mDataSource->getUInt32(offset, &tmp)) {
                return ERROR_IO;
            }
            mCurrentSampleInfoOffsets[i] = tmp;
            offset += 4;
        } else {
            uint64_t tmp;
            if (!mDataSource->getUInt64(offset, &tmp)) {
                return ERROR_IO;
            }
            mCurrentSampleInfoOffsets[i] = tmp;
            offset += 8;
        }
    }

    // parse clear/encrypted data

    off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof

    drmoffset += mCurrentMoofOffset;
    int ivlength;
    if (!mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength)) {
        // The box comes from the (untrusted) file; a track without a default
        // IV size must be reported as an error rather than abort the process.
        ALOGE("saio box found but track has no default IV size");
        return ERROR_MALFORMED;
    }

    // only 0, 8 and 16 byte initialization vectors are supported
    if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
        ALOGW("unsupported IV length: %d", ivlength);
        return ERROR_MALFORMED;
    }
    // read CencSampleAuxiliaryDataFormats
    for (size_t i = 0; i < mCurrentSampleInfoCount; i++) {
        if (i >= mCurrentSamples.size()) {
            ALOGW("too few samples");
            break;
        }
        Sample *smpl = &mCurrentSamples.editItemAt(i);

        memset(smpl->iv, 0, 16);
        if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) {
            return ERROR_IO;
        }

        drmoffset += ivlength;

        int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
        if (smplinfosize == 0) {
            // No default size: the per-sample table must exist and cover i.
            if (mCurrentSampleInfoSizes == NULL || i >= mCurrentSampleInfoAllocSize) {
                ALOGE("missing sample info size for sample %zu", i);
                return ERROR_MALFORMED;
            }
            smplinfosize = mCurrentSampleInfoSizes[i];
        }
        if (smplinfosize > ivlength) {
            // More than just the IV: a subsample table follows.
            uint16_t numsubsamples;
            if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) {
                return ERROR_IO;
            }
            drmoffset += 2;
            for (size_t j = 0; j < numsubsamples; j++) {
                uint16_t numclear;
                uint32_t numencrypted;
                if (!mDataSource->getUInt16(drmoffset, &numclear)) {
                    return ERROR_IO;
                }
                drmoffset += 2;
                if (!mDataSource->getUInt32(drmoffset, &numencrypted)) {
                    return ERROR_IO;
                }
                drmoffset += 4;
                smpl->clearsizes.add(numclear);
                smpl->encryptedsizes.add(numencrypted);
            }
        } else {
            // No subsample table: the whole sample is treated as encrypted.
            smpl->clearsizes.add(0);
            smpl->encryptedsizes.add(smpl->size);
        }
    }


    return OK;
}

// Parses a 'tfhd' (track fragment header) box payload.
//
// offset - file offset of the payload (32-bit version+flags word first).
// size   - number of payload bytes available.
//
// The track id is always stored in mLastParsedTrackId. Only when it matches
// mTrackId are the optional fields selected by the flags copied into
// mTrackFragmentHeaderInfo; otherwise the box is skipped and OK is returned.
//
// Returns OK, -EINVAL if the payload is too short for the fields announced by
// the flags or the version byte is non-zero, or ERROR_MALFORMED on a failed
// read.
status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {

    if (size < 8) {
        return -EINVAL;
    }

    uint32_t flags;
    if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
        return ERROR_MALFORMED;
    }

    // The top byte is the version; only version 0 is accepted.
    if (flags & 0xff000000) {
        return -EINVAL;
    }

    if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
        return ERROR_MALFORMED;
    }

    if (mLastParsedTrackId != mTrackId) {
        // this is not the right track, skip it
        return OK;
    }

    mTrackFragmentHeaderInfo.mFlags = flags;
    mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
    offset += 8;
    size -= 8;

    ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);

    if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
        if (size < 8) {
            return -EINVAL;
        }

        if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
            return ERROR_MALFORMED;
        }
        offset += 8;
        size -= 8;
    }

    if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
        if (size < 4) {
            return -EINVAL;
        }

        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
            return ERROR_MALFORMED;
        }
        offset += 4;
        size -= 4;
    }

    if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
        if (size < 4) {
            return -EINVAL;
        }

        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
            return ERROR_MALFORMED;
        }
        offset += 4;
        size -= 4;
    }

    if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
        if (size < 4) {
            return -EINVAL;
        }

        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
            return ERROR_MALFORMED;
        }
        offset += 4;
        size -= 4;
    }

    if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
        if (size < 4) {
            return -EINVAL;
        }

        if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
            return ERROR_MALFORMED;
        }
        offset += 4;
        size -= 4;
    }

    // Without an explicit base data offset, fall back to the start of the
    // moof currently being parsed.
    if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
        mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
    }

    mTrackFragmentHeaderInfo.mDataOffset = 0;
    return OK;
}

3662status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 3663 3664
ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 3665 if (size < 8) { 3666 return -EINVAL; 3667 } 3668 3669 enum { 3670 kDataOffsetPresent = 0x01, 3671 kFirstSampleFlagsPresent = 0x04, 3672 kSampleDurationPresent = 0x100, 3673 kSampleSizePresent = 0x200, 3674 kSampleFlagsPresent = 0x400, 3675 kSampleCompositionTimeOffsetPresent = 0x800, 3676 }; 3677 3678 uint32_t flags; 3679 if (!mDataSource->getUInt32(offset, &flags)) { 3680 return ERROR_MALFORMED; 3681 } 3682 ALOGV("fragment run flags: %08x", flags); 3683 3684 if (flags & 0xff000000) { 3685 return -EINVAL; 3686 } 3687 3688 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 3689 // These two shall not be used together. 3690 return -EINVAL; 3691 } 3692 3693 uint32_t sampleCount; 3694 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 3695 return ERROR_MALFORMED; 3696 } 3697 offset += 8; 3698 size -= 8; 3699 3700 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 3701 3702 uint32_t firstSampleFlags = 0; 3703 3704 if (flags & kDataOffsetPresent) { 3705 if (size < 4) { 3706 return -EINVAL; 3707 } 3708 3709 int32_t dataOffsetDelta; 3710 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 3711 return ERROR_MALFORMED; 3712 } 3713 3714 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 3715 3716 offset += 4; 3717 size -= 4; 3718 } 3719 3720 if (flags & kFirstSampleFlagsPresent) { 3721 if (size < 4) { 3722 return -EINVAL; 3723 } 3724 3725 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 3726 return ERROR_MALFORMED; 3727 } 3728 offset += 4; 3729 size -= 4; 3730 } 3731 3732 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 3733 sampleCtsOffset = 0; 3734 3735 size_t bytesPerSample = 0; 3736 if (flags & kSampleDurationPresent) { 3737 bytesPerSample += 4; 3738 } else if (mTrackFragmentHeaderInfo.mFlags 3739 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 3740 sampleDuration = 
mTrackFragmentHeaderInfo.mDefaultSampleDuration; 3741 } else if (mTrex) { 3742 sampleDuration = mTrex->default_sample_duration; 3743 } 3744 3745 if (flags & kSampleSizePresent) { 3746 bytesPerSample += 4; 3747 } else if (mTrackFragmentHeaderInfo.mFlags 3748 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 3749 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 3750 } else { 3751 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 3752 } 3753 3754 if (flags & kSampleFlagsPresent) { 3755 bytesPerSample += 4; 3756 } else if (mTrackFragmentHeaderInfo.mFlags 3757 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 3758 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 3759 } else { 3760 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 3761 } 3762 3763 if (flags & kSampleCompositionTimeOffsetPresent) { 3764 bytesPerSample += 4; 3765 } else { 3766 sampleCtsOffset = 0; 3767 } 3768 3769 if (size < (off64_t)sampleCount * bytesPerSample) { 3770 return -EINVAL; 3771 } 3772 3773 Sample tmp; 3774 for (uint32_t i = 0; i < sampleCount; ++i) { 3775 if (flags & kSampleDurationPresent) { 3776 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 3777 return ERROR_MALFORMED; 3778 } 3779 offset += 4; 3780 } 3781 3782 if (flags & kSampleSizePresent) { 3783 if (!mDataSource->getUInt32(offset, &sampleSize)) { 3784 return ERROR_MALFORMED; 3785 } 3786 offset += 4; 3787 } 3788 3789 if (flags & kSampleFlagsPresent) { 3790 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 3791 return ERROR_MALFORMED; 3792 } 3793 offset += 4; 3794 } 3795 3796 if (flags & kSampleCompositionTimeOffsetPresent) { 3797 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 3798 return ERROR_MALFORMED; 3799 } 3800 offset += 4; 3801 } 3802 3803 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, " 3804 " flags 0x%08x", i + 1, 3805 dataOffset, sampleSize, sampleDuration, 3806 (flags & kFirstSampleFlagsPresent) && i == 0 3807 ? 
firstSampleFlags : sampleFlags); 3808 tmp.offset = dataOffset; 3809 tmp.size = sampleSize; 3810 tmp.duration = sampleDuration; 3811 tmp.compositionOffset = sampleCtsOffset; 3812 mCurrentSamples.add(tmp); 3813 3814 dataOffset += sampleSize; 3815 } 3816 3817 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 3818 3819 return OK; 3820} 3821 3822sp<MetaData> MPEG4Source::getFormat() { 3823 Mutex::Autolock autoLock(mLock); 3824 3825 return mFormat; 3826} 3827 3828size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 3829 switch (mNALLengthSize) { 3830 case 1: 3831 return *data; 3832 case 2: 3833 return U16_AT(data); 3834 case 3: 3835 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 3836 case 4: 3837 return U32_AT(data); 3838 } 3839 3840 // This cannot happen, mNALLengthSize springs to life by adding 1 to 3841 // a 2-bit integer. 3842 CHECK(!"Should not be here."); 3843 3844 return 0; 3845} 3846 3847status_t MPEG4Source::read( 3848 MediaBuffer **out, const ReadOptions *options) { 3849 Mutex::Autolock autoLock(mLock); 3850 3851 CHECK(mStarted); 3852 3853 if (mFirstMoofOffset > 0) { 3854 return fragmentedRead(out, options); 3855 } 3856 3857 *out = NULL; 3858 3859 int64_t targetSampleTimeUs = -1; 3860 3861 int64_t seekTimeUs; 3862 ReadOptions::SeekMode mode; 3863 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3864 uint32_t findFlags = 0; 3865 switch (mode) { 3866 case ReadOptions::SEEK_PREVIOUS_SYNC: 3867 findFlags = SampleTable::kFlagBefore; 3868 break; 3869 case ReadOptions::SEEK_NEXT_SYNC: 3870 findFlags = SampleTable::kFlagAfter; 3871 break; 3872 case ReadOptions::SEEK_CLOSEST_SYNC: 3873 case ReadOptions::SEEK_CLOSEST: 3874 findFlags = SampleTable::kFlagClosest; 3875 break; 3876 default: 3877 CHECK(!"Should not be here."); 3878 break; 3879 } 3880 3881 uint32_t sampleIndex; 3882 status_t err = mSampleTable->findSampleAtTime( 3883 seekTimeUs, 1000000, mTimescale, 3884 &sampleIndex, findFlags); 3885 3886 if (mode == ReadOptions::SEEK_CLOSEST) { 3887 // 
We found the closest sample already, now we want the sync 3888 // sample preceding it (or the sample itself of course), even 3889 // if the subsequent sync sample is closer. 3890 findFlags = SampleTable::kFlagBefore; 3891 } 3892 3893 uint32_t syncSampleIndex; 3894 if (err == OK) { 3895 err = mSampleTable->findSyncSampleNear( 3896 sampleIndex, &syncSampleIndex, findFlags); 3897 } 3898 3899 uint32_t sampleTime; 3900 if (err == OK) { 3901 err = mSampleTable->getMetaDataForSample( 3902 sampleIndex, NULL, NULL, &sampleTime); 3903 } 3904 3905 if (err != OK) { 3906 if (err == ERROR_OUT_OF_RANGE) { 3907 // An attempt to seek past the end of the stream would 3908 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 3909 // this all the way to the MediaPlayer would cause abnormal 3910 // termination. Legacy behaviour appears to be to behave as if 3911 // we had seeked to the end of stream, ending normally. 3912 err = ERROR_END_OF_STREAM; 3913 } 3914 ALOGV("end of stream"); 3915 return err; 3916 } 3917 3918 if (mode == ReadOptions::SEEK_CLOSEST) { 3919 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 3920 } 3921 3922#if 0 3923 uint32_t syncSampleTime; 3924 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 3925 syncSampleIndex, NULL, NULL, &syncSampleTime)); 3926 3927 ALOGI("seek to time %lld us => sample at time %lld us, " 3928 "sync sample at time %lld us", 3929 seekTimeUs, 3930 sampleTime * 1000000ll / mTimescale, 3931 syncSampleTime * 1000000ll / mTimescale); 3932#endif 3933 3934 mCurrentSampleIndex = syncSampleIndex; 3935 if (mBuffer != NULL) { 3936 mBuffer->release(); 3937 mBuffer = NULL; 3938 } 3939 3940 // fall through 3941 } 3942 3943 off64_t offset; 3944 size_t size; 3945 uint32_t cts, stts; 3946 bool isSyncSample; 3947 bool newBuffer = false; 3948 if (mBuffer == NULL) { 3949 newBuffer = true; 3950 3951 status_t err = 3952 mSampleTable->getMetaDataForSample( 3953 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts); 3954 3955 if (err != 
OK) { 3956 return err; 3957 } 3958 3959 err = mGroup->acquire_buffer(&mBuffer); 3960 3961 if (err != OK) { 3962 CHECK(mBuffer == NULL); 3963 return err; 3964 } 3965 if (size > mBuffer->size()) { 3966 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 3967 return ERROR_BUFFER_TOO_SMALL; 3968 } 3969 } 3970 3971 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) { 3972 if (newBuffer) { 3973 ssize_t num_bytes_read = 3974 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 3975 3976 if (num_bytes_read < (ssize_t)size) { 3977 mBuffer->release(); 3978 mBuffer = NULL; 3979 3980 return ERROR_IO; 3981 } 3982 3983 CHECK(mBuffer != NULL); 3984 mBuffer->set_range(0, size); 3985 mBuffer->meta_data()->clear(); 3986 mBuffer->meta_data()->setInt64( 3987 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3988 mBuffer->meta_data()->setInt64( 3989 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 3990 3991 if (targetSampleTimeUs >= 0) { 3992 mBuffer->meta_data()->setInt64( 3993 kKeyTargetTime, targetSampleTimeUs); 3994 } 3995 3996 if (isSyncSample) { 3997 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3998 } 3999 4000 ++mCurrentSampleIndex; 4001 } 4002 4003 if (!mIsAVC && !mIsHEVC) { 4004 *out = mBuffer; 4005 mBuffer = NULL; 4006 4007 return OK; 4008 } 4009 4010 // Each NAL unit is split up into its constituent fragments and 4011 // each one of them returned in its own buffer. 
4012 4013 CHECK(mBuffer->range_length() >= mNALLengthSize); 4014 4015 const uint8_t *src = 4016 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 4017 4018 size_t nal_size = parseNALSize(src); 4019 if (mNALLengthSize > SIZE_MAX - nal_size) { 4020 ALOGE("b/24441553, b/24445122"); 4021 } 4022 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 4023 ALOGE("incomplete NAL unit."); 4024 4025 mBuffer->release(); 4026 mBuffer = NULL; 4027 4028 return ERROR_MALFORMED; 4029 } 4030 4031 MediaBuffer *clone = mBuffer->clone(); 4032 CHECK(clone != NULL); 4033 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 4034 4035 CHECK(mBuffer != NULL); 4036 mBuffer->set_range( 4037 mBuffer->range_offset() + mNALLengthSize + nal_size, 4038 mBuffer->range_length() - mNALLengthSize - nal_size); 4039 4040 if (mBuffer->range_length() == 0) { 4041 mBuffer->release(); 4042 mBuffer = NULL; 4043 } 4044 4045 *out = clone; 4046 4047 return OK; 4048 } else { 4049 // Whole NAL units are returned but each fragment is prefixed by 4050 // the start code (0x00 00 00 01). 
4051 ssize_t num_bytes_read = 0; 4052 int32_t drm = 0; 4053 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 4054 if (usesDRM) { 4055 num_bytes_read = 4056 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 4057 } else { 4058 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 4059 } 4060 4061 if (num_bytes_read < (ssize_t)size) { 4062 mBuffer->release(); 4063 mBuffer = NULL; 4064 4065 return ERROR_IO; 4066 } 4067 4068 if (usesDRM) { 4069 CHECK(mBuffer != NULL); 4070 mBuffer->set_range(0, size); 4071 4072 } else { 4073 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4074 size_t srcOffset = 0; 4075 size_t dstOffset = 0; 4076 4077 while (srcOffset < size) { 4078 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 4079 size_t nalLength = 0; 4080 if (!isMalFormed) { 4081 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4082 srcOffset += mNALLengthSize; 4083 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength); 4084 } 4085 4086 if (isMalFormed) { 4087 ALOGE("Video is malformed"); 4088 mBuffer->release(); 4089 mBuffer = NULL; 4090 return ERROR_MALFORMED; 4091 } 4092 4093 if (nalLength == 0) { 4094 continue; 4095 } 4096 4097 if (dstOffset > SIZE_MAX - 4 || 4098 dstOffset + 4 > SIZE_MAX - nalLength || 4099 dstOffset + 4 + nalLength > mBuffer->size()) { 4100 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size()); 4101 android_errorWriteLog(0x534e4554, "27208621"); 4102 mBuffer->release(); 4103 mBuffer = NULL; 4104 return ERROR_MALFORMED; 4105 } 4106 4107 dstData[dstOffset++] = 0; 4108 dstData[dstOffset++] = 0; 4109 dstData[dstOffset++] = 0; 4110 dstData[dstOffset++] = 1; 4111 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4112 srcOffset += nalLength; 4113 dstOffset += nalLength; 4114 } 4115 CHECK_EQ(srcOffset, size); 4116 CHECK(mBuffer != NULL); 4117 mBuffer->set_range(0, dstOffset); 4118 } 4119 4120 mBuffer->meta_data()->clear(); 4121 mBuffer->meta_data()->setInt64( 4122 
kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4123 mBuffer->meta_data()->setInt64( 4124 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 4125 4126 if (targetSampleTimeUs >= 0) { 4127 mBuffer->meta_data()->setInt64( 4128 kKeyTargetTime, targetSampleTimeUs); 4129 } 4130 4131 if (isSyncSample) { 4132 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4133 } 4134 4135 ++mCurrentSampleIndex; 4136 4137 *out = mBuffer; 4138 mBuffer = NULL; 4139 4140 return OK; 4141 } 4142} 4143 4144status_t MPEG4Source::fragmentedRead( 4145 MediaBuffer **out, const ReadOptions *options) { 4146 4147 ALOGV("MPEG4Source::fragmentedRead"); 4148 4149 CHECK(mStarted); 4150 4151 *out = NULL; 4152 4153 int64_t targetSampleTimeUs = -1; 4154 4155 int64_t seekTimeUs; 4156 ReadOptions::SeekMode mode; 4157 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 4158 4159 int numSidxEntries = mSegments.size(); 4160 if (numSidxEntries != 0) { 4161 int64_t totalTime = 0; 4162 off64_t totalOffset = mFirstMoofOffset; 4163 for (int i = 0; i < numSidxEntries; i++) { 4164 const SidxEntry *se = &mSegments[i]; 4165 if (totalTime + se->mDurationUs > seekTimeUs) { 4166 // The requested time is somewhere in this segment 4167 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) || 4168 (mode == ReadOptions::SEEK_CLOSEST_SYNC && 4169 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 4170 // requested next sync, or closest sync and it was closer to the end of 4171 // this segment 4172 totalTime += se->mDurationUs; 4173 totalOffset += se->mSize; 4174 } 4175 break; 4176 } 4177 totalTime += se->mDurationUs; 4178 totalOffset += se->mSize; 4179 } 4180 mCurrentMoofOffset = totalOffset; 4181 mCurrentSamples.clear(); 4182 mCurrentSampleIndex = 0; 4183 parseChunk(&totalOffset); 4184 mCurrentTime = totalTime * mTimescale / 1000000ll; 4185 } else { 4186 // without sidx boxes, we can only seek to 0 4187 mCurrentMoofOffset = mFirstMoofOffset; 4188 mCurrentSamples.clear(); 4189 
mCurrentSampleIndex = 0; 4190 off64_t tmp = mCurrentMoofOffset; 4191 parseChunk(&tmp); 4192 mCurrentTime = 0; 4193 } 4194 4195 if (mBuffer != NULL) { 4196 mBuffer->release(); 4197 mBuffer = NULL; 4198 } 4199 4200 // fall through 4201 } 4202 4203 off64_t offset = 0; 4204 size_t size = 0; 4205 uint32_t cts = 0; 4206 bool isSyncSample = false; 4207 bool newBuffer = false; 4208 if (mBuffer == NULL) { 4209 newBuffer = true; 4210 4211 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4212 // move to next fragment if there is one 4213 if (mNextMoofOffset <= mCurrentMoofOffset) { 4214 return ERROR_END_OF_STREAM; 4215 } 4216 off64_t nextMoof = mNextMoofOffset; 4217 mCurrentMoofOffset = nextMoof; 4218 mCurrentSamples.clear(); 4219 mCurrentSampleIndex = 0; 4220 parseChunk(&nextMoof); 4221 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4222 return ERROR_END_OF_STREAM; 4223 } 4224 } 4225 4226 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4227 offset = smpl->offset; 4228 size = smpl->size; 4229 cts = mCurrentTime + smpl->compositionOffset; 4230 mCurrentTime += smpl->duration; 4231 isSyncSample = (mCurrentSampleIndex == 0); // XXX 4232 4233 status_t err = mGroup->acquire_buffer(&mBuffer); 4234 4235 if (err != OK) { 4236 CHECK(mBuffer == NULL); 4237 ALOGV("acquire_buffer returned %d", err); 4238 return err; 4239 } 4240 if (size > mBuffer->size()) { 4241 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 4242 return ERROR_BUFFER_TOO_SMALL; 4243 } 4244 } 4245 4246 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4247 const sp<MetaData> bufmeta = mBuffer->meta_data(); 4248 bufmeta->clear(); 4249 if (smpl->encryptedsizes.size()) { 4250 // store clear/encrypted lengths in metadata 4251 bufmeta->setData(kKeyPlainSizes, 0, 4252 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 4253 bufmeta->setData(kKeyEncryptedSizes, 0, 4254 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 4255 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 
16); // use 16 or the actual size? 4256 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 4257 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode); 4258 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16); 4259 } 4260 4261 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) { 4262 if (newBuffer) { 4263 if (!isInRange((size_t)0u, mBuffer->size(), size)) { 4264 mBuffer->release(); 4265 mBuffer = NULL; 4266 4267 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size); 4268 return ERROR_MALFORMED; 4269 } 4270 4271 ssize_t num_bytes_read = 4272 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 4273 4274 if (num_bytes_read < (ssize_t)size) { 4275 mBuffer->release(); 4276 mBuffer = NULL; 4277 4278 ALOGE("i/o error"); 4279 return ERROR_IO; 4280 } 4281 4282 CHECK(mBuffer != NULL); 4283 mBuffer->set_range(0, size); 4284 mBuffer->meta_data()->setInt64( 4285 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4286 mBuffer->meta_data()->setInt64( 4287 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4288 4289 if (targetSampleTimeUs >= 0) { 4290 mBuffer->meta_data()->setInt64( 4291 kKeyTargetTime, targetSampleTimeUs); 4292 } 4293 4294 if (isSyncSample) { 4295 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4296 } 4297 4298 ++mCurrentSampleIndex; 4299 } 4300 4301 if (!mIsAVC && !mIsHEVC) { 4302 *out = mBuffer; 4303 mBuffer = NULL; 4304 4305 return OK; 4306 } 4307 4308 // Each NAL unit is split up into its constituent fragments and 4309 // each one of them returned in its own buffer. 
4310 4311 CHECK(mBuffer->range_length() >= mNALLengthSize); 4312 4313 const uint8_t *src = 4314 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 4315 4316 size_t nal_size = parseNALSize(src); 4317 if (mNALLengthSize > SIZE_MAX - nal_size) { 4318 ALOGE("b/24441553, b/24445122"); 4319 } 4320 4321 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 4322 ALOGE("incomplete NAL unit."); 4323 4324 mBuffer->release(); 4325 mBuffer = NULL; 4326 4327 return ERROR_MALFORMED; 4328 } 4329 4330 MediaBuffer *clone = mBuffer->clone(); 4331 CHECK(clone != NULL); 4332 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 4333 4334 CHECK(mBuffer != NULL); 4335 mBuffer->set_range( 4336 mBuffer->range_offset() + mNALLengthSize + nal_size, 4337 mBuffer->range_length() - mNALLengthSize - nal_size); 4338 4339 if (mBuffer->range_length() == 0) { 4340 mBuffer->release(); 4341 mBuffer = NULL; 4342 } 4343 4344 *out = clone; 4345 4346 return OK; 4347 } else { 4348 ALOGV("whole NAL"); 4349 // Whole NAL units are returned but each fragment is prefixed by 4350 // the start code (0x00 00 00 01). 
4351 ssize_t num_bytes_read = 0; 4352 int32_t drm = 0; 4353 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 4354 void *data = NULL; 4355 bool isMalFormed = false; 4356 if (usesDRM) { 4357 if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) { 4358 isMalFormed = true; 4359 } else { 4360 data = mBuffer->data(); 4361 } 4362 } else { 4363 int32_t max_size; 4364 if (mFormat == NULL 4365 || !mFormat->findInt32(kKeyMaxInputSize, &max_size) 4366 || !isInRange((size_t)0u, (size_t)max_size, size)) { 4367 isMalFormed = true; 4368 } else { 4369 data = mSrcBuffer; 4370 } 4371 } 4372 4373 if (isMalFormed || data == NULL) { 4374 ALOGE("isMalFormed size %zu", size); 4375 if (mBuffer != NULL) { 4376 mBuffer->release(); 4377 mBuffer = NULL; 4378 } 4379 return ERROR_MALFORMED; 4380 } 4381 num_bytes_read = mDataSource->readAt(offset, data, size); 4382 4383 if (num_bytes_read < (ssize_t)size) { 4384 mBuffer->release(); 4385 mBuffer = NULL; 4386 4387 ALOGE("i/o error"); 4388 return ERROR_IO; 4389 } 4390 4391 if (usesDRM) { 4392 CHECK(mBuffer != NULL); 4393 mBuffer->set_range(0, size); 4394 4395 } else { 4396 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4397 size_t srcOffset = 0; 4398 size_t dstOffset = 0; 4399 4400 while (srcOffset < size) { 4401 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 4402 size_t nalLength = 0; 4403 if (!isMalFormed) { 4404 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4405 srcOffset += mNALLengthSize; 4406 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength) 4407 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u) 4408 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength); 4409 } 4410 4411 if (isMalFormed) { 4412 ALOGE("Video is malformed; nalLength %zu", nalLength); 4413 mBuffer->release(); 4414 mBuffer = NULL; 4415 return ERROR_MALFORMED; 4416 } 4417 4418 if (nalLength == 0) { 4419 continue; 4420 } 4421 4422 if (dstOffset > SIZE_MAX - 4 || 4423 dstOffset 
+ 4 > SIZE_MAX - nalLength || 4424 dstOffset + 4 + nalLength > mBuffer->size()) { 4425 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size()); 4426 android_errorWriteLog(0x534e4554, "26365349"); 4427 mBuffer->release(); 4428 mBuffer = NULL; 4429 return ERROR_MALFORMED; 4430 } 4431 4432 dstData[dstOffset++] = 0; 4433 dstData[dstOffset++] = 0; 4434 dstData[dstOffset++] = 0; 4435 dstData[dstOffset++] = 1; 4436 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4437 srcOffset += nalLength; 4438 dstOffset += nalLength; 4439 } 4440 CHECK_EQ(srcOffset, size); 4441 CHECK(mBuffer != NULL); 4442 mBuffer->set_range(0, dstOffset); 4443 } 4444 4445 mBuffer->meta_data()->setInt64( 4446 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4447 mBuffer->meta_data()->setInt64( 4448 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4449 4450 if (targetSampleTimeUs >= 0) { 4451 mBuffer->meta_data()->setInt64( 4452 kKeyTargetTime, targetSampleTimeUs); 4453 } 4454 4455 if (isSyncSample) { 4456 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4457 } 4458 4459 ++mCurrentSampleIndex; 4460 4461 *out = mBuffer; 4462 mBuffer = NULL; 4463 4464 return OK; 4465 } 4466} 4467

// Returns the first track in the track list whose MIME type starts with
// "mimePrefix" (compared case-insensitively), or NULL if there is none.
// Tracks without metadata or without a MIME type are ignored.
MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
        const char *mimePrefix) {
    for (Track *track = mFirstTrack; track != NULL; track = track->next) {
        const char *mime;
        if (track->meta != NULL
                && track->meta->findCString(kKeyMIMEType, &mime)
                && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
            return track;
        }
    }

    return NULL;
}

// Fallback sniffer: compares the 8 bytes at file offset 4 against a fixed
// list of "ftyp" + major-brand strings. On a match, reports the MPEG4
// container MIME type with confidence 0.4 and returns true.
static bool LegacySniffMPEG4(
        const sp<DataSource> &source, String8 *mimeType, float *confidence) {
    uint8_t header[8];

    ssize_t n = source->readAt(4, header, sizeof(header));
    if (n < (ssize_t)sizeof(header)) {
        return false;
    }

    if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
        || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
        || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
        || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
        || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
        || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) {
        *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
        *confidence = 0.4f;

        return true;
    }

    return false;
}

// Returns true if "fourcc" is one of the brands this extractor is willing to
// try to play.
static bool isCompatibleBrand(uint32_t fourcc) {
    static const uint32_t kCompatibleBrands[] = {
        FOURCC('i', 's', 'o', 'm'),
        FOURCC('i', 's', 'o', '2'),
        FOURCC('a', 'v', 'c', '1'),
        FOURCC('h', 'v', 'c', '1'),
        FOURCC('h', 'e', 'v', '1'),
        FOURCC('3', 'g', 'p', '4'),
        FOURCC('m', 'p', '4', '1'),
        FOURCC('m', 'p', '4', '2'),

        // Won't promise that the following file types can be played.
        // Just give these file types a chance.
        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP

        FOURCC('3', 'g', '2', 'a'),  // 3GPP2
        FOURCC('3', 'g', '2', 'b'),
    };

    for (size_t i = 0;
         i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
         ++i) {
        if (kCompatibleBrands[i] == fourcc) {
            return true;
        }
    }

    return false;
}

// Attempt to actually parse the 'ftyp' atom and determine if a suitable
// compatible brand is present.
// Also try to identify where this file's metadata ends
// (end of the 'moov' atom) and report it to the caller as part of
// the metadata.
//
// Returns true (and fills in mimeType / confidence, plus "meta-data-size" in
// *meta when a 'moov' atom was seen) only if an 'ftyp' atom with a compatible
// brand was found. Any read failure or malformed atom header yields false.
static bool BetterSniffMPEG4(
        const sp<DataSource> &source, String8 *mimeType, float *confidence,
        sp<AMessage> *meta) {
    // We scan up to 128 bytes to identify this file as an MP4.
    static const off64_t kMaxScanOffset = 128ll;

    // Largest value representable in the signed 64-bit file offset type.
    static const uint64_t kMaxFileOffset = ~(uint64_t)0 >> 1;

    off64_t offset = 0ll;
    bool foundGoodFileType = false;
    off64_t moovAtomEndOffset = -1ll;
    bool done = false;

    while (!done && offset < kMaxScanOffset) {
        uint32_t hdr[2];
        if (source->readAt(offset, hdr, 8) < 8) {
            return false;
        }

        uint64_t chunkSize = ntohl(hdr[0]);
        uint32_t chunkType = ntohl(hdr[1]);
        off64_t chunkDataOffset = offset + 8;

        if (chunkSize == 1) {
            // A size of 1 means the real size follows as a 64-bit field.
            if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
                return false;
            }

            chunkSize = ntoh64(chunkSize);
            chunkDataOffset += 8;

            if (chunkSize < 16) {
                // The smallest valid chunk is 16 bytes long in this case.
                return false;
            }
        } else if (chunkSize < 8) {
            // The smallest valid chunk is 8 bytes long.
            return false;
        }

        // chunkSize comes straight from the file. Reject sizes whose end
        // would not fit in an off64_t: otherwise "offset + chunkSize" wraps,
        // which can move the scan backwards (and loop forever) or yield a
        // negative metadata size. offset is in [0, kMaxScanOffset) here, so
        // the subtraction cannot underflow.
        if (chunkSize > kMaxFileOffset - (uint64_t)offset) {
            return false;
        }

        off64_t chunkDataSize = offset + chunkSize - chunkDataOffset;

        char chunkstring[5];
        MakeFourCCString(chunkType, chunkstring);
        ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld",
                chunkstring, chunkSize, (long long)offset);
        switch (chunkType) {
            case FOURCC('f', 't', 'y', 'p'):
            {
                if (chunkDataSize < 8) {
                    return false;
                }

                // Entry 0 is the major brand, entry 1 the minor version and
                // the rest are compatible brands. Count in 64 bits so that a
                // huge declared size can neither truncate nor wrap the bound.
                const uint64_t numEntries = (uint64_t)(chunkDataSize - 8) / 4 + 2;
                for (uint64_t i = 0; i < numEntries; ++i) {
                    if (i == 1) {
                        // Skip this index, it refers to the minorVersion,
                        // not a brand.
                        continue;
                    }

                    uint32_t brand;
                    if (source->readAt(
                                chunkDataOffset + (off64_t)(4 * i), &brand, 4) < 4) {
                        return false;
                    }

                    brand = ntohl(brand);

                    if (isCompatibleBrand(brand)) {
                        foundGoodFileType = true;
                        break;
                    }
                }

                if (!foundGoodFileType) {
                    return false;
                }

                break;
            }

            case FOURCC('m', 'o', 'o', 'v'):
            {
                moovAtomEndOffset = offset + chunkSize;

                done = true;
                break;
            }

            default:
                break;
        }

        offset += chunkSize;
    }

    if (!foundGoodFileType) {
        return false;
    }

    *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
    *confidence = 0.4f;

    if (moovAtomEndOffset >= 0) {
        *meta = new AMessage;
        (*meta)->setInt64("meta-data-size", moovAtomEndOffset);

        ALOGV("found metadata size: %lld", (long long)moovAtomEndOffset);
    }

    return true;
}

// Sniffer entry point: tries the atom-parsing sniffer first and falls back to
// the fixed-signature sniffer. Returns true if either recognises the source.
bool SniffMPEG4(
        const sp<DataSource> &source, String8 *mimeType, float *confidence,
        sp<AMessage> *meta) {
    if (BetterSniffMPEG4(source, mimeType, confidence, meta)) {
        return true;
    }

    if (LegacySniffMPEG4(source, mimeType, confidence)) {
        ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
        return true;
    }

    return false;
}

}  // namespace android