MPEG4Extractor.cpp revision 6ace94d2952eac82fc4c86aa6d585258248bf18c
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//#define LOG_NDEBUG 0
#define LOG_TAG "MPEG4Extractor"

#include <ctype.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <utils/Log.h>

#include "include/MPEG4Extractor.h"
#include "include/SampleTable.h"
#include "include/ESDS.h"

#include <media/stagefright/foundation/ABitReader.h>
#include <media/stagefright/foundation/ABuffer.h>
#include <media/stagefright/foundation/ADebug.h>
#include <media/stagefright/foundation/AMessage.h>
#include <media/stagefright/foundation/AUtils.h>
#include <media/stagefright/MediaBuffer.h>
#include <media/stagefright/MediaBufferGroup.h>
#include <media/stagefright/MediaDefs.h>
#include <media/stagefright/MediaSource.h>
#include <media/stagefright/MetaData.h>
#include <utils/String8.h>

#include <byteswap.h>
#include "include/ID3.h"

// Fallback for toolchains whose <stdint.h> does not expose the limit macros.
#ifndef UINT32_MAX
#define UINT32_MAX       (4294967295U)
#endif

namespace android {

enum {
    // maximum size of an atom. Some atoms can be bigger according to the spec,
    // but we only allow up to this size.
    kMaxAtomSize = 64 * 1024 * 1024,
};

// MediaSource subclass that is handed the owning MPEG4Extractor, a format,
// a data source, a timescale and a sample table at construction time.
// Only the declaration is in this part of the file; the member functions are
// defined elsewhere.
// NOTE(review): presumably one instance serves one track (see mTrackId) --
// confirm against the code that constructs it.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(const sp<MPEG4Extractor> &owner,
                const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset);

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    // Non-public destructor: instances cannot be destroyed directly by
    // outside code.
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    // keep the MPEG4Extractor around, since we're referencing its data
    sp<MPEG4Extractor> mOwner;
    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Reference member, not a copy: the referenced vector has to outlive this
    // object.
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // Values collected for a track fragment header.
    // NOTE(review): the flag names look like the tf_flags bits of the 'tfhd'
    // box -- confirm against parseTrackFragmentHeader().
    struct TrackFragmentHeaderInfo {
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record kept in mCurrentSamples: location, size, timing and
    // the encryption layout (IV plus clear/encrypted byte counts).
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Copy constructor and copy assignment are declared private, so outside
    // code cannot copy a MPEG4Source.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};

// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache while forwarding
// all remaining requests to the wrapped datasource.
// This is used to cache the full sampletable metadata for a single track,
// possibly wrapping multiple times to cover all tracks, i.e.
// Each MPEG4DataSource caches the sampletable metadata for a single track.
179 180struct MPEG4DataSource : public DataSource { 181 MPEG4DataSource(const sp<DataSource> &source); 182 183 virtual status_t initCheck() const; 184 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 185 virtual status_t getSize(off64_t *size); 186 virtual uint32_t flags(); 187 188 status_t setCachedRange(off64_t offset, size_t size); 189 190protected: 191 virtual ~MPEG4DataSource(); 192 193private: 194 Mutex mLock; 195 196 sp<DataSource> mSource; 197 off64_t mCachedOffset; 198 size_t mCachedSize; 199 uint8_t *mCache; 200 201 void clearCache(); 202 203 MPEG4DataSource(const MPEG4DataSource &); 204 MPEG4DataSource &operator=(const MPEG4DataSource &); 205}; 206 207MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source) 208 : mSource(source), 209 mCachedOffset(0), 210 mCachedSize(0), 211 mCache(NULL) { 212} 213 214MPEG4DataSource::~MPEG4DataSource() { 215 clearCache(); 216} 217 218void MPEG4DataSource::clearCache() { 219 if (mCache) { 220 free(mCache); 221 mCache = NULL; 222 } 223 224 mCachedOffset = 0; 225 mCachedSize = 0; 226} 227 228status_t MPEG4DataSource::initCheck() const { 229 return mSource->initCheck(); 230} 231 232ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { 233 Mutex::Autolock autoLock(mLock); 234 235 if (isInRange(mCachedOffset, mCachedSize, offset, size)) { 236 memcpy(data, &mCache[offset - mCachedOffset], size); 237 return size; 238 } 239 240 return mSource->readAt(offset, data, size); 241} 242 243status_t MPEG4DataSource::getSize(off64_t *size) { 244 return mSource->getSize(size); 245} 246 247uint32_t MPEG4DataSource::flags() { 248 return mSource->flags(); 249} 250 251status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { 252 Mutex::Autolock autoLock(mLock); 253 254 clearCache(); 255 256 mCache = (uint8_t *)malloc(size); 257 258 if (mCache == NULL) { 259 return -ENOMEM; 260 } 261 262 mCachedOffset = offset; 263 mCachedSize = size; 264 265 ssize_t err = mSource->readAt(mCachedOffset, 
mCache, mCachedSize); 266 267 if (err < (ssize_t)size) { 268 clearCache(); 269 270 return ERROR_IO; 271 } 272 273 return OK; 274} 275 276//////////////////////////////////////////////////////////////////////////////// 277 278static void hexdump(const void *_data, size_t size) { 279 const uint8_t *data = (const uint8_t *)_data; 280 size_t offset = 0; 281 while (offset < size) { 282 printf("0x%04zx ", offset); 283 284 size_t n = size - offset; 285 if (n > 16) { 286 n = 16; 287 } 288 289 for (size_t i = 0; i < 16; ++i) { 290 if (i == 8) { 291 printf(" "); 292 } 293 294 if (offset + i < size) { 295 printf("%02x ", data[offset + i]); 296 } else { 297 printf(" "); 298 } 299 } 300 301 printf(" "); 302 303 for (size_t i = 0; i < n; ++i) { 304 if (isprint(data[offset + i])) { 305 printf("%c", data[offset + i]); 306 } else { 307 printf("."); 308 } 309 } 310 311 printf("\n"); 312 313 offset += 16; 314 } 315} 316 317static const char *FourCC2MIME(uint32_t fourcc) { 318 switch (fourcc) { 319 case FOURCC('m', 'p', '4', 'a'): 320 return MEDIA_MIMETYPE_AUDIO_AAC; 321 322 case FOURCC('s', 'a', 'm', 'r'): 323 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 324 325 case FOURCC('s', 'a', 'w', 'b'): 326 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 327 328 case FOURCC('m', 'p', '4', 'v'): 329 return MEDIA_MIMETYPE_VIDEO_MPEG4; 330 331 case FOURCC('s', '2', '6', '3'): 332 case FOURCC('h', '2', '6', '3'): 333 case FOURCC('H', '2', '6', '3'): 334 return MEDIA_MIMETYPE_VIDEO_H263; 335 336 case FOURCC('a', 'v', 'c', '1'): 337 return MEDIA_MIMETYPE_VIDEO_AVC; 338 339 case FOURCC('h', 'v', 'c', '1'): 340 case FOURCC('h', 'e', 'v', '1'): 341 return MEDIA_MIMETYPE_VIDEO_HEVC; 342 default: 343 CHECK(!"should not be here."); 344 return NULL; 345 } 346} 347 348static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 349 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 350 // AMR NB audio is always mono, 8kHz 351 *channels = 1; 352 *rate = 8000; 353 return true; 354 } 
else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 355 // AMR WB audio is always mono, 16kHz 356 *channels = 1; 357 *rate = 16000; 358 return true; 359 } 360 return false; 361} 362 363MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source) 364 : mMoofOffset(0), 365 mDataSource(source), 366 mInitCheck(NO_INIT), 367 mHasVideo(false), 368 mHeaderTimescale(0), 369 mFirstTrack(NULL), 370 mLastTrack(NULL), 371 mFileMetaData(new MetaData), 372 mFirstSINF(NULL), 373 mIsDrm(false) { 374} 375 376MPEG4Extractor::~MPEG4Extractor() { 377 Track *track = mFirstTrack; 378 while (track) { 379 Track *next = track->next; 380 381 delete track; 382 track = next; 383 } 384 mFirstTrack = mLastTrack = NULL; 385 386 SINF *sinf = mFirstSINF; 387 while (sinf) { 388 SINF *next = sinf->next; 389 delete[] sinf->IPMPData; 390 delete sinf; 391 sinf = next; 392 } 393 mFirstSINF = NULL; 394 395 for (size_t i = 0; i < mPssh.size(); i++) { 396 delete [] mPssh[i].data; 397 } 398} 399 400uint32_t MPEG4Extractor::flags() const { 401 return CAN_PAUSE | 402 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 
403 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 404} 405 406sp<MetaData> MPEG4Extractor::getMetaData() { 407 status_t err; 408 if ((err = readMetaData()) != OK) { 409 return new MetaData; 410 } 411 412 return mFileMetaData; 413} 414 415size_t MPEG4Extractor::countTracks() { 416 status_t err; 417 if ((err = readMetaData()) != OK) { 418 ALOGV("MPEG4Extractor::countTracks: no tracks"); 419 return 0; 420 } 421 422 size_t n = 0; 423 Track *track = mFirstTrack; 424 while (track) { 425 ++n; 426 track = track->next; 427 } 428 429 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n); 430 return n; 431} 432 433sp<MetaData> MPEG4Extractor::getTrackMetaData( 434 size_t index, uint32_t flags) { 435 status_t err; 436 if ((err = readMetaData()) != OK) { 437 return NULL; 438 } 439 440 Track *track = mFirstTrack; 441 while (index > 0) { 442 if (track == NULL) { 443 return NULL; 444 } 445 446 track = track->next; 447 --index; 448 } 449 450 if (track == NULL) { 451 return NULL; 452 } 453 454 if ((flags & kIncludeExtensiveMetaData) 455 && !track->includes_expensive_metadata) { 456 track->includes_expensive_metadata = true; 457 458 const char *mime; 459 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 460 if (!strncasecmp("video/", mime, 6)) { 461 if (mMoofOffset > 0) { 462 int64_t duration; 463 if (track->meta->findInt64(kKeyDuration, &duration)) { 464 // nothing fancy, just pick a frame near 1/4th of the duration 465 track->meta->setInt64( 466 kKeyThumbnailTime, duration / 4); 467 } 468 } else { 469 uint32_t sampleIndex; 470 uint32_t sampleTime; 471 if (track->timescale != 0 && 472 track->sampleTable->findThumbnailSample(&sampleIndex) == OK 473 && track->sampleTable->getMetaDataForSample( 474 sampleIndex, NULL /* offset */, NULL /* size */, 475 &sampleTime) == OK) { 476 track->meta->setInt64( 477 kKeyThumbnailTime, 478 ((int64_t)sampleTime * 1000000) / track->timescale); 479 } 480 } 481 } 482 } 483 484 return track->meta; 485} 486 487static void 
MakeFourCCString(uint32_t x, char *s) { 488 s[0] = x >> 24; 489 s[1] = (x >> 16) & 0xff; 490 s[2] = (x >> 8) & 0xff; 491 s[3] = x & 0xff; 492 s[4] = '\0'; 493} 494 495status_t MPEG4Extractor::readMetaData() { 496 if (mInitCheck != NO_INIT) { 497 return mInitCheck; 498 } 499 500 off64_t offset = 0; 501 status_t err; 502 while (true) { 503 off64_t orig_offset = offset; 504 err = parseChunk(&offset, 0); 505 506 if (err != OK && err != UNKNOWN_ERROR) { 507 break; 508 } else if (offset <= orig_offset) { 509 // only continue parsing if the offset was advanced, 510 // otherwise we might end up in an infinite loop 511 ALOGE("did not advance: 0x%lld->0x%lld", orig_offset, offset); 512 err = ERROR_MALFORMED; 513 break; 514 } else if (err == OK) { 515 continue; 516 } 517 518 uint32_t hdr[2]; 519 if (mDataSource->readAt(offset, hdr, 8) < 8) { 520 break; 521 } 522 uint32_t chunk_type = ntohl(hdr[1]); 523 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 524 // store the offset of the first segment 525 mMoofOffset = offset; 526 } else if (chunk_type != FOURCC('m', 'd', 'a', 't')) { 527 // keep parsing until we get to the data 528 continue; 529 } 530 break; 531 } 532 533 if (mInitCheck == OK) { 534 if (mHasVideo) { 535 mFileMetaData->setCString( 536 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 537 } else { 538 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); 539 } 540 } else { 541 mInitCheck = err; 542 } 543 544 CHECK_NE(err, (status_t)NO_INIT); 545 546 // copy pssh data into file metadata 547 int psshsize = 0; 548 for (size_t i = 0; i < mPssh.size(); i++) { 549 psshsize += 20 + mPssh[i].datalen; 550 } 551 if (psshsize) { 552 char *buf = (char*)malloc(psshsize); 553 if (!buf) { 554 ALOGE("b/28471206"); 555 return NO_MEMORY; 556 } 557 char *ptr = buf; 558 for (size_t i = 0; i < mPssh.size(); i++) { 559 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 560 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 561 ptr += (20 + mPssh[i].datalen); 562 } 563 
mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize); 564 free(buf); 565 } 566 return mInitCheck; 567} 568 569char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { 570 if (mFirstSINF == NULL) { 571 return NULL; 572 } 573 574 SINF *sinf = mFirstSINF; 575 while (sinf && (trackID != sinf->trackID)) { 576 sinf = sinf->next; 577 } 578 579 if (sinf == NULL) { 580 return NULL; 581 } 582 583 *len = sinf->len; 584 return sinf->IPMPData; 585} 586 587// Reads an encoded integer 7 bits at a time until it encounters the high bit clear. 588static int32_t readSize(off64_t offset, 589 const sp<DataSource> DataSource, uint8_t *numOfBytes) { 590 uint32_t size = 0; 591 uint8_t data; 592 bool moreData = true; 593 *numOfBytes = 0; 594 595 while (moreData) { 596 if (DataSource->readAt(offset, &data, 1) < 1) { 597 return -1; 598 } 599 offset ++; 600 moreData = (data >= 128) ? true : false; 601 size = (size << 7) | (data & 0x7f); // Take last 7 bits 602 (*numOfBytes) ++; 603 } 604 605 return size; 606} 607 608status_t MPEG4Extractor::parseDrmSINF( 609 off64_t * /* offset */, off64_t data_offset) { 610 uint8_t updateIdTag; 611 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 612 return ERROR_IO; 613 } 614 data_offset ++; 615 616 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 617 return ERROR_MALFORMED; 618 } 619 620 uint8_t numOfBytes; 621 int32_t size = readSize(data_offset, mDataSource, &numOfBytes); 622 if (size < 0) { 623 return ERROR_IO; 624 } 625 int32_t classSize = size; 626 data_offset += numOfBytes; 627 628 while(size >= 11 ) { 629 uint8_t descriptorTag; 630 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { 631 return ERROR_IO; 632 } 633 data_offset ++; 634 635 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { 636 return ERROR_MALFORMED; 637 } 638 639 uint8_t buffer[8]; 640 //ObjectDescriptorID and ObjectDescriptor url flag 641 if (mDataSource->readAt(data_offset, buffer, 2) < 2) { 642 return ERROR_IO; 643 } 644 
data_offset += 2; 645 646 if ((buffer[1] >> 5) & 0x0001) { //url flag is set 647 return ERROR_MALFORMED; 648 } 649 650 if (mDataSource->readAt(data_offset, buffer, 8) < 8) { 651 return ERROR_IO; 652 } 653 data_offset += 8; 654 655 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) 656 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { 657 return ERROR_MALFORMED; 658 } 659 660 SINF *sinf = new SINF; 661 sinf->trackID = U16_AT(&buffer[3]); 662 sinf->IPMPDescriptorID = buffer[7]; 663 sinf->next = mFirstSINF; 664 mFirstSINF = sinf; 665 666 size -= (8 + 2 + 1); 667 } 668 669 if (size != 0) { 670 return ERROR_MALFORMED; 671 } 672 673 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 674 return ERROR_IO; 675 } 676 data_offset ++; 677 678 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 679 return ERROR_MALFORMED; 680 } 681 682 size = readSize(data_offset, mDataSource, &numOfBytes); 683 if (size < 0) { 684 return ERROR_IO; 685 } 686 classSize = size; 687 data_offset += numOfBytes; 688 689 while (size > 0) { 690 uint8_t tag; 691 int32_t dataLen; 692 if (mDataSource->readAt(data_offset, &tag, 1) < 1) { 693 return ERROR_IO; 694 } 695 data_offset ++; 696 697 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { 698 uint8_t id; 699 dataLen = readSize(data_offset, mDataSource, &numOfBytes); 700 if (dataLen < 0) { 701 return ERROR_IO; 702 } else if (dataLen < 4) { 703 return ERROR_MALFORMED; 704 } 705 data_offset += numOfBytes; 706 707 if (mDataSource->readAt(data_offset, &id, 1) < 1) { 708 return ERROR_IO; 709 } 710 data_offset ++; 711 712 SINF *sinf = mFirstSINF; 713 while (sinf && (sinf->IPMPDescriptorID != id)) { 714 sinf = sinf->next; 715 } 716 if (sinf == NULL) { 717 return ERROR_MALFORMED; 718 } 719 sinf->len = dataLen - 3; 720 sinf->IPMPData = new (std::nothrow) char[sinf->len]; 721 if (sinf->IPMPData == NULL) { 722 return ERROR_MALFORMED; 723 } 724 data_offset += 2; 725 726 if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) { 727 
return ERROR_IO; 728 } 729 data_offset += sinf->len; 730 731 size -= (dataLen + numOfBytes + 1); 732 } 733 } 734 735 if (size != 0) { 736 return ERROR_MALFORMED; 737 } 738 739 return UNKNOWN_ERROR; // Return a dummy error. 740} 741 742struct PathAdder { 743 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 744 : mPath(path) { 745 mPath->push(chunkType); 746 } 747 748 ~PathAdder() { 749 mPath->pop(); 750 } 751 752private: 753 Vector<uint32_t> *mPath; 754 755 PathAdder(const PathAdder &); 756 PathAdder &operator=(const PathAdder &); 757}; 758 759static bool underMetaDataPath(const Vector<uint32_t> &path) { 760 return path.size() >= 5 761 && path[0] == FOURCC('m', 'o', 'o', 'v') 762 && path[1] == FOURCC('u', 'd', 't', 'a') 763 && path[2] == FOURCC('m', 'e', 't', 'a') 764 && path[3] == FOURCC('i', 'l', 's', 't'); 765} 766 767// Given a time in seconds since Jan 1 1904, produce a human-readable string. 768static void convertTimeToDate(int64_t time_1904, String8 *s) { 769 time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600); 770 771 char tmp[32]; 772 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970)); 773 774 s->setTo(tmp); 775} 776 777status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 778 ALOGV("entering parseChunk %lld/%d", *offset, depth); 779 uint32_t hdr[2]; 780 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 781 return ERROR_IO; 782 } 783 uint64_t chunk_size = ntohl(hdr[0]); 784 uint32_t chunk_type = ntohl(hdr[1]); 785 off64_t data_offset = *offset + 8; 786 787 if (chunk_size == 1) { 788 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 789 return ERROR_IO; 790 } 791 chunk_size = ntoh64(chunk_size); 792 data_offset += 8; 793 794 if (chunk_size < 16) { 795 // The smallest valid chunk is 16 bytes long in this case. 
796 return ERROR_MALFORMED; 797 } 798 } else if (chunk_size == 0) { 799 if (depth == 0) { 800 // atom extends to end of file 801 off64_t sourceSize; 802 if (mDataSource->getSize(&sourceSize) == OK) { 803 chunk_size = (sourceSize - *offset); 804 } else { 805 // XXX could we just pick a "sufficiently large" value here? 806 ALOGE("atom size is 0, and data source has no size"); 807 return ERROR_MALFORMED; 808 } 809 } else { 810 // not allowed for non-toplevel atoms, skip it 811 *offset += 4; 812 return OK; 813 } 814 } else if (chunk_size < 8) { 815 // The smallest valid chunk is 8 bytes long. 816 ALOGE("invalid chunk size: %" PRIu64, chunk_size); 817 return ERROR_MALFORMED; 818 } 819 820 char chunk[5]; 821 MakeFourCCString(chunk_type, chunk); 822 ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth); 823 824#if 0 825 static const char kWhitespace[] = " "; 826 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 827 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size); 828 829 char buffer[256]; 830 size_t n = chunk_size; 831 if (n > sizeof(buffer)) { 832 n = sizeof(buffer); 833 } 834 if (mDataSource->readAt(*offset, buffer, n) 835 < (ssize_t)n) { 836 return ERROR_IO; 837 } 838 839 hexdump(buffer, n); 840#endif 841 842 PathAdder autoAdder(&mPath, chunk_type); 843 844 off64_t chunk_data_size = *offset + chunk_size - data_offset; 845 if (chunk_type != FOURCC('m', 'd', 'a', 't') && chunk_data_size > kMaxAtomSize) { 846 char errMsg[100]; 847 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size); 848 ALOGE("%s (b/28615448)", errMsg); 849 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg)); 850 return ERROR_MALFORMED; 851 } 852 853 if (chunk_type != FOURCC('c', 'p', 'r', 't') 854 && chunk_type != FOURCC('c', 'o', 'v', 'r') 855 && mPath.size() == 5 && underMetaDataPath(mPath)) { 856 off64_t stop_offset = *offset + chunk_size; 857 *offset = data_offset; 858 while (*offset < stop_offset) { 859 
status_t err = parseChunk(offset, depth + 1); 860 if (err != OK) { 861 return err; 862 } 863 } 864 865 if (*offset != stop_offset) { 866 return ERROR_MALFORMED; 867 } 868 869 return OK; 870 } 871 872 switch(chunk_type) { 873 case FOURCC('m', 'o', 'o', 'v'): 874 case FOURCC('t', 'r', 'a', 'k'): 875 case FOURCC('m', 'd', 'i', 'a'): 876 case FOURCC('m', 'i', 'n', 'f'): 877 case FOURCC('d', 'i', 'n', 'f'): 878 case FOURCC('s', 't', 'b', 'l'): 879 case FOURCC('m', 'v', 'e', 'x'): 880 case FOURCC('m', 'o', 'o', 'f'): 881 case FOURCC('t', 'r', 'a', 'f'): 882 case FOURCC('m', 'f', 'r', 'a'): 883 case FOURCC('u', 'd', 't', 'a'): 884 case FOURCC('i', 'l', 's', 't'): 885 case FOURCC('s', 'i', 'n', 'f'): 886 case FOURCC('s', 'c', 'h', 'i'): 887 case FOURCC('e', 'd', 't', 's'): 888 { 889 if (chunk_type == FOURCC('m', 'o', 'o', 'v') && depth != 0) { 890 ALOGE("moov: depth %d", depth); 891 return ERROR_MALFORMED; 892 } 893 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 894 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size); 895 896 if (mDataSource->flags() 897 & (DataSource::kWantsPrefetching 898 | DataSource::kIsCachingDataSource)) { 899 sp<MPEG4DataSource> cachedSource = 900 new MPEG4DataSource(mDataSource); 901 902 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { 903 mDataSource = cachedSource; 904 } 905 } 906 907 if (mLastTrack == NULL) { 908 return ERROR_MALFORMED; 909 } 910 mLastTrack->sampleTable = new SampleTable(mDataSource); 911 } 912 913 bool isTrack = false; 914 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 915 if (depth != 1) { 916 ALOGE("trak: depth %d", depth); 917 return ERROR_MALFORMED; 918 } 919 isTrack = true; 920 921 Track *track = new Track; 922 track->next = NULL; 923 if (mLastTrack) { 924 mLastTrack->next = track; 925 } else { 926 mFirstTrack = track; 927 } 928 mLastTrack = track; 929 930 track->meta = new MetaData; 931 track->includes_expensive_metadata = false; 932 track->skipTrack = false; 933 track->timescale = 0; 934 
track->meta->setCString(kKeyMIMEType, "application/octet-stream"); 935 } 936 937 off64_t stop_offset = *offset + chunk_size; 938 *offset = data_offset; 939 while (*offset < stop_offset) { 940 status_t err = parseChunk(offset, depth + 1); 941 if (err != OK) { 942 if (isTrack) { 943 mLastTrack->skipTrack = true; 944 break; 945 } 946 return err; 947 } 948 } 949 950 if (*offset != stop_offset) { 951 return ERROR_MALFORMED; 952 } 953 954 if (isTrack) { 955 int32_t trackId; 956 // There must be exact one track header per track. 957 if (!mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) { 958 mLastTrack->skipTrack = true; 959 } 960 if (mLastTrack->skipTrack) { 961 Track *cur = mFirstTrack; 962 963 if (cur == mLastTrack) { 964 delete cur; 965 mFirstTrack = mLastTrack = NULL; 966 } else { 967 while (cur && cur->next != mLastTrack) { 968 cur = cur->next; 969 } 970 cur->next = NULL; 971 delete mLastTrack; 972 mLastTrack = cur; 973 } 974 975 return OK; 976 } 977 978 status_t err = verifyTrack(mLastTrack); 979 980 if (err != OK) { 981 return err; 982 } 983 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 984 mInitCheck = OK; 985 986 if (!mIsDrm) { 987 return UNKNOWN_ERROR; // Return a dummy error. 
988 } else { 989 return OK; 990 } 991 } 992 break; 993 } 994 995 case FOURCC('e', 'l', 's', 't'): 996 { 997 *offset += chunk_size; 998 999 // See 14496-12 8.6.6 1000 uint8_t version; 1001 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 1002 return ERROR_IO; 1003 } 1004 1005 uint32_t entry_count; 1006 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 1007 return ERROR_IO; 1008 } 1009 1010 if (entry_count != 1) { 1011 // we only support a single entry at the moment, for gapless playback 1012 ALOGW("ignoring edit list with %d entries", entry_count); 1013 } else if (mHeaderTimescale == 0) { 1014 ALOGW("ignoring edit list because timescale is 0"); 1015 } else if (mLastTrack == NULL) { 1016 return ERROR_MALFORMED; 1017 } else { 1018 off64_t entriesoffset = data_offset + 8; 1019 uint64_t segment_duration; 1020 int64_t media_time; 1021 1022 if (version == 1) { 1023 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 1024 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 1025 return ERROR_IO; 1026 } 1027 } else if (version == 0) { 1028 uint32_t sd; 1029 int32_t mt; 1030 if (!mDataSource->getUInt32(entriesoffset, &sd) || 1031 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 1032 return ERROR_IO; 1033 } 1034 segment_duration = sd; 1035 media_time = mt; 1036 } else { 1037 return ERROR_IO; 1038 } 1039 1040 uint64_t halfscale = mHeaderTimescale / 2; 1041 segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale; 1042 media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale; 1043 1044 int64_t duration; 1045 int32_t samplerate; 1046 if (mLastTrack->meta->findInt64(kKeyDuration, &duration) && 1047 mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) { 1048 1049 int64_t delay = (media_time * samplerate + 500000) / 1000000; 1050 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 1051 1052 int64_t paddingus = duration - (segment_duration + media_time); 1053 if (paddingus < 0) { 1054 
// track duration from media header (which is what kKeyDuration is) might 1055 // be slightly shorter than the segment duration, which would make the 1056 // padding negative. Clamp to zero. 1057 paddingus = 0; 1058 } 1059 int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000; 1060 mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples); 1061 } 1062 } 1063 break; 1064 } 1065 1066 case FOURCC('f', 'r', 'm', 'a'): 1067 { 1068 *offset += chunk_size; 1069 if (mLastTrack == NULL) { 1070 return ERROR_MALFORMED; 1071 } 1072 1073 uint32_t original_fourcc; 1074 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 1075 return ERROR_IO; 1076 } 1077 original_fourcc = ntohl(original_fourcc); 1078 ALOGV("read original format: %d", original_fourcc); 1079 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 1080 uint32_t num_channels = 0; 1081 uint32_t sample_rate = 0; 1082 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 1083 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1084 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1085 } 1086 break; 1087 } 1088 1089 case FOURCC('t', 'e', 'n', 'c'): 1090 { 1091 *offset += chunk_size; 1092 if (mLastTrack == NULL) { 1093 return ERROR_MALFORMED; 1094 } 1095 1096 if (chunk_size < 32) { 1097 return ERROR_MALFORMED; 1098 } 1099 1100 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1101 // default IV size, 16 bytes default KeyID 1102 // (ISO 23001-7) 1103 char buf[4]; 1104 memset(buf, 0, 4); 1105 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1106 return ERROR_IO; 1107 } 1108 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1109 if (defaultAlgorithmId > 1) { 1110 // only 0 (clear) and 1 (AES-128) are valid 1111 return ERROR_MALFORMED; 1112 } 1113 1114 memset(buf, 0, 4); 1115 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1116 return ERROR_IO; 1117 } 1118 uint32_t 
defaultIVSize = ntohl(*((int32_t*)buf)); 1119 1120 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 1121 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 1122 // only unencrypted data must have 0 IV size 1123 return ERROR_MALFORMED; 1124 } else if (defaultIVSize != 0 && 1125 defaultIVSize != 8 && 1126 defaultIVSize != 16) { 1127 // only supported sizes are 0, 8 and 16 1128 return ERROR_MALFORMED; 1129 } 1130 1131 uint8_t defaultKeyId[16]; 1132 1133 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1134 return ERROR_IO; 1135 } 1136 1137 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId); 1138 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1139 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1140 break; 1141 } 1142 1143 case FOURCC('t', 'k', 'h', 'd'): 1144 { 1145 *offset += chunk_size; 1146 if (mLastTrack == NULL) { 1147 return ERROR_MALFORMED; 1148 } 1149 1150 status_t err; 1151 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1152 return err; 1153 } 1154 1155 break; 1156 } 1157 1158 case FOURCC('p', 's', 's', 'h'): 1159 { 1160 *offset += chunk_size; 1161 1162 PsshInfo pssh; 1163 1164 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1165 return ERROR_IO; 1166 } 1167 1168 uint32_t psshdatalen = 0; 1169 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1170 return ERROR_IO; 1171 } 1172 pssh.datalen = ntohl(psshdatalen); 1173 ALOGV("pssh data size: %d", pssh.datalen); 1174 if (pssh.datalen + 20 > chunk_size) { 1175 // pssh data length exceeds size of containing box 1176 return ERROR_MALFORMED; 1177 } 1178 1179 pssh.data = new (std::nothrow) uint8_t[pssh.datalen]; 1180 if (pssh.data == NULL) { 1181 return ERROR_MALFORMED; 1182 } 1183 ALOGV("allocated pssh @ %p", pssh.data); 1184 ssize_t requested = (ssize_t) pssh.datalen; 1185 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1186 delete[] pssh.data; 1187 return 
ERROR_IO; 1188 } 1189 mPssh.push_back(pssh); 1190 1191 break; 1192 } 1193 1194 case FOURCC('m', 'd', 'h', 'd'): 1195 { 1196 *offset += chunk_size; 1197 if (mLastTrack == NULL) { 1198 return ERROR_MALFORMED; 1199 } 1200 1201 if (chunk_data_size < 4) { 1202 return ERROR_MALFORMED; 1203 } 1204 1205 uint8_t version; 1206 if (mDataSource->readAt( 1207 data_offset, &version, sizeof(version)) 1208 < (ssize_t)sizeof(version)) { 1209 return ERROR_IO; 1210 } 1211 1212 off64_t timescale_offset; 1213 1214 if (version == 1) { 1215 timescale_offset = data_offset + 4 + 16; 1216 } else if (version == 0) { 1217 timescale_offset = data_offset + 4 + 8; 1218 } else { 1219 return ERROR_IO; 1220 } 1221 1222 uint32_t timescale; 1223 if (mDataSource->readAt( 1224 timescale_offset, ×cale, sizeof(timescale)) 1225 < (ssize_t)sizeof(timescale)) { 1226 return ERROR_IO; 1227 } 1228 1229 mLastTrack->timescale = ntohl(timescale); 1230 1231 // 14496-12 says all ones means indeterminate, but some files seem to use 1232 // 0 instead. We treat both the same. 
1233 int64_t duration = 0; 1234 if (version == 1) { 1235 if (mDataSource->readAt( 1236 timescale_offset + 4, &duration, sizeof(duration)) 1237 < (ssize_t)sizeof(duration)) { 1238 return ERROR_IO; 1239 } 1240 if (duration != -1) { 1241 duration = ntoh64(duration); 1242 } 1243 } else { 1244 uint32_t duration32; 1245 if (mDataSource->readAt( 1246 timescale_offset + 4, &duration32, sizeof(duration32)) 1247 < (ssize_t)sizeof(duration32)) { 1248 return ERROR_IO; 1249 } 1250 if (duration32 != 0xffffffff) { 1251 duration = ntohl(duration32); 1252 } 1253 } 1254 if (duration != 0) { 1255 mLastTrack->meta->setInt64( 1256 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1257 } 1258 1259 uint8_t lang[2]; 1260 off64_t lang_offset; 1261 if (version == 1) { 1262 lang_offset = timescale_offset + 4 + 8; 1263 } else if (version == 0) { 1264 lang_offset = timescale_offset + 4 + 4; 1265 } else { 1266 return ERROR_IO; 1267 } 1268 1269 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1270 < (ssize_t)sizeof(lang)) { 1271 return ERROR_IO; 1272 } 1273 1274 // To get the ISO-639-2/T three character language code 1275 // 1 bit pad followed by 3 5-bits characters. Each character 1276 // is packed as the difference between its ASCII value and 0x60. 1277 char lang_code[4]; 1278 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1279 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1280 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1281 lang_code[3] = '\0'; 1282 1283 mLastTrack->meta->setCString( 1284 kKeyMediaLanguage, lang_code); 1285 1286 break; 1287 } 1288 1289 case FOURCC('s', 't', 's', 'd'): 1290 { 1291 if (mLastTrack == NULL) { 1292 return ERROR_MALFORMED; 1293 } 1294 1295 uint8_t buffer[8]; 1296 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1297 return ERROR_MALFORMED; 1298 } 1299 1300 if (mDataSource->readAt( 1301 data_offset, buffer, 8) < 8) { 1302 return ERROR_IO; 1303 } 1304 1305 if (U32_AT(buffer) != 0) { 1306 // Should be version 0, flags 0. 
1307 return ERROR_MALFORMED; 1308 } 1309 1310 uint32_t entry_count = U32_AT(&buffer[4]); 1311 1312 if (entry_count > 1) { 1313 // For 3GPP timed text, there could be multiple tx3g boxes contain 1314 // multiple text display formats. These formats will be used to 1315 // display the timed text. 1316 // For encrypted files, there may also be more than one entry. 1317 const char *mime; 1318 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1319 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1320 strcasecmp(mime, "application/octet-stream")) { 1321 // For now we only support a single type of media per track. 1322 mLastTrack->skipTrack = true; 1323 *offset += chunk_size; 1324 break; 1325 } 1326 } 1327 off64_t stop_offset = *offset + chunk_size; 1328 *offset = data_offset + 8; 1329 for (uint32_t i = 0; i < entry_count; ++i) { 1330 status_t err = parseChunk(offset, depth + 1); 1331 if (err != OK) { 1332 return err; 1333 } 1334 } 1335 1336 if (*offset != stop_offset) { 1337 return ERROR_MALFORMED; 1338 } 1339 break; 1340 } 1341 1342 case FOURCC('m', 'p', '4', 'a'): 1343 case FOURCC('e', 'n', 'c', 'a'): 1344 case FOURCC('s', 'a', 'm', 'r'): 1345 case FOURCC('s', 'a', 'w', 'b'): 1346 { 1347 if (mLastTrack == NULL) { 1348 return ERROR_MALFORMED; 1349 } 1350 uint8_t buffer[8 + 20]; 1351 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1352 // Basic AudioSampleEntry size. 
1353 return ERROR_MALFORMED; 1354 } 1355 1356 if (mDataSource->readAt( 1357 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1358 return ERROR_IO; 1359 } 1360 1361 uint16_t data_ref_index = U16_AT(&buffer[6]); 1362 uint32_t num_channels = U16_AT(&buffer[16]); 1363 1364 uint16_t sample_size = U16_AT(&buffer[18]); 1365 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1366 1367 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1368 // if the chunk type is enca, we'll get the type from the sinf/frma box later 1369 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1370 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1371 } 1372 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1373 chunk, num_channels, sample_size, sample_rate); 1374 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1375 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1376 1377 off64_t stop_offset = *offset + chunk_size; 1378 *offset = data_offset + sizeof(buffer); 1379 while (*offset < stop_offset) { 1380 status_t err = parseChunk(offset, depth + 1); 1381 if (err != OK) { 1382 return err; 1383 } 1384 } 1385 1386 if (*offset != stop_offset) { 1387 return ERROR_MALFORMED; 1388 } 1389 break; 1390 } 1391 1392 case FOURCC('m', 'p', '4', 'v'): 1393 case FOURCC('e', 'n', 'c', 'v'): 1394 case FOURCC('s', '2', '6', '3'): 1395 case FOURCC('H', '2', '6', '3'): 1396 case FOURCC('h', '2', '6', '3'): 1397 case FOURCC('a', 'v', 'c', '1'): 1398 case FOURCC('h', 'v', 'c', '1'): 1399 case FOURCC('h', 'e', 'v', '1'): 1400 { 1401 mHasVideo = true; 1402 1403 uint8_t buffer[78]; 1404 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1405 // Basic VideoSampleEntry size. 
1406 return ERROR_MALFORMED; 1407 } 1408 1409 if (mDataSource->readAt( 1410 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1411 return ERROR_IO; 1412 } 1413 1414 uint16_t data_ref_index = U16_AT(&buffer[6]); 1415 uint16_t width = U16_AT(&buffer[6 + 18]); 1416 uint16_t height = U16_AT(&buffer[6 + 20]); 1417 1418 // The video sample is not standard-compliant if it has invalid dimension. 1419 // Use some default width and height value, and 1420 // let the decoder figure out the actual width and height (and thus 1421 // be prepared for INFO_FOMRAT_CHANGED event). 1422 if (width == 0) width = 352; 1423 if (height == 0) height = 288; 1424 1425 // printf("*** coding='%s' width=%d height=%d\n", 1426 // chunk, width, height); 1427 1428 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1429 // if the chunk type is encv, we'll get the type from the sinf/frma box later 1430 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1431 } 1432 mLastTrack->meta->setInt32(kKeyWidth, width); 1433 mLastTrack->meta->setInt32(kKeyHeight, height); 1434 1435 off64_t stop_offset = *offset + chunk_size; 1436 *offset = data_offset + sizeof(buffer); 1437 while (*offset < stop_offset) { 1438 status_t err = parseChunk(offset, depth + 1); 1439 if (err != OK) { 1440 return err; 1441 } 1442 } 1443 1444 if (*offset != stop_offset) { 1445 return ERROR_MALFORMED; 1446 } 1447 break; 1448 } 1449 1450 case FOURCC('s', 't', 'c', 'o'): 1451 case FOURCC('c', 'o', '6', '4'): 1452 { 1453 if (mLastTrack == NULL) { 1454 return ERROR_MALFORMED; 1455 } 1456 status_t err = 1457 mLastTrack->sampleTable->setChunkOffsetParams( 1458 chunk_type, data_offset, chunk_data_size); 1459 1460 *offset += chunk_size; 1461 1462 if (err != OK) { 1463 return err; 1464 } 1465 1466 break; 1467 } 1468 1469 case FOURCC('s', 't', 's', 'c'): 1470 { 1471 if (mLastTrack == NULL) { 1472 return ERROR_MALFORMED; 1473 } 1474 status_t err = 1475 mLastTrack->sampleTable->setSampleToChunkParams( 1476 data_offset, 
chunk_data_size); 1477 1478 *offset += chunk_size; 1479 1480 if (err != OK) { 1481 return err; 1482 } 1483 1484 break; 1485 } 1486 1487 case FOURCC('s', 't', 's', 'z'): 1488 case FOURCC('s', 't', 'z', '2'): 1489 { 1490 if (mLastTrack == NULL) { 1491 return ERROR_MALFORMED; 1492 } 1493 status_t err = 1494 mLastTrack->sampleTable->setSampleSizeParams( 1495 chunk_type, data_offset, chunk_data_size); 1496 1497 *offset += chunk_size; 1498 1499 if (err != OK) { 1500 return err; 1501 } 1502 1503 size_t max_size; 1504 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1505 1506 if (err != OK) { 1507 return err; 1508 } 1509 1510 if (max_size != 0) { 1511 // Assume that a given buffer only contains at most 10 chunks, 1512 // each chunk originally prefixed with a 2 byte length will 1513 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1514 // and thus will grow by 2 bytes per chunk. 1515 if (max_size > SIZE_MAX - 10 * 2) { 1516 ALOGE("max sample size too big: %zu", max_size); 1517 return ERROR_MALFORMED; 1518 } 1519 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1520 } else { 1521 // No size was specified. Pick a conservatively large size. 1522 uint32_t width, height; 1523 if (!mLastTrack->meta->findInt32(kKeyWidth, (int32_t*)&width) || 1524 !mLastTrack->meta->findInt32(kKeyHeight,(int32_t*) &height)) { 1525 ALOGE("No width or height, assuming worst case 1080p"); 1526 width = 1920; 1527 height = 1080; 1528 } else { 1529 // A resolution was specified, check that it's not too big. The values below 1530 // were chosen so that the calculations below don't cause overflows, they're 1531 // not indicating that resolutions up to 32kx32k are actually supported. 
1532 if (width > 32768 || height > 32768) { 1533 ALOGE("can't support %u x %u video", width, height); 1534 return ERROR_MALFORMED; 1535 } 1536 } 1537 1538 const char *mime; 1539 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1540 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 1541 // AVC requires compression ratio of at least 2, and uses 1542 // macroblocks 1543 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1544 } else { 1545 // For all other formats there is no minimum compression 1546 // ratio. Use compression ratio of 1. 1547 max_size = width * height * 3 / 2; 1548 } 1549 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size); 1550 } 1551 1552 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1553 // mimetype) previously obtained, so don't cache them. 1554 const char *mime; 1555 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1556 // Calculate average frame rate. 1557 if (!strncasecmp("video/", mime, 6)) { 1558 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1559 int64_t durationUs; 1560 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { 1561 if (durationUs > 0) { 1562 int32_t frameRate = (nSamples * 1000000LL + 1563 (durationUs >> 1)) / durationUs; 1564 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1565 } 1566 } 1567 } 1568 1569 break; 1570 } 1571 1572 case FOURCC('s', 't', 't', 's'): 1573 { 1574 *offset += chunk_size; 1575 1576 if (mLastTrack == NULL) { 1577 return ERROR_MALFORMED; 1578 } 1579 status_t err = 1580 mLastTrack->sampleTable->setTimeToSampleParams( 1581 data_offset, chunk_data_size); 1582 1583 if (err != OK) { 1584 return err; 1585 } 1586 1587 break; 1588 } 1589 1590 case FOURCC('c', 't', 't', 's'): 1591 { 1592 *offset += chunk_size; 1593 1594 if (mLastTrack == NULL) { 1595 return ERROR_MALFORMED; 1596 } 1597 status_t err = 1598 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1599 data_offset, chunk_data_size); 1600 1601 if (err != OK) { 
1602 return err; 1603 } 1604 1605 break; 1606 } 1607 1608 case FOURCC('s', 't', 's', 's'): 1609 { 1610 *offset += chunk_size; 1611 1612 if (mLastTrack == NULL) { 1613 return ERROR_MALFORMED; 1614 } 1615 status_t err = 1616 mLastTrack->sampleTable->setSyncSampleParams( 1617 data_offset, chunk_data_size); 1618 1619 if (err != OK) { 1620 return err; 1621 } 1622 1623 break; 1624 } 1625 1626 // @xyz 1627 case FOURCC('\xA9', 'x', 'y', 'z'): 1628 { 1629 *offset += chunk_size; 1630 1631 // Best case the total data length inside "@xyz" box 1632 // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/", 1633 // where "\x00\x04" is the text string length with value = 4, 1634 // "\0x15\xc7" is the language code = en, and "0+0" is a 1635 // location (string) value with longitude = 0 and latitude = 0. 1636 if (chunk_data_size < 8) { 1637 return ERROR_MALFORMED; 1638 } 1639 1640 // Worst case the location string length would be 18, 1641 // for instance +90.0000-180.0000, without the trailing "/" and 1642 // the string length + language code. 1643 char buffer[18]; 1644 1645 // Substracting 5 from the data size is because the text string length + 1646 // language code takes 4 bytes, and the trailing slash "/" takes 1 byte. 
1647 off64_t location_length = chunk_data_size - 5; 1648 if (location_length >= (off64_t) sizeof(buffer)) { 1649 return ERROR_MALFORMED; 1650 } 1651 1652 if (mDataSource->readAt( 1653 data_offset + 4, buffer, location_length) < location_length) { 1654 return ERROR_IO; 1655 } 1656 1657 buffer[location_length] = '\0'; 1658 mFileMetaData->setCString(kKeyLocation, buffer); 1659 break; 1660 } 1661 1662 case FOURCC('e', 's', 'd', 's'): 1663 { 1664 *offset += chunk_size; 1665 if (mLastTrack == NULL) { 1666 return ERROR_MALFORMED; 1667 } 1668 1669 if (chunk_data_size < 4) { 1670 return ERROR_MALFORMED; 1671 } 1672 1673 uint8_t buffer[256]; 1674 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1675 return ERROR_BUFFER_TOO_SMALL; 1676 } 1677 1678 if (mDataSource->readAt( 1679 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1680 return ERROR_IO; 1681 } 1682 1683 if (U32_AT(buffer) != 0) { 1684 // Should be version 0, flags 0. 1685 return ERROR_MALFORMED; 1686 } 1687 1688 mLastTrack->meta->setData( 1689 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1690 1691 if (mPath.size() >= 2 1692 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1693 // Information from the ESDS must be relied on for proper 1694 // setup of sample rate and channel count for MPEG4 Audio. 1695 // The generic header appears to only contain generic 1696 // information... 
1697 1698 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1699 &buffer[4], chunk_data_size - 4); 1700 1701 if (err != OK) { 1702 return err; 1703 } 1704 } 1705 1706 break; 1707 } 1708 1709 case FOURCC('a', 'v', 'c', 'C'): 1710 { 1711 *offset += chunk_size; 1712 if (mLastTrack == NULL) { 1713 return ERROR_MALFORMED; 1714 } 1715 1716 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1717 1718 if (buffer->data() == NULL) { 1719 ALOGE("b/28471206"); 1720 return NO_MEMORY; 1721 } 1722 1723 if (mDataSource->readAt( 1724 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1725 return ERROR_IO; 1726 } 1727 1728 mLastTrack->meta->setData( 1729 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size); 1730 1731 break; 1732 } 1733 case FOURCC('h', 'v', 'c', 'C'): 1734 { 1735 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1736 if (mLastTrack == NULL) { 1737 return ERROR_MALFORMED; 1738 } 1739 1740 if (buffer->data() == NULL) { 1741 ALOGE("b/28471206"); 1742 return NO_MEMORY; 1743 } 1744 1745 if (mDataSource->readAt( 1746 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1747 return ERROR_IO; 1748 } 1749 1750 mLastTrack->meta->setData( 1751 kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size); 1752 1753 *offset += chunk_size; 1754 break; 1755 } 1756 1757 case FOURCC('d', '2', '6', '3'): 1758 { 1759 *offset += chunk_size; 1760 if (mLastTrack == NULL) { 1761 return ERROR_MALFORMED; 1762 } 1763 /* 1764 * d263 contains a fixed 7 bytes part: 1765 * vendor - 4 bytes 1766 * version - 1 byte 1767 * level - 1 byte 1768 * profile - 1 byte 1769 * optionally, "d263" box itself may contain a 16-byte 1770 * bit rate box (bitr) 1771 * average bit rate - 4 bytes 1772 * max bit rate - 4 bytes 1773 */ 1774 char buffer[23]; 1775 if (chunk_data_size != 7 && 1776 chunk_data_size != 23) { 1777 ALOGE("Incorrect D263 box size %lld", chunk_data_size); 1778 return ERROR_MALFORMED; 1779 } 1780 1781 if (mDataSource->readAt( 1782 data_offset, buffer, 
chunk_data_size) < chunk_data_size) { 1783 return ERROR_IO; 1784 } 1785 1786 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 1787 1788 break; 1789 } 1790 1791 case FOURCC('m', 'e', 't', 'a'): 1792 { 1793 uint8_t buffer[4]; 1794 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1795 *offset += chunk_size; 1796 return ERROR_MALFORMED; 1797 } 1798 1799 if (mDataSource->readAt( 1800 data_offset, buffer, 4) < 4) { 1801 *offset += chunk_size; 1802 return ERROR_IO; 1803 } 1804 1805 if (U32_AT(buffer) != 0) { 1806 // Should be version 0, flags 0. 1807 1808 // If it's not, let's assume this is one of those 1809 // apparently malformed chunks that don't have flags 1810 // and completely different semantics than what's 1811 // in the MPEG4 specs and skip it. 1812 *offset += chunk_size; 1813 return OK; 1814 } 1815 1816 off64_t stop_offset = *offset + chunk_size; 1817 *offset = data_offset + sizeof(buffer); 1818 while (*offset < stop_offset) { 1819 status_t err = parseChunk(offset, depth + 1); 1820 if (err != OK) { 1821 return err; 1822 } 1823 } 1824 1825 if (*offset != stop_offset) { 1826 return ERROR_MALFORMED; 1827 } 1828 break; 1829 } 1830 1831 case FOURCC('m', 'e', 'a', 'n'): 1832 case FOURCC('n', 'a', 'm', 'e'): 1833 case FOURCC('d', 'a', 't', 'a'): 1834 { 1835 *offset += chunk_size; 1836 1837 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 1838 status_t err = parseITunesMetaData(data_offset, chunk_data_size); 1839 1840 if (err != OK) { 1841 return err; 1842 } 1843 } 1844 1845 break; 1846 } 1847 1848 case FOURCC('m', 'v', 'h', 'd'): 1849 { 1850 *offset += chunk_size; 1851 1852 if (depth != 1) { 1853 ALOGE("mvhd: depth %d", depth); 1854 return ERROR_MALFORMED; 1855 } 1856 if (chunk_data_size < 32) { 1857 return ERROR_MALFORMED; 1858 } 1859 1860 uint8_t header[32]; 1861 if (mDataSource->readAt( 1862 data_offset, header, sizeof(header)) 1863 < (ssize_t)sizeof(header)) { 1864 return ERROR_IO; 1865 } 1866 1867 uint64_t creationTime; 1868 uint64_t 
duration = 0; 1869 if (header[0] == 1) { 1870 creationTime = U64_AT(&header[4]); 1871 mHeaderTimescale = U32_AT(&header[20]); 1872 duration = U64_AT(&header[24]); 1873 if (duration == 0xffffffffffffffff) { 1874 duration = 0; 1875 } 1876 } else if (header[0] != 0) { 1877 return ERROR_MALFORMED; 1878 } else { 1879 creationTime = U32_AT(&header[4]); 1880 mHeaderTimescale = U32_AT(&header[12]); 1881 uint32_t d32 = U32_AT(&header[16]); 1882 if (d32 == 0xffffffff) { 1883 d32 = 0; 1884 } 1885 duration = d32; 1886 } 1887 if (duration != 0) { 1888 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 1889 } 1890 1891 String8 s; 1892 convertTimeToDate(creationTime, &s); 1893 1894 mFileMetaData->setCString(kKeyDate, s.string()); 1895 1896 break; 1897 } 1898 1899 case FOURCC('m', 'e', 'h', 'd'): 1900 { 1901 *offset += chunk_size; 1902 1903 if (chunk_data_size < 8) { 1904 return ERROR_MALFORMED; 1905 } 1906 1907 uint8_t flags[4]; 1908 if (mDataSource->readAt( 1909 data_offset, flags, sizeof(flags)) 1910 < (ssize_t)sizeof(flags)) { 1911 return ERROR_IO; 1912 } 1913 1914 uint64_t duration = 0; 1915 if (flags[0] == 1) { 1916 // 64 bit 1917 if (chunk_data_size < 12) { 1918 return ERROR_MALFORMED; 1919 } 1920 mDataSource->getUInt64(data_offset + 4, &duration); 1921 if (duration == 0xffffffffffffffff) { 1922 duration = 0; 1923 } 1924 } else if (flags[0] == 0) { 1925 // 32 bit 1926 uint32_t d32; 1927 mDataSource->getUInt32(data_offset + 4, &d32); 1928 if (d32 == 0xffffffff) { 1929 d32 = 0; 1930 } 1931 duration = d32; 1932 } else { 1933 return ERROR_MALFORMED; 1934 } 1935 1936 if (duration != 0) { 1937 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 1938 } 1939 1940 break; 1941 } 1942 1943 case FOURCC('m', 'd', 'a', 't'): 1944 { 1945 ALOGV("mdat chunk, drm: %d", mIsDrm); 1946 if (!mIsDrm) { 1947 *offset += chunk_size; 1948 break; 1949 } 1950 1951 if (chunk_size < 8) { 1952 return ERROR_MALFORMED; 1953 } 1954 1955 return 
parseDrmSINF(offset, data_offset); 1956 } 1957 1958 case FOURCC('h', 'd', 'l', 'r'): 1959 { 1960 *offset += chunk_size; 1961 1962 uint32_t buffer; 1963 if (mDataSource->readAt( 1964 data_offset + 8, &buffer, 4) < 4) { 1965 return ERROR_IO; 1966 } 1967 1968 uint32_t type = ntohl(buffer); 1969 // For the 3GPP file format, the handler-type within the 'hdlr' box 1970 // shall be 'text'. We also want to support 'sbtl' handler type 1971 // for a practical reason as various MPEG4 containers use it. 1972 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 1973 if (mLastTrack != NULL) { 1974 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 1975 } 1976 } 1977 1978 break; 1979 } 1980 1981 case FOURCC('t', 'r', 'e', 'x'): 1982 { 1983 *offset += chunk_size; 1984 1985 if (chunk_data_size < 24) { 1986 return ERROR_IO; 1987 } 1988 uint32_t duration; 1989 Trex trex; 1990 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) || 1991 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) || 1992 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) || 1993 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) || 1994 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) { 1995 return ERROR_IO; 1996 } 1997 mTrex.add(trex); 1998 break; 1999 } 2000 2001 case FOURCC('t', 'x', '3', 'g'): 2002 { 2003 if (mLastTrack == NULL) { 2004 return ERROR_MALFORMED; 2005 } 2006 uint32_t type; 2007 const void *data; 2008 size_t size = 0; 2009 if (!mLastTrack->meta->findData( 2010 kKeyTextFormatData, &type, &data, &size)) { 2011 size = 0; 2012 } 2013 2014 if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) { 2015 return ERROR_MALFORMED; 2016 } 2017 2018 uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size]; 2019 if (buffer == NULL) { 2020 return ERROR_MALFORMED; 2021 } 2022 2023 if (size > 0) { 2024 memcpy(buffer, data, size); 2025 } 2026 2027 if 
((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 2028 < chunk_size) { 2029 delete[] buffer; 2030 buffer = NULL; 2031 2032 // advance read pointer so we don't end up reading this again 2033 *offset += chunk_size; 2034 return ERROR_IO; 2035 } 2036 2037 mLastTrack->meta->setData( 2038 kKeyTextFormatData, 0, buffer, size + chunk_size); 2039 2040 delete[] buffer; 2041 2042 *offset += chunk_size; 2043 break; 2044 } 2045 2046 case FOURCC('c', 'o', 'v', 'r'): 2047 { 2048 *offset += chunk_size; 2049 2050 if (mFileMetaData != NULL) { 2051 ALOGV("chunk_data_size = %lld and data_offset = %lld", 2052 chunk_data_size, data_offset); 2053 2054 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) { 2055 return ERROR_MALFORMED; 2056 } 2057 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1); 2058 if (buffer->data() == NULL) { 2059 ALOGE("b/28471206"); 2060 return NO_MEMORY; 2061 } 2062 if (mDataSource->readAt( 2063 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) { 2064 return ERROR_IO; 2065 } 2066 const int kSkipBytesOfDataBox = 16; 2067 if (chunk_data_size <= kSkipBytesOfDataBox) { 2068 return ERROR_MALFORMED; 2069 } 2070 2071 mFileMetaData->setData( 2072 kKeyAlbumArt, MetaData::TYPE_NONE, 2073 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 2074 } 2075 2076 break; 2077 } 2078 2079 case FOURCC('t', 'i', 't', 'l'): 2080 case FOURCC('p', 'e', 'r', 'f'): 2081 case FOURCC('a', 'u', 't', 'h'): 2082 case FOURCC('g', 'n', 'r', 'e'): 2083 case FOURCC('a', 'l', 'b', 'm'): 2084 case FOURCC('y', 'r', 'r', 'c'): 2085 { 2086 *offset += chunk_size; 2087 2088 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth); 2089 2090 if (err != OK) { 2091 return err; 2092 } 2093 2094 break; 2095 } 2096 2097 case FOURCC('I', 'D', '3', '2'): 2098 { 2099 *offset += chunk_size; 2100 2101 if (chunk_data_size < 6) { 2102 return ERROR_MALFORMED; 2103 } 2104 2105 parseID3v2MetaData(data_offset 
+ 6); 2106 2107 break; 2108 } 2109 2110 case FOURCC('-', '-', '-', '-'): 2111 { 2112 mLastCommentMean.clear(); 2113 mLastCommentName.clear(); 2114 mLastCommentData.clear(); 2115 *offset += chunk_size; 2116 break; 2117 } 2118 2119 case FOURCC('s', 'i', 'd', 'x'): 2120 { 2121 if (mLastTrack == NULL) { 2122 return ERROR_MALFORMED; 2123 } 2124 parseSegmentIndex(data_offset, chunk_data_size); 2125 *offset += chunk_size; 2126 return UNKNOWN_ERROR; // stop parsing after sidx 2127 } 2128 2129 default: 2130 { 2131 *offset += chunk_size; 2132 break; 2133 } 2134 } 2135 2136 return OK; 2137} 2138 2139status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 2140 ALOGV("MPEG4Extractor::parseSegmentIndex"); 2141 2142 if (size < 12) { 2143 return -EINVAL; 2144 } 2145 2146 uint32_t flags; 2147 if (!mDataSource->getUInt32(offset, &flags)) { 2148 return ERROR_MALFORMED; 2149 } 2150 2151 uint32_t version = flags >> 24; 2152 flags &= 0xffffff; 2153 2154 ALOGV("sidx version %d", version); 2155 2156 uint32_t referenceId; 2157 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 2158 return ERROR_MALFORMED; 2159 } 2160 2161 uint32_t timeScale; 2162 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 2163 return ERROR_MALFORMED; 2164 } 2165 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 2166 2167 uint64_t earliestPresentationTime; 2168 uint64_t firstOffset; 2169 2170 offset += 12; 2171 size -= 12; 2172 2173 if (version == 0) { 2174 if (size < 8) { 2175 return -EINVAL; 2176 } 2177 uint32_t tmp; 2178 if (!mDataSource->getUInt32(offset, &tmp)) { 2179 return ERROR_MALFORMED; 2180 } 2181 earliestPresentationTime = tmp; 2182 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 2183 return ERROR_MALFORMED; 2184 } 2185 firstOffset = tmp; 2186 offset += 8; 2187 size -= 8; 2188 } else { 2189 if (size < 16) { 2190 return -EINVAL; 2191 } 2192 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 2193 return ERROR_MALFORMED; 2194 } 2195 if 
(!mDataSource->getUInt64(offset + 8, &firstOffset)) { 2196 return ERROR_MALFORMED; 2197 } 2198 offset += 16; 2199 size -= 16; 2200 } 2201 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset); 2202 2203 if (size < 4) { 2204 return -EINVAL; 2205 } 2206 2207 uint16_t referenceCount; 2208 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 2209 return ERROR_MALFORMED; 2210 } 2211 offset += 4; 2212 size -= 4; 2213 ALOGV("refcount: %d", referenceCount); 2214 2215 if (size < referenceCount * 12) { 2216 return -EINVAL; 2217 } 2218 2219 uint64_t total_duration = 0; 2220 for (unsigned int i = 0; i < referenceCount; i++) { 2221 uint32_t d1, d2, d3; 2222 2223 if (!mDataSource->getUInt32(offset, &d1) || // size 2224 !mDataSource->getUInt32(offset + 4, &d2) || // duration 2225 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 2226 return ERROR_MALFORMED; 2227 } 2228 2229 if (d1 & 0x80000000) { 2230 ALOGW("sub-sidx boxes not supported yet"); 2231 } 2232 bool sap = d3 & 0x80000000; 2233 uint32_t saptype = (d3 >> 28) & 7; 2234 if (!sap || (saptype != 1 && saptype != 2)) { 2235 // type 1 and 2 are sync samples 2236 ALOGW("not a stream access point, or unsupported type: %08x", d3); 2237 } 2238 total_duration += d2; 2239 offset += 12; 2240 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 2241 SidxEntry se; 2242 se.mSize = d1 & 0x7fffffff; 2243 se.mDurationUs = 1000000LL * d2 / timeScale; 2244 mSidxEntries.add(se); 2245 } 2246 2247 uint64_t sidxDuration = total_duration * 1000000 / timeScale; 2248 2249 int64_t metaDuration; 2250 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 2251 mLastTrack->meta->setInt64(kKeyDuration, sidxDuration); 2252 } 2253 return OK; 2254} 2255 2256 2257 2258status_t MPEG4Extractor::parseTrackHeader( 2259 off64_t data_offset, off64_t data_size) { 2260 if (data_size < 4) { 2261 return ERROR_MALFORMED; 2262 } 2263 2264 uint8_t version; 2265 if (mDataSource->readAt(data_offset, 
&version, 1) < 1) { 2266 return ERROR_IO; 2267 } 2268 2269 size_t dynSize = (version == 1) ? 36 : 24; 2270 2271 uint8_t buffer[36 + 60]; 2272 2273 if (data_size != (off64_t)dynSize + 60) { 2274 return ERROR_MALFORMED; 2275 } 2276 2277 if (mDataSource->readAt( 2278 data_offset, buffer, data_size) < (ssize_t)data_size) { 2279 return ERROR_IO; 2280 } 2281 2282 uint64_t ctime, mtime, duration; 2283 int32_t id; 2284 2285 if (version == 1) { 2286 ctime = U64_AT(&buffer[4]); 2287 mtime = U64_AT(&buffer[12]); 2288 id = U32_AT(&buffer[20]); 2289 duration = U64_AT(&buffer[28]); 2290 } else if (version == 0) { 2291 ctime = U32_AT(&buffer[4]); 2292 mtime = U32_AT(&buffer[8]); 2293 id = U32_AT(&buffer[12]); 2294 duration = U32_AT(&buffer[20]); 2295 } else { 2296 return ERROR_UNSUPPORTED; 2297 } 2298 2299 mLastTrack->meta->setInt32(kKeyTrackID, id); 2300 2301 size_t matrixOffset = dynSize + 16; 2302 int32_t a00 = U32_AT(&buffer[matrixOffset]); 2303 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 2304 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 2305 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 2306 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 2307 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 2308 2309#if 0 2310 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 2311 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 2312 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 2313 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 2314#endif 2315 2316 uint32_t rotationDegrees; 2317 2318 static const int32_t kFixedOne = 0x10000; 2319 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 2320 // Identity, no rotation 2321 rotationDegrees = 0; 2322 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 2323 rotationDegrees = 90; 2324 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 2325 rotationDegrees = 270; 2326 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { 2327 rotationDegrees = 180; 2328 } else { 
2329 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 2330 rotationDegrees = 0; 2331 } 2332 2333 if (rotationDegrees != 0) { 2334 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); 2335 } 2336 2337 // Handle presentation display size, which could be different 2338 // from the image size indicated by kKeyWidth and kKeyHeight. 2339 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2340 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2341 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); 2342 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); 2343 2344 return OK; 2345} 2346 2347status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) { 2348 if (size < 4 || size == SIZE_MAX) { 2349 return ERROR_MALFORMED; 2350 } 2351 2352 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2353 if (buffer == NULL) { 2354 return ERROR_MALFORMED; 2355 } 2356 if (mDataSource->readAt( 2357 offset, buffer, size) != (ssize_t)size) { 2358 delete[] buffer; 2359 buffer = NULL; 2360 2361 return ERROR_IO; 2362 } 2363 2364 uint32_t flags = U32_AT(buffer); 2365 2366 uint32_t metadataKey = 0; 2367 char chunk[5]; 2368 MakeFourCCString(mPath[4], chunk); 2369 ALOGV("meta: %s @ %lld", chunk, offset); 2370 switch (mPath[4]) { 2371 case FOURCC(0xa9, 'a', 'l', 'b'): 2372 { 2373 metadataKey = kKeyAlbum; 2374 break; 2375 } 2376 case FOURCC(0xa9, 'A', 'R', 'T'): 2377 { 2378 metadataKey = kKeyArtist; 2379 break; 2380 } 2381 case FOURCC('a', 'A', 'R', 'T'): 2382 { 2383 metadataKey = kKeyAlbumArtist; 2384 break; 2385 } 2386 case FOURCC(0xa9, 'd', 'a', 'y'): 2387 { 2388 metadataKey = kKeyYear; 2389 break; 2390 } 2391 case FOURCC(0xa9, 'n', 'a', 'm'): 2392 { 2393 metadataKey = kKeyTitle; 2394 break; 2395 } 2396 case FOURCC(0xa9, 'w', 'r', 't'): 2397 { 2398 metadataKey = kKeyWriter; 2399 break; 2400 } 2401 case FOURCC('c', 'o', 'v', 'r'): 2402 { 2403 metadataKey = kKeyAlbumArt; 2404 break; 2405 } 2406 case FOURCC('g', 'n', 'r', 'e'): 2407 { 2408 
metadataKey = kKeyGenre; 2409 break; 2410 } 2411 case FOURCC(0xa9, 'g', 'e', 'n'): 2412 { 2413 metadataKey = kKeyGenre; 2414 break; 2415 } 2416 case FOURCC('c', 'p', 'i', 'l'): 2417 { 2418 if (size == 9 && flags == 21) { 2419 char tmp[16]; 2420 sprintf(tmp, "%d", 2421 (int)buffer[size - 1]); 2422 2423 mFileMetaData->setCString(kKeyCompilation, tmp); 2424 } 2425 break; 2426 } 2427 case FOURCC('t', 'r', 'k', 'n'): 2428 { 2429 if (size == 16 && flags == 0) { 2430 char tmp[16]; 2431 uint16_t* pTrack = (uint16_t*)&buffer[10]; 2432 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 2433 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 2434 2435 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2436 } 2437 break; 2438 } 2439 case FOURCC('d', 'i', 's', 'k'): 2440 { 2441 if ((size == 14 || size == 16) && flags == 0) { 2442 char tmp[16]; 2443 uint16_t* pDisc = (uint16_t*)&buffer[10]; 2444 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 2445 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 2446 2447 mFileMetaData->setCString(kKeyDiscNumber, tmp); 2448 } 2449 break; 2450 } 2451 case FOURCC('-', '-', '-', '-'): 2452 { 2453 buffer[size] = '\0'; 2454 switch (mPath[5]) { 2455 case FOURCC('m', 'e', 'a', 'n'): 2456 mLastCommentMean.setTo((const char *)buffer + 4); 2457 break; 2458 case FOURCC('n', 'a', 'm', 'e'): 2459 mLastCommentName.setTo((const char *)buffer + 4); 2460 break; 2461 case FOURCC('d', 'a', 't', 'a'): 2462 if (size < 8) { 2463 delete[] buffer; 2464 buffer = NULL; 2465 ALOGE("b/24346430"); 2466 return ERROR_MALFORMED; 2467 } 2468 mLastCommentData.setTo((const char *)buffer + 8); 2469 break; 2470 } 2471 2472 // Once we have a set of mean/name/data info, go ahead and process 2473 // it to see if its something we are interested in. Whether or not 2474 // were are interested in the specific tag, make sure to clear out 2475 // the set so we can be ready to process another tuple should one 2476 // show up later in the file. 
2477 if ((mLastCommentMean.length() != 0) && 2478 (mLastCommentName.length() != 0) && 2479 (mLastCommentData.length() != 0)) { 2480 2481 if (mLastCommentMean == "com.apple.iTunes" 2482 && mLastCommentName == "iTunSMPB") { 2483 int32_t delay, padding; 2484 if (sscanf(mLastCommentData, 2485 " %*x %x %x %*x", &delay, &padding) == 2) { 2486 if (mLastTrack == NULL) { 2487 delete[] buffer; 2488 return ERROR_MALFORMED; 2489 } 2490 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 2491 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); 2492 } 2493 } 2494 2495 mLastCommentMean.clear(); 2496 mLastCommentName.clear(); 2497 mLastCommentData.clear(); 2498 } 2499 break; 2500 } 2501 2502 default: 2503 break; 2504 } 2505 2506 if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) { 2507 if (metadataKey == kKeyAlbumArt) { 2508 mFileMetaData->setData( 2509 kKeyAlbumArt, MetaData::TYPE_NONE, 2510 buffer + 8, size - 8); 2511 } else if (metadataKey == kKeyGenre) { 2512 if (flags == 0) { 2513 // uint8_t genre code, iTunes genre codes are 2514 // the standard id3 codes, except they start 2515 // at 1 instead of 0 (e.g. Pop is 14, not 13) 2516 // We use standard id3 numbering, so subtract 1. 
2517 int genrecode = (int)buffer[size - 1]; 2518 genrecode--; 2519 if (genrecode < 0) { 2520 genrecode = 255; // reserved for 'unknown genre' 2521 } 2522 char genre[10]; 2523 sprintf(genre, "%d", genrecode); 2524 2525 mFileMetaData->setCString(metadataKey, genre); 2526 } else if (flags == 1) { 2527 // custom genre string 2528 buffer[size] = '\0'; 2529 2530 mFileMetaData->setCString( 2531 metadataKey, (const char *)buffer + 8); 2532 } 2533 } else { 2534 buffer[size] = '\0'; 2535 2536 mFileMetaData->setCString( 2537 metadataKey, (const char *)buffer + 8); 2538 } 2539 } 2540 2541 delete[] buffer; 2542 buffer = NULL; 2543 2544 return OK; 2545} 2546 2547status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) { 2548 if (size < 4 || size == SIZE_MAX) { 2549 return ERROR_MALFORMED; 2550 } 2551 2552 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2553 if (buffer == NULL) { 2554 return ERROR_MALFORMED; 2555 } 2556 if (mDataSource->readAt( 2557 offset, buffer, size) != (ssize_t)size) { 2558 delete[] buffer; 2559 buffer = NULL; 2560 2561 return ERROR_IO; 2562 } 2563 2564 uint32_t metadataKey = 0; 2565 switch (mPath[depth]) { 2566 case FOURCC('t', 'i', 't', 'l'): 2567 { 2568 metadataKey = kKeyTitle; 2569 break; 2570 } 2571 case FOURCC('p', 'e', 'r', 'f'): 2572 { 2573 metadataKey = kKeyArtist; 2574 break; 2575 } 2576 case FOURCC('a', 'u', 't', 'h'): 2577 { 2578 metadataKey = kKeyWriter; 2579 break; 2580 } 2581 case FOURCC('g', 'n', 'r', 'e'): 2582 { 2583 metadataKey = kKeyGenre; 2584 break; 2585 } 2586 case FOURCC('a', 'l', 'b', 'm'): 2587 { 2588 if (buffer[size - 1] != '\0') { 2589 char tmp[4]; 2590 sprintf(tmp, "%u", buffer[size - 1]); 2591 2592 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2593 } 2594 2595 metadataKey = kKeyAlbum; 2596 break; 2597 } 2598 case FOURCC('y', 'r', 'r', 'c'): 2599 { 2600 if (size < 6) { 2601 delete[] buffer; 2602 buffer = NULL; 2603 ALOGE("b/62133227"); 2604 android_errorWriteLog(0x534e4554, "62133227"); 2605 
return ERROR_MALFORMED; 2606 } 2607 char tmp[5]; 2608 uint16_t year = U16_AT(&buffer[4]); 2609 2610 if (year < 10000) { 2611 sprintf(tmp, "%u", year); 2612 2613 mFileMetaData->setCString(kKeyYear, tmp); 2614 } 2615 break; 2616 } 2617 2618 default: 2619 break; 2620 } 2621 2622 if (metadataKey > 0) { 2623 bool isUTF8 = true; // Common case 2624 char16_t *framedata = NULL; 2625 int len16 = 0; // Number of UTF-16 characters 2626 2627 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00 2628 if (size < 6) { 2629 delete[] buffer; 2630 buffer = NULL; 2631 return ERROR_MALFORMED; 2632 } 2633 2634 if (size - 6 >= 4) { 2635 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator 2636 framedata = (char16_t *)(buffer + 6); 2637 if (0xfffe == *framedata) { 2638 // endianness marker (BOM) doesn't match host endianness 2639 for (int i = 0; i < len16; i++) { 2640 framedata[i] = bswap_16(framedata[i]); 2641 } 2642 // BOM is now swapped to 0xfeff, we will execute next block too 2643 } 2644 2645 if (0xfeff == *framedata) { 2646 // Remove the BOM 2647 framedata++; 2648 len16--; 2649 isUTF8 = false; 2650 } 2651 // else normal non-zero-length UTF-8 string 2652 // we can't handle UTF-16 without BOM as there is no other 2653 // indication of encoding. 2654 } 2655 2656 if (isUTF8) { 2657 buffer[size] = 0; 2658 mFileMetaData->setCString(metadataKey, (const char *)buffer + 6); 2659 } else { 2660 // Convert from UTF-16 string to UTF-8 string. 
2661 String8 tmpUTF8str(framedata, len16); 2662 mFileMetaData->setCString(metadataKey, tmpUTF8str.string()); 2663 } 2664 } 2665 2666 delete[] buffer; 2667 buffer = NULL; 2668 2669 return OK; 2670} 2671 2672void MPEG4Extractor::parseID3v2MetaData(off64_t offset) { 2673 ID3 id3(mDataSource, true /* ignorev1 */, offset); 2674 2675 if (id3.isValid()) { 2676 struct Map { 2677 int key; 2678 const char *tag1; 2679 const char *tag2; 2680 }; 2681 static const Map kMap[] = { 2682 { kKeyAlbum, "TALB", "TAL" }, 2683 { kKeyArtist, "TPE1", "TP1" }, 2684 { kKeyAlbumArtist, "TPE2", "TP2" }, 2685 { kKeyComposer, "TCOM", "TCM" }, 2686 { kKeyGenre, "TCON", "TCO" }, 2687 { kKeyTitle, "TIT2", "TT2" }, 2688 { kKeyYear, "TYE", "TYER" }, 2689 { kKeyAuthor, "TXT", "TEXT" }, 2690 { kKeyCDTrackNumber, "TRK", "TRCK" }, 2691 { kKeyDiscNumber, "TPA", "TPOS" }, 2692 { kKeyCompilation, "TCP", "TCMP" }, 2693 }; 2694 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 2695 2696 for (size_t i = 0; i < kNumMapEntries; ++i) { 2697 if (!mFileMetaData->hasData(kMap[i].key)) { 2698 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 2699 if (it->done()) { 2700 delete it; 2701 it = new ID3::Iterator(id3, kMap[i].tag2); 2702 } 2703 2704 if (it->done()) { 2705 delete it; 2706 continue; 2707 } 2708 2709 String8 s; 2710 it->getString(&s); 2711 delete it; 2712 2713 mFileMetaData->setCString(kMap[i].key, s); 2714 } 2715 } 2716 2717 size_t dataSize; 2718 String8 mime; 2719 const void *data = id3.getAlbumArt(&dataSize, &mime); 2720 2721 if (data) { 2722 mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 2723 mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string()); 2724 } 2725 } 2726} 2727 2728sp<MediaSource> MPEG4Extractor::getTrack(size_t index) { 2729 status_t err; 2730 if ((err = readMetaData()) != OK) { 2731 return NULL; 2732 } 2733 2734 Track *track = mFirstTrack; 2735 while (index > 0) { 2736 if (track == NULL) { 2737 return NULL; 2738 } 2739 2740 track = 
track->next; 2741 --index; 2742 } 2743 2744 if (track == NULL) { 2745 return NULL; 2746 } 2747 2748 2749 Trex *trex = NULL; 2750 int32_t trackId; 2751 if (track->meta->findInt32(kKeyTrackID, &trackId)) { 2752 for (size_t i = 0; i < mTrex.size(); i++) { 2753 Trex *t = &mTrex.editItemAt(index); 2754 if (t->track_ID == (uint32_t) trackId) { 2755 trex = t; 2756 break; 2757 } 2758 } 2759 } else { 2760 ALOGE("b/21657957"); 2761 return NULL; 2762 } 2763 2764 ALOGV("getTrack called, pssh: %zu", mPssh.size()); 2765 2766 return new MPEG4Source(this, 2767 track->meta, mDataSource, track->timescale, track->sampleTable, 2768 mSidxEntries, trex, mMoofOffset); 2769} 2770 2771// static 2772status_t MPEG4Extractor::verifyTrack(Track *track) { 2773 const char *mime; 2774 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 2775 2776 uint32_t type; 2777 const void *data; 2778 size_t size; 2779 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 2780 if (!track->meta->findData(kKeyAVCC, &type, &data, &size) 2781 || type != kTypeAVCC) { 2782 return ERROR_MALFORMED; 2783 } 2784 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 2785 if (!track->meta->findData(kKeyHVCC, &type, &data, &size) 2786 || type != kTypeHVCC) { 2787 return ERROR_MALFORMED; 2788 } 2789 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 2790 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 2791 if (!track->meta->findData(kKeyESDS, &type, &data, &size) 2792 || type != kTypeESDS) { 2793 return ERROR_MALFORMED; 2794 } 2795 } 2796 2797 if (track->sampleTable == NULL || !track->sampleTable->isValid()) { 2798 // Make sure we have all the metadata we need. 
// MPEG-4 audio object types (AOT) that the AudioSpecificConfig parser in this
// file compares against. The numeric values are the codes read from the
// bitstream, so they must not be renumbered.
//
// Codes that exist but are intentionally left undefined because nothing here
// uses them:
//   -1 NONE, 0 NULL_OBJECT, 1 AAC_MAIN, 3 AAC_SSR, 4 AAC_LTP, 6 AAC_SCAL,
//   7 TWIN_VQ, 8 CELP, 9 HVXC, 10/11 reserved, 12 TTSI, 13 MAIN_SYNTH,
//   14 WAV_TAB_SYNTH, 15 GEN_MIDI, 16 ALG_SYNTH_AUD_FX, 18 reserved,
//   19 ER_AAC_LTP, 21 ER_TWIN_VQ, 24 ER_CELP, 25 ER_HVXC, 26 ER_HILN,
//   27 ER_PARA, 28 reserved (might become SSC), 30 MPEGS,
//   32..34 MP3ONMP4_L1..L3, 35/36 reserved (might become DST/ALS),
//   37 AAC_SLS, 38 SLS, 39 ER_AAC_ELD, 42 USAC, 43 SAOC, 44 LD_MPEGS,
//   50 RSVD50.
typedef enum {
    AOT_AAC_LC      = 2,   // Low Complexity object
    AOT_SBR         = 5,
    AOT_ER_AAC_LC   = 17,  // Error Resilient (ER) AAC Low Complexity
    AOT_ER_AAC_SCAL = 20,  // Error Resilient (ER) AAC Scalable object
    AOT_ER_BSAC     = 22,  // Error Resilient (ER) BSAC object
    AOT_ER_AAC_LD   = 23,  // Error Resilient (ER) AAC LowDelay object
    AOT_PS          = 29,  // PS, Parametric Stereo (includes SBR)

    AOT_ESCAPE      = 31,  // Signals that the AOT uses more than 5 bits
} AUDIO_OBJECT_TYPE;
2897 2898 return OK; 2899 } 2900 2901 if (csd_size < 2) { 2902 return ERROR_MALFORMED; 2903 } 2904 2905 static uint32_t kSamplingRate[] = { 2906 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 2907 16000, 12000, 11025, 8000, 7350 2908 }; 2909 2910 ABitReader br(csd, csd_size); 2911 uint32_t objectType = br.getBits(5); 2912 2913 if (objectType == 31) { // AAC-ELD => additional 6 bits 2914 objectType = 32 + br.getBits(6); 2915 } 2916 2917 //keep AOT type 2918 mLastTrack->meta->setInt32(kKeyAACAOT, objectType); 2919 2920 uint32_t freqIndex = br.getBits(4); 2921 2922 int32_t sampleRate = 0; 2923 int32_t numChannels = 0; 2924 if (freqIndex == 15) { 2925 if (csd_size < 5) { 2926 return ERROR_MALFORMED; 2927 } 2928 sampleRate = br.getBits(24); 2929 numChannels = br.getBits(4); 2930 } else { 2931 numChannels = br.getBits(4); 2932 2933 if (freqIndex == 13 || freqIndex == 14) { 2934 return ERROR_MALFORMED; 2935 } 2936 2937 sampleRate = kSamplingRate[freqIndex]; 2938 } 2939 2940 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 2941 uint32_t extFreqIndex = br.getBits(4); 2942 int32_t extSampleRate; 2943 if (extFreqIndex == 15) { 2944 if (csd_size < 8) { 2945 return ERROR_MALFORMED; 2946 } 2947 extSampleRate = br.getBits(24); 2948 } else { 2949 if (extFreqIndex == 13 || extFreqIndex == 14) { 2950 return ERROR_MALFORMED; 2951 } 2952 extSampleRate = kSamplingRate[extFreqIndex]; 2953 } 2954 //TODO: save the extension sampling rate value in meta data => 2955 // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate); 2956 } 2957 2958 switch (numChannels) { 2959 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration 2960 case 0: 2961 case 1:// FC 2962 case 2:// FL FR 2963 case 3:// FC, FL FR 2964 case 4:// FC, FL FR, RC 2965 case 5:// FC, FL FR, SL SR 2966 case 6:// FC, FL FR, SL SR, LFE 2967 //numChannels already contains the right value 2968 break; 2969 case 11:// FC, FL FR, SL SR, RC, LFE 2970 
numChannels = 7; 2971 break; 2972 case 7: // FC, FCL FCR, FL FR, SL SR, LFE 2973 case 12:// FC, FL FR, SL SR, RL RR, LFE 2974 case 14:// FC, FL FR, SL SR, LFE, FHL FHR 2975 numChannels = 8; 2976 break; 2977 default: 2978 return ERROR_UNSUPPORTED; 2979 } 2980 2981 { 2982 if (objectType == AOT_SBR || objectType == AOT_PS) { 2983 objectType = br.getBits(5); 2984 2985 if (objectType == AOT_ESCAPE) { 2986 objectType = 32 + br.getBits(6); 2987 } 2988 } 2989 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || 2990 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || 2991 objectType == AOT_ER_BSAC) { 2992 const int32_t frameLengthFlag = br.getBits(1); 2993 2994 const int32_t dependsOnCoreCoder = br.getBits(1); 2995 2996 if (dependsOnCoreCoder ) { 2997 const int32_t coreCoderDelay = br.getBits(14); 2998 } 2999 3000 int32_t extensionFlag = -1; 3001 if (br.numBitsLeft() > 0) { 3002 extensionFlag = br.getBits(1); 3003 } else { 3004 switch (objectType) { 3005 // 14496-3 4.5.1.1 extensionFlag 3006 case AOT_AAC_LC: 3007 extensionFlag = 0; 3008 break; 3009 case AOT_ER_AAC_LC: 3010 case AOT_ER_AAC_SCAL: 3011 case AOT_ER_BSAC: 3012 case AOT_ER_AAC_LD: 3013 extensionFlag = 1; 3014 break; 3015 default: 3016 TRESPASS(); 3017 break; 3018 } 3019 ALOGW("csd missing extension flag; assuming %d for object type %u.", 3020 extensionFlag, objectType); 3021 } 3022 3023 if (numChannels == 0) { 3024 int32_t channelsEffectiveNum = 0; 3025 int32_t channelsNum = 0; 3026 const int32_t ElementInstanceTag = br.getBits(4); 3027 const int32_t Profile = br.getBits(2); 3028 const int32_t SamplingFrequencyIndex = br.getBits(4); 3029 const int32_t NumFrontChannelElements = br.getBits(4); 3030 const int32_t NumSideChannelElements = br.getBits(4); 3031 const int32_t NumBackChannelElements = br.getBits(4); 3032 const int32_t NumLfeChannelElements = br.getBits(2); 3033 const int32_t NumAssocDataElements = br.getBits(3); 3034 const int32_t NumValidCcElements = br.getBits(4); 3035 3036 const 
int32_t MonoMixdownPresent = br.getBits(1); 3037 if (MonoMixdownPresent != 0) { 3038 const int32_t MonoMixdownElementNumber = br.getBits(4); 3039 } 3040 3041 const int32_t StereoMixdownPresent = br.getBits(1); 3042 if (StereoMixdownPresent != 0) { 3043 const int32_t StereoMixdownElementNumber = br.getBits(4); 3044 } 3045 3046 const int32_t MatrixMixdownIndexPresent = br.getBits(1); 3047 if (MatrixMixdownIndexPresent != 0) { 3048 const int32_t MatrixMixdownIndex = br.getBits(2); 3049 const int32_t PseudoSurroundEnable = br.getBits(1); 3050 } 3051 3052 int i; 3053 for (i=0; i < NumFrontChannelElements; i++) { 3054 const int32_t FrontElementIsCpe = br.getBits(1); 3055 const int32_t FrontElementTagSelect = br.getBits(4); 3056 channelsNum += FrontElementIsCpe ? 2 : 1; 3057 } 3058 3059 for (i=0; i < NumSideChannelElements; i++) { 3060 const int32_t SideElementIsCpe = br.getBits(1); 3061 const int32_t SideElementTagSelect = br.getBits(4); 3062 channelsNum += SideElementIsCpe ? 2 : 1; 3063 } 3064 3065 for (i=0; i < NumBackChannelElements; i++) { 3066 const int32_t BackElementIsCpe = br.getBits(1); 3067 const int32_t BackElementTagSelect = br.getBits(4); 3068 channelsNum += BackElementIsCpe ? 2 : 1; 3069 } 3070 channelsEffectiveNum = channelsNum; 3071 3072 for (i=0; i < NumLfeChannelElements; i++) { 3073 const int32_t LfeElementTagSelect = br.getBits(4); 3074 channelsNum += 1; 3075 } 3076 ALOGV("mpeg4 audio channelsNum = %d", channelsNum); 3077 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); 3078 numChannels = channelsNum; 3079 } 3080 } 3081 } 3082 3083 if (numChannels == 0) { 3084 return ERROR_UNSUPPORTED; 3085 } 3086 3087 int32_t prevSampleRate; 3088 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); 3089 3090 if (prevSampleRate != sampleRate) { 3091 ALOGV("mpeg4 audio sample rate different from previous setting. 
" 3092 "was: %d, now: %d", prevSampleRate, sampleRate); 3093 } 3094 3095 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 3096 3097 int32_t prevChannelCount; 3098 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); 3099 3100 if (prevChannelCount != numChannels) { 3101 ALOGV("mpeg4 audio channel count different from previous setting. " 3102 "was: %d, now: %d", prevChannelCount, numChannels); 3103 } 3104 3105 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); 3106 3107 return OK; 3108} 3109 3110//////////////////////////////////////////////////////////////////////////////// 3111 3112MPEG4Source::MPEG4Source( 3113 const sp<MPEG4Extractor> &owner, 3114 const sp<MetaData> &format, 3115 const sp<DataSource> &dataSource, 3116 int32_t timeScale, 3117 const sp<SampleTable> &sampleTable, 3118 Vector<SidxEntry> &sidx, 3119 const Trex *trex, 3120 off64_t firstMoofOffset) 3121 : mOwner(owner), 3122 mFormat(format), 3123 mDataSource(dataSource), 3124 mTimescale(timeScale), 3125 mSampleTable(sampleTable), 3126 mCurrentSampleIndex(0), 3127 mCurrentFragmentIndex(0), 3128 mSegments(sidx), 3129 mTrex(trex), 3130 mFirstMoofOffset(firstMoofOffset), 3131 mCurrentMoofOffset(firstMoofOffset), 3132 mCurrentTime(0), 3133 mCurrentSampleInfoAllocSize(0), 3134 mCurrentSampleInfoSizes(NULL), 3135 mCurrentSampleInfoOffsetsAllocSize(0), 3136 mCurrentSampleInfoOffsets(NULL), 3137 mIsAVC(false), 3138 mIsHEVC(false), 3139 mNALLengthSize(0), 3140 mStarted(false), 3141 mGroup(NULL), 3142 mBuffer(NULL), 3143 mWantsNALFragments(false), 3144 mSrcBuffer(NULL) { 3145 3146 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo)); 3147 3148 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode); 3149 mDefaultIVSize = 0; 3150 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 3151 uint32_t keytype; 3152 const void *key; 3153 size_t keysize; 3154 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 3155 CHECK(keysize <= 16); 3156 
memset(mCryptoKey, 0, 16); 3157 memcpy(mCryptoKey, key, keysize); 3158 } 3159 3160 const char *mime; 3161 bool success = mFormat->findCString(kKeyMIMEType, &mime); 3162 CHECK(success); 3163 3164 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 3165 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC); 3166 3167 if (mIsAVC) { 3168 uint32_t type; 3169 const void *data; 3170 size_t size; 3171 CHECK(format->findData(kKeyAVCC, &type, &data, &size)); 3172 3173 const uint8_t *ptr = (const uint8_t *)data; 3174 3175 CHECK(size >= 7); 3176 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3177 3178 // The number of bytes used to encode the length of a NAL unit. 3179 mNALLengthSize = 1 + (ptr[4] & 3); 3180 } else if (mIsHEVC) { 3181 uint32_t type; 3182 const void *data; 3183 size_t size; 3184 CHECK(format->findData(kKeyHVCC, &type, &data, &size)); 3185 3186 const uint8_t *ptr = (const uint8_t *)data; 3187 3188 CHECK(size >= 7); 3189 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3190 3191 mNALLengthSize = 1 + (ptr[14 + 7] & 3); 3192 } 3193 3194 CHECK(format->findInt32(kKeyTrackID, &mTrackId)); 3195 3196 if (mFirstMoofOffset != 0) { 3197 off64_t offset = mFirstMoofOffset; 3198 parseChunk(&offset); 3199 } 3200} 3201 3202MPEG4Source::~MPEG4Source() { 3203 if (mStarted) { 3204 stop(); 3205 } 3206 free(mCurrentSampleInfoSizes); 3207 free(mCurrentSampleInfoOffsets); 3208} 3209 3210status_t MPEG4Source::start(MetaData *params) { 3211 Mutex::Autolock autoLock(mLock); 3212 3213 CHECK(!mStarted); 3214 3215 int32_t val; 3216 if (params && params->findInt32(kKeyWantsNALFragments, &val) 3217 && val != 0) { 3218 mWantsNALFragments = true; 3219 } else { 3220 mWantsNALFragments = false; 3221 } 3222 3223 int32_t tmp; 3224 CHECK(mFormat->findInt32(kKeyMaxInputSize, &tmp)); 3225 size_t max_size = tmp; 3226 3227 // A somewhat arbitrary limit that should be sufficient for 8k video frames 3228 // If you see the message below for a valid input stream: increase the 
limit 3229 if (max_size > 64 * 1024 * 1024) { 3230 ALOGE("bogus max input size: %zu", max_size); 3231 return ERROR_MALFORMED; 3232 } 3233 mGroup = new MediaBufferGroup; 3234 mGroup->add_buffer(new MediaBuffer(max_size)); 3235 3236 mSrcBuffer = new (std::nothrow) uint8_t[max_size]; 3237 if (mSrcBuffer == NULL) { 3238 // file probably specified a bad max size 3239 delete mGroup; 3240 mGroup = NULL; 3241 return ERROR_MALFORMED; 3242 } 3243 3244 mStarted = true; 3245 3246 return OK; 3247} 3248 3249status_t MPEG4Source::stop() { 3250 Mutex::Autolock autoLock(mLock); 3251 3252 CHECK(mStarted); 3253 3254 if (mBuffer != NULL) { 3255 mBuffer->release(); 3256 mBuffer = NULL; 3257 } 3258 3259 delete[] mSrcBuffer; 3260 mSrcBuffer = NULL; 3261 3262 delete mGroup; 3263 mGroup = NULL; 3264 3265 mStarted = false; 3266 mCurrentSampleIndex = 0; 3267 3268 return OK; 3269} 3270 3271status_t MPEG4Source::parseChunk(off64_t *offset) { 3272 uint32_t hdr[2]; 3273 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3274 return ERROR_IO; 3275 } 3276 uint64_t chunk_size = ntohl(hdr[0]); 3277 uint32_t chunk_type = ntohl(hdr[1]); 3278 off64_t data_offset = *offset + 8; 3279 3280 if (chunk_size == 1) { 3281 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 3282 return ERROR_IO; 3283 } 3284 chunk_size = ntoh64(chunk_size); 3285 data_offset += 8; 3286 3287 if (chunk_size < 16) { 3288 // The smallest valid chunk is 16 bytes long in this case. 3289 return ERROR_MALFORMED; 3290 } 3291 } else if (chunk_size < 8) { 3292 // The smallest valid chunk is 8 bytes long. 
3293 return ERROR_MALFORMED; 3294 } 3295 3296 char chunk[5]; 3297 MakeFourCCString(chunk_type, chunk); 3298 ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset); 3299 3300 off64_t chunk_data_size = *offset + chunk_size - data_offset; 3301 3302 switch(chunk_type) { 3303 3304 case FOURCC('t', 'r', 'a', 'f'): 3305 case FOURCC('m', 'o', 'o', 'f'): { 3306 off64_t stop_offset = *offset + chunk_size; 3307 *offset = data_offset; 3308 while (*offset < stop_offset) { 3309 status_t err = parseChunk(offset); 3310 if (err != OK) { 3311 return err; 3312 } 3313 } 3314 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3315 // *offset points to the box following this moof. Find the next moof from there. 3316 3317 while (true) { 3318 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3319 return ERROR_END_OF_STREAM; 3320 } 3321 chunk_size = ntohl(hdr[0]); 3322 chunk_type = ntohl(hdr[1]); 3323 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3324 mNextMoofOffset = *offset; 3325 break; 3326 } 3327 *offset += chunk_size; 3328 } 3329 } 3330 break; 3331 } 3332 3333 case FOURCC('t', 'f', 'h', 'd'): { 3334 status_t err; 3335 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 3336 return err; 3337 } 3338 *offset += chunk_size; 3339 break; 3340 } 3341 3342 case FOURCC('t', 'r', 'u', 'n'): { 3343 status_t err; 3344 if (mLastParsedTrackId == mTrackId) { 3345 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 3346 return err; 3347 } 3348 } 3349 3350 *offset += chunk_size; 3351 break; 3352 } 3353 3354 case FOURCC('s', 'a', 'i', 'z'): { 3355 status_t err; 3356 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 3357 return err; 3358 } 3359 *offset += chunk_size; 3360 break; 3361 } 3362 case FOURCC('s', 'a', 'i', 'o'): { 3363 status_t err; 3364 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 3365 return err; 3366 } 3367 *offset += chunk_size; 3368 break; 3369 } 3370 3371 case 
FOURCC('m', 'd', 'a', 't'): { 3372 // parse DRM info if present 3373 ALOGV("MPEG4Source::parseChunk mdat"); 3374 // if saiz/saoi was previously observed, do something with the sampleinfos 3375 *offset += chunk_size; 3376 break; 3377 } 3378 3379 default: { 3380 *offset += chunk_size; 3381 break; 3382 } 3383 } 3384 return OK; 3385} 3386 3387status_t MPEG4Source::parseSampleAuxiliaryInformationSizes( 3388 off64_t offset, off64_t /* size */) { 3389 ALOGV("parseSampleAuxiliaryInformationSizes"); 3390 // 14496-12 8.7.12 3391 uint8_t version; 3392 if (mDataSource->readAt( 3393 offset, &version, sizeof(version)) 3394 < (ssize_t)sizeof(version)) { 3395 return ERROR_IO; 3396 } 3397 3398 if (version != 0) { 3399 return ERROR_UNSUPPORTED; 3400 } 3401 offset++; 3402 3403 uint32_t flags; 3404 if (!mDataSource->getUInt24(offset, &flags)) { 3405 return ERROR_IO; 3406 } 3407 offset += 3; 3408 3409 if (flags & 1) { 3410 uint32_t tmp; 3411 if (!mDataSource->getUInt32(offset, &tmp)) { 3412 return ERROR_MALFORMED; 3413 } 3414 mCurrentAuxInfoType = tmp; 3415 offset += 4; 3416 if (!mDataSource->getUInt32(offset, &tmp)) { 3417 return ERROR_MALFORMED; 3418 } 3419 mCurrentAuxInfoTypeParameter = tmp; 3420 offset += 4; 3421 } 3422 3423 uint8_t defsize; 3424 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 3425 return ERROR_MALFORMED; 3426 } 3427 mCurrentDefaultSampleInfoSize = defsize; 3428 offset++; 3429 3430 uint32_t smplcnt; 3431 if (!mDataSource->getUInt32(offset, &smplcnt)) { 3432 return ERROR_MALFORMED; 3433 } 3434 mCurrentSampleInfoCount = smplcnt; 3435 offset += 4; 3436 3437 if (mCurrentDefaultSampleInfoSize != 0) { 3438 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 3439 return OK; 3440 } 3441 if (smplcnt > mCurrentSampleInfoAllocSize) { 3442 mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 3443 mCurrentSampleInfoAllocSize = smplcnt; 3444 } 3445 3446 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 
3447 return OK; 3448} 3449 3450status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets( 3451 off64_t offset, off64_t /* size */) { 3452 ALOGV("parseSampleAuxiliaryInformationOffsets"); 3453 // 14496-12 8.7.13 3454 uint8_t version; 3455 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 3456 return ERROR_IO; 3457 } 3458 offset++; 3459 3460 uint32_t flags; 3461 if (!mDataSource->getUInt24(offset, &flags)) { 3462 return ERROR_IO; 3463 } 3464 offset += 3; 3465 3466 uint32_t entrycount; 3467 if (!mDataSource->getUInt32(offset, &entrycount)) { 3468 return ERROR_IO; 3469 } 3470 offset += 4; 3471 if (entrycount == 0) { 3472 return OK; 3473 } 3474 if (entrycount > UINT32_MAX / 8) { 3475 return ERROR_MALFORMED; 3476 } 3477 3478 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 3479 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8); 3480 if (newPtr == NULL) { 3481 return NO_MEMORY; 3482 } 3483 mCurrentSampleInfoOffsets = newPtr; 3484 mCurrentSampleInfoOffsetsAllocSize = entrycount; 3485 } 3486 mCurrentSampleInfoOffsetCount = entrycount; 3487 3488 if (mCurrentSampleInfoOffsets == NULL) { 3489 return OK; 3490 } 3491 3492 for (size_t i = 0; i < entrycount; i++) { 3493 if (version == 0) { 3494 uint32_t tmp; 3495 if (!mDataSource->getUInt32(offset, &tmp)) { 3496 return ERROR_IO; 3497 } 3498 mCurrentSampleInfoOffsets[i] = tmp; 3499 offset += 4; 3500 } else { 3501 uint64_t tmp; 3502 if (!mDataSource->getUInt64(offset, &tmp)) { 3503 return ERROR_IO; 3504 } 3505 mCurrentSampleInfoOffsets[i] = tmp; 3506 offset += 8; 3507 } 3508 } 3509 3510 // parse clear/encrypted data 3511 3512 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 3513 3514 drmoffset += mCurrentMoofOffset; 3515 int ivlength; 3516 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 3517 3518 // only 0, 8 and 16 byte initialization vectors are supported 3519 if (ivlength != 0 && ivlength != 8 && ivlength != 16) { 3520 ALOGW("unsupported IV 
length: %d", ivlength); 3521 return ERROR_MALFORMED; 3522 } 3523 // read CencSampleAuxiliaryDataFormats 3524 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) { 3525 if (i >= mCurrentSamples.size()) { 3526 ALOGW("too few samples"); 3527 break; 3528 } 3529 Sample *smpl = &mCurrentSamples.editItemAt(i); 3530 3531 memset(smpl->iv, 0, 16); 3532 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) { 3533 return ERROR_IO; 3534 } 3535 3536 drmoffset += ivlength; 3537 3538 int32_t smplinfosize = mCurrentDefaultSampleInfoSize; 3539 if (smplinfosize == 0) { 3540 smplinfosize = mCurrentSampleInfoSizes[i]; 3541 } 3542 if (smplinfosize > ivlength) { 3543 uint16_t numsubsamples; 3544 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) { 3545 return ERROR_IO; 3546 } 3547 drmoffset += 2; 3548 for (size_t j = 0; j < numsubsamples; j++) { 3549 uint16_t numclear; 3550 uint32_t numencrypted; 3551 if (!mDataSource->getUInt16(drmoffset, &numclear)) { 3552 return ERROR_IO; 3553 } 3554 drmoffset += 2; 3555 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) { 3556 return ERROR_IO; 3557 } 3558 drmoffset += 4; 3559 smpl->clearsizes.add(numclear); 3560 smpl->encryptedsizes.add(numencrypted); 3561 } 3562 } else { 3563 smpl->clearsizes.add(0); 3564 smpl->encryptedsizes.add(smpl->size); 3565 } 3566 } 3567 3568 3569 return OK; 3570} 3571 3572status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 3573 3574 if (size < 8) { 3575 return -EINVAL; 3576 } 3577 3578 uint32_t flags; 3579 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 3580 return ERROR_MALFORMED; 3581 } 3582 3583 if (flags & 0xff000000) { 3584 return -EINVAL; 3585 } 3586 3587 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 3588 return ERROR_MALFORMED; 3589 } 3590 3591 if (mLastParsedTrackId != mTrackId) { 3592 // this is not the right track, skip it 3593 return OK; 3594 } 3595 3596 mTrackFragmentHeaderInfo.mFlags = flags; 3597 
mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 3598 offset += 8; 3599 size -= 8; 3600 3601 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 3602 3603 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 3604 if (size < 8) { 3605 return -EINVAL; 3606 } 3607 3608 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) { 3609 return ERROR_MALFORMED; 3610 } 3611 offset += 8; 3612 size -= 8; 3613 } 3614 3615 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 3616 if (size < 4) { 3617 return -EINVAL; 3618 } 3619 3620 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 3621 return ERROR_MALFORMED; 3622 } 3623 offset += 4; 3624 size -= 4; 3625 } 3626 3627 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 3628 if (size < 4) { 3629 return -EINVAL; 3630 } 3631 3632 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 3633 return ERROR_MALFORMED; 3634 } 3635 offset += 4; 3636 size -= 4; 3637 } 3638 3639 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 3640 if (size < 4) { 3641 return -EINVAL; 3642 } 3643 3644 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 3645 return ERROR_MALFORMED; 3646 } 3647 offset += 4; 3648 size -= 4; 3649 } 3650 3651 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 3652 if (size < 4) { 3653 return -EINVAL; 3654 } 3655 3656 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 3657 return ERROR_MALFORMED; 3658 } 3659 offset += 4; 3660 size -= 4; 3661 } 3662 3663 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 3664 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 3665 } 3666 3667 mTrackFragmentHeaderInfo.mDataOffset = 0; 3668 return OK; 3669} 3670 3671status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 3672 3673 
ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 3674 if (size < 8) { 3675 return -EINVAL; 3676 } 3677 3678 enum { 3679 kDataOffsetPresent = 0x01, 3680 kFirstSampleFlagsPresent = 0x04, 3681 kSampleDurationPresent = 0x100, 3682 kSampleSizePresent = 0x200, 3683 kSampleFlagsPresent = 0x400, 3684 kSampleCompositionTimeOffsetPresent = 0x800, 3685 }; 3686 3687 uint32_t flags; 3688 if (!mDataSource->getUInt32(offset, &flags)) { 3689 return ERROR_MALFORMED; 3690 } 3691 ALOGV("fragment run flags: %08x", flags); 3692 3693 if (flags & 0xff000000) { 3694 return -EINVAL; 3695 } 3696 3697 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 3698 // These two shall not be used together. 3699 return -EINVAL; 3700 } 3701 3702 uint32_t sampleCount; 3703 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 3704 return ERROR_MALFORMED; 3705 } 3706 offset += 8; 3707 size -= 8; 3708 3709 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 3710 3711 uint32_t firstSampleFlags = 0; 3712 3713 if (flags & kDataOffsetPresent) { 3714 if (size < 4) { 3715 return -EINVAL; 3716 } 3717 3718 int32_t dataOffsetDelta; 3719 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 3720 return ERROR_MALFORMED; 3721 } 3722 3723 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 3724 3725 offset += 4; 3726 size -= 4; 3727 } 3728 3729 if (flags & kFirstSampleFlagsPresent) { 3730 if (size < 4) { 3731 return -EINVAL; 3732 } 3733 3734 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 3735 return ERROR_MALFORMED; 3736 } 3737 offset += 4; 3738 size -= 4; 3739 } 3740 3741 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 3742 sampleCtsOffset = 0; 3743 3744 size_t bytesPerSample = 0; 3745 if (flags & kSampleDurationPresent) { 3746 bytesPerSample += 4; 3747 } else if (mTrackFragmentHeaderInfo.mFlags 3748 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 3749 sampleDuration = 
mTrackFragmentHeaderInfo.mDefaultSampleDuration; 3750 } else if (mTrex) { 3751 sampleDuration = mTrex->default_sample_duration; 3752 } 3753 3754 if (flags & kSampleSizePresent) { 3755 bytesPerSample += 4; 3756 } else if (mTrackFragmentHeaderInfo.mFlags 3757 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 3758 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 3759 } else { 3760 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 3761 } 3762 3763 if (flags & kSampleFlagsPresent) { 3764 bytesPerSample += 4; 3765 } else if (mTrackFragmentHeaderInfo.mFlags 3766 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 3767 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 3768 } else { 3769 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 3770 } 3771 3772 if (flags & kSampleCompositionTimeOffsetPresent) { 3773 bytesPerSample += 4; 3774 } else { 3775 sampleCtsOffset = 0; 3776 } 3777 3778 if (size < (off64_t)sampleCount * bytesPerSample) { 3779 return -EINVAL; 3780 } 3781 3782 Sample tmp; 3783 for (uint32_t i = 0; i < sampleCount; ++i) { 3784 if (flags & kSampleDurationPresent) { 3785 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 3786 return ERROR_MALFORMED; 3787 } 3788 offset += 4; 3789 } 3790 3791 if (flags & kSampleSizePresent) { 3792 if (!mDataSource->getUInt32(offset, &sampleSize)) { 3793 return ERROR_MALFORMED; 3794 } 3795 offset += 4; 3796 } 3797 3798 if (flags & kSampleFlagsPresent) { 3799 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 3800 return ERROR_MALFORMED; 3801 } 3802 offset += 4; 3803 } 3804 3805 if (flags & kSampleCompositionTimeOffsetPresent) { 3806 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 3807 return ERROR_MALFORMED; 3808 } 3809 offset += 4; 3810 } 3811 3812 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, " 3813 " flags 0x%08x", i + 1, 3814 dataOffset, sampleSize, sampleDuration, 3815 (flags & kFirstSampleFlagsPresent) && i == 0 3816 ? 
firstSampleFlags : sampleFlags); 3817 tmp.offset = dataOffset; 3818 tmp.size = sampleSize; 3819 tmp.duration = sampleDuration; 3820 tmp.compositionOffset = sampleCtsOffset; 3821 mCurrentSamples.add(tmp); 3822 3823 dataOffset += sampleSize; 3824 } 3825 3826 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 3827 3828 return OK; 3829} 3830 3831sp<MetaData> MPEG4Source::getFormat() { 3832 Mutex::Autolock autoLock(mLock); 3833 3834 return mFormat; 3835} 3836 3837size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 3838 switch (mNALLengthSize) { 3839 case 1: 3840 return *data; 3841 case 2: 3842 return U16_AT(data); 3843 case 3: 3844 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 3845 case 4: 3846 return U32_AT(data); 3847 } 3848 3849 // This cannot happen, mNALLengthSize springs to life by adding 1 to 3850 // a 2-bit integer. 3851 CHECK(!"Should not be here."); 3852 3853 return 0; 3854} 3855 3856status_t MPEG4Source::read( 3857 MediaBuffer **out, const ReadOptions *options) { 3858 Mutex::Autolock autoLock(mLock); 3859 3860 CHECK(mStarted); 3861 3862 if (mFirstMoofOffset > 0) { 3863 return fragmentedRead(out, options); 3864 } 3865 3866 *out = NULL; 3867 3868 int64_t targetSampleTimeUs = -1; 3869 3870 int64_t seekTimeUs; 3871 ReadOptions::SeekMode mode; 3872 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3873 uint32_t findFlags = 0; 3874 switch (mode) { 3875 case ReadOptions::SEEK_PREVIOUS_SYNC: 3876 findFlags = SampleTable::kFlagBefore; 3877 break; 3878 case ReadOptions::SEEK_NEXT_SYNC: 3879 findFlags = SampleTable::kFlagAfter; 3880 break; 3881 case ReadOptions::SEEK_CLOSEST_SYNC: 3882 case ReadOptions::SEEK_CLOSEST: 3883 findFlags = SampleTable::kFlagClosest; 3884 break; 3885 default: 3886 CHECK(!"Should not be here."); 3887 break; 3888 } 3889 3890 uint32_t sampleIndex; 3891 status_t err = mSampleTable->findSampleAtTime( 3892 seekTimeUs, 1000000, mTimescale, 3893 &sampleIndex, findFlags); 3894 3895 if (mode == ReadOptions::SEEK_CLOSEST) { 3896 // 
We found the closest sample already, now we want the sync 3897 // sample preceding it (or the sample itself of course), even 3898 // if the subsequent sync sample is closer. 3899 findFlags = SampleTable::kFlagBefore; 3900 } 3901 3902 uint32_t syncSampleIndex; 3903 if (err == OK) { 3904 err = mSampleTable->findSyncSampleNear( 3905 sampleIndex, &syncSampleIndex, findFlags); 3906 } 3907 3908 uint32_t sampleTime; 3909 if (err == OK) { 3910 err = mSampleTable->getMetaDataForSample( 3911 sampleIndex, NULL, NULL, &sampleTime); 3912 } 3913 3914 if (err != OK) { 3915 if (err == ERROR_OUT_OF_RANGE) { 3916 // An attempt to seek past the end of the stream would 3917 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 3918 // this all the way to the MediaPlayer would cause abnormal 3919 // termination. Legacy behaviour appears to be to behave as if 3920 // we had seeked to the end of stream, ending normally. 3921 err = ERROR_END_OF_STREAM; 3922 } 3923 ALOGV("end of stream"); 3924 return err; 3925 } 3926 3927 if (mode == ReadOptions::SEEK_CLOSEST) { 3928 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 3929 } 3930 3931#if 0 3932 uint32_t syncSampleTime; 3933 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 3934 syncSampleIndex, NULL, NULL, &syncSampleTime)); 3935 3936 ALOGI("seek to time %lld us => sample at time %lld us, " 3937 "sync sample at time %lld us", 3938 seekTimeUs, 3939 sampleTime * 1000000ll / mTimescale, 3940 syncSampleTime * 1000000ll / mTimescale); 3941#endif 3942 3943 mCurrentSampleIndex = syncSampleIndex; 3944 if (mBuffer != NULL) { 3945 mBuffer->release(); 3946 mBuffer = NULL; 3947 } 3948 3949 // fall through 3950 } 3951 3952 off64_t offset; 3953 size_t size; 3954 uint32_t cts, stts; 3955 bool isSyncSample; 3956 bool newBuffer = false; 3957 if (mBuffer == NULL) { 3958 newBuffer = true; 3959 3960 status_t err = 3961 mSampleTable->getMetaDataForSample( 3962 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts); 3963 3964 if (err != 
OK) { 3965 return err; 3966 } 3967 3968 err = mGroup->acquire_buffer(&mBuffer); 3969 3970 if (err != OK) { 3971 CHECK(mBuffer == NULL); 3972 return err; 3973 } 3974 if (size > mBuffer->size()) { 3975 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 3976 return ERROR_BUFFER_TOO_SMALL; 3977 } 3978 } 3979 3980 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) { 3981 if (newBuffer) { 3982 ssize_t num_bytes_read = 3983 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 3984 3985 if (num_bytes_read < (ssize_t)size) { 3986 mBuffer->release(); 3987 mBuffer = NULL; 3988 3989 return ERROR_IO; 3990 } 3991 3992 CHECK(mBuffer != NULL); 3993 mBuffer->set_range(0, size); 3994 mBuffer->meta_data()->clear(); 3995 mBuffer->meta_data()->setInt64( 3996 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3997 mBuffer->meta_data()->setInt64( 3998 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 3999 4000 if (targetSampleTimeUs >= 0) { 4001 mBuffer->meta_data()->setInt64( 4002 kKeyTargetTime, targetSampleTimeUs); 4003 } 4004 4005 if (isSyncSample) { 4006 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4007 } 4008 4009 ++mCurrentSampleIndex; 4010 } 4011 4012 if (!mIsAVC && !mIsHEVC) { 4013 *out = mBuffer; 4014 mBuffer = NULL; 4015 4016 return OK; 4017 } 4018 4019 // Each NAL unit is split up into its constituent fragments and 4020 // each one of them returned in its own buffer. 
4021 4022 CHECK(mBuffer->range_length() >= mNALLengthSize); 4023 4024 const uint8_t *src = 4025 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 4026 4027 size_t nal_size = parseNALSize(src); 4028 if (mNALLengthSize > SIZE_MAX - nal_size) { 4029 ALOGE("b/24441553, b/24445122"); 4030 } 4031 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 4032 ALOGE("incomplete NAL unit."); 4033 4034 mBuffer->release(); 4035 mBuffer = NULL; 4036 4037 return ERROR_MALFORMED; 4038 } 4039 4040 MediaBuffer *clone = mBuffer->clone(); 4041 CHECK(clone != NULL); 4042 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 4043 4044 CHECK(mBuffer != NULL); 4045 mBuffer->set_range( 4046 mBuffer->range_offset() + mNALLengthSize + nal_size, 4047 mBuffer->range_length() - mNALLengthSize - nal_size); 4048 4049 if (mBuffer->range_length() == 0) { 4050 mBuffer->release(); 4051 mBuffer = NULL; 4052 } 4053 4054 *out = clone; 4055 4056 return OK; 4057 } else { 4058 // Whole NAL units are returned but each fragment is prefixed by 4059 // the start code (0x00 00 00 01). 
4060 ssize_t num_bytes_read = 0; 4061 int32_t drm = 0; 4062 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 4063 if (usesDRM) { 4064 num_bytes_read = 4065 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 4066 } else { 4067 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 4068 } 4069 4070 if (num_bytes_read < (ssize_t)size) { 4071 mBuffer->release(); 4072 mBuffer = NULL; 4073 4074 return ERROR_IO; 4075 } 4076 4077 if (usesDRM) { 4078 CHECK(mBuffer != NULL); 4079 mBuffer->set_range(0, size); 4080 4081 } else { 4082 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4083 size_t srcOffset = 0; 4084 size_t dstOffset = 0; 4085 4086 while (srcOffset < size) { 4087 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 4088 size_t nalLength = 0; 4089 if (!isMalFormed) { 4090 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4091 srcOffset += mNALLengthSize; 4092 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength); 4093 } 4094 4095 if (isMalFormed) { 4096 ALOGE("Video is malformed"); 4097 mBuffer->release(); 4098 mBuffer = NULL; 4099 return ERROR_MALFORMED; 4100 } 4101 4102 if (nalLength == 0) { 4103 continue; 4104 } 4105 4106 if (dstOffset > SIZE_MAX - 4 || 4107 dstOffset + 4 > SIZE_MAX - nalLength || 4108 dstOffset + 4 + nalLength > mBuffer->size()) { 4109 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size()); 4110 android_errorWriteLog(0x534e4554, "27208621"); 4111 mBuffer->release(); 4112 mBuffer = NULL; 4113 return ERROR_MALFORMED; 4114 } 4115 4116 dstData[dstOffset++] = 0; 4117 dstData[dstOffset++] = 0; 4118 dstData[dstOffset++] = 0; 4119 dstData[dstOffset++] = 1; 4120 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4121 srcOffset += nalLength; 4122 dstOffset += nalLength; 4123 } 4124 CHECK_EQ(srcOffset, size); 4125 CHECK(mBuffer != NULL); 4126 mBuffer->set_range(0, dstOffset); 4127 } 4128 4129 mBuffer->meta_data()->clear(); 4130 mBuffer->meta_data()->setInt64( 4131 
kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4132 mBuffer->meta_data()->setInt64( 4133 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 4134 4135 if (targetSampleTimeUs >= 0) { 4136 mBuffer->meta_data()->setInt64( 4137 kKeyTargetTime, targetSampleTimeUs); 4138 } 4139 4140 if (isSyncSample) { 4141 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4142 } 4143 4144 ++mCurrentSampleIndex; 4145 4146 *out = mBuffer; 4147 mBuffer = NULL; 4148 4149 return OK; 4150 } 4151} 4152 4153status_t MPEG4Source::fragmentedRead( 4154 MediaBuffer **out, const ReadOptions *options) { 4155 4156 ALOGV("MPEG4Source::fragmentedRead"); 4157 4158 CHECK(mStarted); 4159 4160 *out = NULL; 4161 4162 int64_t targetSampleTimeUs = -1; 4163 4164 int64_t seekTimeUs; 4165 ReadOptions::SeekMode mode; 4166 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 4167 4168 int numSidxEntries = mSegments.size(); 4169 if (numSidxEntries != 0) { 4170 int64_t totalTime = 0; 4171 off64_t totalOffset = mFirstMoofOffset; 4172 for (int i = 0; i < numSidxEntries; i++) { 4173 const SidxEntry *se = &mSegments[i]; 4174 if (totalTime + se->mDurationUs > seekTimeUs) { 4175 // The requested time is somewhere in this segment 4176 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) || 4177 (mode == ReadOptions::SEEK_CLOSEST_SYNC && 4178 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 4179 // requested next sync, or closest sync and it was closer to the end of 4180 // this segment 4181 totalTime += se->mDurationUs; 4182 totalOffset += se->mSize; 4183 } 4184 break; 4185 } 4186 totalTime += se->mDurationUs; 4187 totalOffset += se->mSize; 4188 } 4189 mCurrentMoofOffset = totalOffset; 4190 mCurrentSamples.clear(); 4191 mCurrentSampleIndex = 0; 4192 parseChunk(&totalOffset); 4193 mCurrentTime = totalTime * mTimescale / 1000000ll; 4194 } else { 4195 // without sidx boxes, we can only seek to 0 4196 mCurrentMoofOffset = mFirstMoofOffset; 4197 mCurrentSamples.clear(); 4198 
mCurrentSampleIndex = 0; 4199 off64_t tmp = mCurrentMoofOffset; 4200 parseChunk(&tmp); 4201 mCurrentTime = 0; 4202 } 4203 4204 if (mBuffer != NULL) { 4205 mBuffer->release(); 4206 mBuffer = NULL; 4207 } 4208 4209 // fall through 4210 } 4211 4212 off64_t offset = 0; 4213 size_t size = 0; 4214 uint32_t cts = 0; 4215 bool isSyncSample = false; 4216 bool newBuffer = false; 4217 if (mBuffer == NULL) { 4218 newBuffer = true; 4219 4220 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4221 // move to next fragment if there is one 4222 if (mNextMoofOffset <= mCurrentMoofOffset) { 4223 return ERROR_END_OF_STREAM; 4224 } 4225 off64_t nextMoof = mNextMoofOffset; 4226 mCurrentMoofOffset = nextMoof; 4227 mCurrentSamples.clear(); 4228 mCurrentSampleIndex = 0; 4229 parseChunk(&nextMoof); 4230 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4231 return ERROR_END_OF_STREAM; 4232 } 4233 } 4234 4235 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4236 offset = smpl->offset; 4237 size = smpl->size; 4238 cts = mCurrentTime + smpl->compositionOffset; 4239 mCurrentTime += smpl->duration; 4240 isSyncSample = (mCurrentSampleIndex == 0); // XXX 4241 4242 status_t err = mGroup->acquire_buffer(&mBuffer); 4243 4244 if (err != OK) { 4245 CHECK(mBuffer == NULL); 4246 ALOGV("acquire_buffer returned %d", err); 4247 return err; 4248 } 4249 if (size > mBuffer->size()) { 4250 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 4251 return ERROR_BUFFER_TOO_SMALL; 4252 } 4253 } 4254 4255 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4256 const sp<MetaData> bufmeta = mBuffer->meta_data(); 4257 bufmeta->clear(); 4258 if (smpl->encryptedsizes.size()) { 4259 // store clear/encrypted lengths in metadata 4260 bufmeta->setData(kKeyPlainSizes, 0, 4261 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 4262 bufmeta->setData(kKeyEncryptedSizes, 0, 4263 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 4264 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 
16); // use 16 or the actual size? 4265 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 4266 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode); 4267 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16); 4268 } 4269 4270 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) { 4271 if (newBuffer) { 4272 if (!isInRange((size_t)0u, mBuffer->size(), size)) { 4273 mBuffer->release(); 4274 mBuffer = NULL; 4275 4276 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size); 4277 return ERROR_MALFORMED; 4278 } 4279 4280 ssize_t num_bytes_read = 4281 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 4282 4283 if (num_bytes_read < (ssize_t)size) { 4284 mBuffer->release(); 4285 mBuffer = NULL; 4286 4287 ALOGE("i/o error"); 4288 return ERROR_IO; 4289 } 4290 4291 CHECK(mBuffer != NULL); 4292 mBuffer->set_range(0, size); 4293 mBuffer->meta_data()->setInt64( 4294 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4295 mBuffer->meta_data()->setInt64( 4296 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4297 4298 if (targetSampleTimeUs >= 0) { 4299 mBuffer->meta_data()->setInt64( 4300 kKeyTargetTime, targetSampleTimeUs); 4301 } 4302 4303 if (isSyncSample) { 4304 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4305 } 4306 4307 ++mCurrentSampleIndex; 4308 } 4309 4310 if (!mIsAVC && !mIsHEVC) { 4311 *out = mBuffer; 4312 mBuffer = NULL; 4313 4314 return OK; 4315 } 4316 4317 // Each NAL unit is split up into its constituent fragments and 4318 // each one of them returned in its own buffer. 
4319 4320 CHECK(mBuffer->range_length() >= mNALLengthSize); 4321 4322 const uint8_t *src = 4323 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 4324 4325 size_t nal_size = parseNALSize(src); 4326 if (mNALLengthSize > SIZE_MAX - nal_size) { 4327 ALOGE("b/24441553, b/24445122"); 4328 } 4329 4330 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 4331 ALOGE("incomplete NAL unit."); 4332 4333 mBuffer->release(); 4334 mBuffer = NULL; 4335 4336 return ERROR_MALFORMED; 4337 } 4338 4339 MediaBuffer *clone = mBuffer->clone(); 4340 CHECK(clone != NULL); 4341 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 4342 4343 CHECK(mBuffer != NULL); 4344 mBuffer->set_range( 4345 mBuffer->range_offset() + mNALLengthSize + nal_size, 4346 mBuffer->range_length() - mNALLengthSize - nal_size); 4347 4348 if (mBuffer->range_length() == 0) { 4349 mBuffer->release(); 4350 mBuffer = NULL; 4351 } 4352 4353 *out = clone; 4354 4355 return OK; 4356 } else { 4357 ALOGV("whole NAL"); 4358 // Whole NAL units are returned but each fragment is prefixed by 4359 // the start code (0x00 00 00 01). 
4360 ssize_t num_bytes_read = 0; 4361 int32_t drm = 0; 4362 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 4363 void *data = NULL; 4364 bool isMalFormed = false; 4365 if (usesDRM) { 4366 if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) { 4367 isMalFormed = true; 4368 } else { 4369 data = mBuffer->data(); 4370 } 4371 } else { 4372 int32_t max_size; 4373 if (mFormat == NULL 4374 || !mFormat->findInt32(kKeyMaxInputSize, &max_size) 4375 || !isInRange((size_t)0u, (size_t)max_size, size)) { 4376 isMalFormed = true; 4377 } else { 4378 data = mSrcBuffer; 4379 } 4380 } 4381 4382 if (isMalFormed || data == NULL) { 4383 ALOGE("isMalFormed size %zu", size); 4384 if (mBuffer != NULL) { 4385 mBuffer->release(); 4386 mBuffer = NULL; 4387 } 4388 return ERROR_MALFORMED; 4389 } 4390 num_bytes_read = mDataSource->readAt(offset, data, size); 4391 4392 if (num_bytes_read < (ssize_t)size) { 4393 mBuffer->release(); 4394 mBuffer = NULL; 4395 4396 ALOGE("i/o error"); 4397 return ERROR_IO; 4398 } 4399 4400 if (usesDRM) { 4401 CHECK(mBuffer != NULL); 4402 mBuffer->set_range(0, size); 4403 4404 } else { 4405 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4406 size_t srcOffset = 0; 4407 size_t dstOffset = 0; 4408 4409 while (srcOffset < size) { 4410 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 4411 size_t nalLength = 0; 4412 if (!isMalFormed) { 4413 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4414 srcOffset += mNALLengthSize; 4415 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength) 4416 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u) 4417 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength); 4418 } 4419 4420 if (isMalFormed) { 4421 ALOGE("Video is malformed; nalLength %zu", nalLength); 4422 mBuffer->release(); 4423 mBuffer = NULL; 4424 return ERROR_MALFORMED; 4425 } 4426 4427 if (nalLength == 0) { 4428 continue; 4429 } 4430 4431 if (dstOffset > SIZE_MAX - 4 || 4432 dstOffset 
+ 4 > SIZE_MAX - nalLength || 4433 dstOffset + 4 + nalLength > mBuffer->size()) { 4434 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size()); 4435 android_errorWriteLog(0x534e4554, "26365349"); 4436 mBuffer->release(); 4437 mBuffer = NULL; 4438 return ERROR_MALFORMED; 4439 } 4440 4441 dstData[dstOffset++] = 0; 4442 dstData[dstOffset++] = 0; 4443 dstData[dstOffset++] = 0; 4444 dstData[dstOffset++] = 1; 4445 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4446 srcOffset += nalLength; 4447 dstOffset += nalLength; 4448 } 4449 CHECK_EQ(srcOffset, size); 4450 CHECK(mBuffer != NULL); 4451 mBuffer->set_range(0, dstOffset); 4452 } 4453 4454 mBuffer->meta_data()->setInt64( 4455 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4456 mBuffer->meta_data()->setInt64( 4457 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4458 4459 if (targetSampleTimeUs >= 0) { 4460 mBuffer->meta_data()->setInt64( 4461 kKeyTargetTime, targetSampleTimeUs); 4462 } 4463 4464 if (isSyncSample) { 4465 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4466 } 4467 4468 ++mCurrentSampleIndex; 4469 4470 *out = mBuffer; 4471 mBuffer = NULL; 4472 4473 return OK; 4474 } 4475} 4476 4477MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 4478 const char *mimePrefix) { 4479 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 4480 const char *mime; 4481 if (track->meta != NULL 4482 && track->meta->findCString(kKeyMIMEType, &mime) 4483 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 4484 return track; 4485 } 4486 } 4487 4488 return NULL; 4489} 4490 4491static bool LegacySniffMPEG4( 4492 const sp<DataSource> &source, String8 *mimeType, float *confidence) { 4493 uint8_t header[8]; 4494 4495 ssize_t n = source->readAt(4, header, sizeof(header)); 4496 if (n < (ssize_t)sizeof(header)) { 4497 return false; 4498 } 4499 4500 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 4501 || !memcmp(header, "ftyp3gr6", 8) || 
!memcmp(header, "ftyp3gs6", 8) 4502 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 4503 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 4504 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 4505 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) { 4506 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 4507 *confidence = 0.4; 4508 4509 return true; 4510 } 4511 4512 return false; 4513} 4514 4515static bool isCompatibleBrand(uint32_t fourcc) { 4516 static const uint32_t kCompatibleBrands[] = { 4517 FOURCC('i', 's', 'o', 'm'), 4518 FOURCC('i', 's', 'o', '2'), 4519 FOURCC('a', 'v', 'c', '1'), 4520 FOURCC('h', 'v', 'c', '1'), 4521 FOURCC('h', 'e', 'v', '1'), 4522 FOURCC('3', 'g', 'p', '4'), 4523 FOURCC('m', 'p', '4', '1'), 4524 FOURCC('m', 'p', '4', '2'), 4525 4526 // Won't promise that the following file types can be played. 4527 // Just give these file types a chance. 4528 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 4529 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 4530 4531 FOURCC('3', 'g', '2', 'a'), // 3GPP2 4532 FOURCC('3', 'g', '2', 'b'), 4533 }; 4534 4535 for (size_t i = 0; 4536 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 4537 ++i) { 4538 if (kCompatibleBrands[i] == fourcc) { 4539 return true; 4540 } 4541 } 4542 4543 return false; 4544} 4545 4546// Attempt to actually parse the 'ftyp' atom and determine if a suitable 4547// compatible brand is present. 4548// Also try to identify where this file's metadata ends 4549// (end of the 'moov' atom) and report it to the caller as part of 4550// the metadata. 4551static bool BetterSniffMPEG4( 4552 const sp<DataSource> &source, String8 *mimeType, float *confidence, 4553 sp<AMessage> *meta) { 4554 // We scan up to 128 bytes to identify this file as an MP4. 
4555 static const off64_t kMaxScanOffset = 128ll; 4556 4557 off64_t offset = 0ll; 4558 bool foundGoodFileType = false; 4559 off64_t moovAtomEndOffset = -1ll; 4560 bool done = false; 4561 4562 while (!done && offset < kMaxScanOffset) { 4563 uint32_t hdr[2]; 4564 if (source->readAt(offset, hdr, 8) < 8) { 4565 return false; 4566 } 4567 4568 uint64_t chunkSize = ntohl(hdr[0]); 4569 uint32_t chunkType = ntohl(hdr[1]); 4570 off64_t chunkDataOffset = offset + 8; 4571 4572 if (chunkSize == 1) { 4573 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 4574 return false; 4575 } 4576 4577 chunkSize = ntoh64(chunkSize); 4578 chunkDataOffset += 8; 4579 4580 if (chunkSize < 16) { 4581 // The smallest valid chunk is 16 bytes long in this case. 4582 return false; 4583 } 4584 } else if (chunkSize < 8) { 4585 // The smallest valid chunk is 8 bytes long. 4586 return false; 4587 } 4588 4589 off64_t chunkDataSize = offset + chunkSize - chunkDataOffset; 4590 4591 char chunkstring[5]; 4592 MakeFourCCString(chunkType, chunkstring); 4593 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, offset); 4594 switch (chunkType) { 4595 case FOURCC('f', 't', 'y', 'p'): 4596 { 4597 if (chunkDataSize < 8) { 4598 return false; 4599 } 4600 4601 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 4602 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 4603 if (i == 1) { 4604 // Skip this index, it refers to the minorVersion, 4605 // not a brand. 
4606 continue; 4607 } 4608 4609 uint32_t brand; 4610 if (source->readAt( 4611 chunkDataOffset + 4 * i, &brand, 4) < 4) { 4612 return false; 4613 } 4614 4615 brand = ntohl(brand); 4616 4617 if (isCompatibleBrand(brand)) { 4618 foundGoodFileType = true; 4619 break; 4620 } 4621 } 4622 4623 if (!foundGoodFileType) { 4624 return false; 4625 } 4626 4627 break; 4628 } 4629 4630 case FOURCC('m', 'o', 'o', 'v'): 4631 { 4632 moovAtomEndOffset = offset + chunkSize; 4633 4634 done = true; 4635 break; 4636 } 4637 4638 default: 4639 break; 4640 } 4641 4642 offset += chunkSize; 4643 } 4644 4645 if (!foundGoodFileType) { 4646 return false; 4647 } 4648 4649 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 4650 *confidence = 0.4f; 4651 4652 if (moovAtomEndOffset >= 0) { 4653 *meta = new AMessage; 4654 (*meta)->setInt64("meta-data-size", moovAtomEndOffset); 4655 4656 ALOGV("found metadata size: %lld", moovAtomEndOffset); 4657 } 4658 4659 return true; 4660} 4661 4662bool SniffMPEG4( 4663 const sp<DataSource> &source, String8 *mimeType, float *confidence, 4664 sp<AMessage> *meta) { 4665 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) { 4666 return true; 4667 } 4668 4669 if (LegacySniffMPEG4(source, mimeType, confidence)) { 4670 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 4671 return true; 4672 } 4673 4674 return false; 4675} 4676 4677} // namespace android 4678