MPEG4Extractor.cpp revision 8565726f5775a6db97a394aa18a1f6b0e1ce69b7
1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17//#define LOG_NDEBUG 0 18#define LOG_TAG "MPEG4Extractor" 19 20#include <ctype.h> 21#include <inttypes.h> 22#include <stdint.h> 23#include <stdlib.h> 24#include <string.h> 25 26#include <utils/Log.h> 27 28#include "include/MPEG4Extractor.h" 29#include "include/SampleTable.h" 30#include "include/ESDS.h" 31 32#include <media/stagefright/foundation/ABitReader.h> 33#include <media/stagefright/foundation/ABuffer.h> 34#include <media/stagefright/foundation/ADebug.h> 35#include <media/stagefright/foundation/AMessage.h> 36#include <media/stagefright/foundation/AUtils.h> 37#include <media/stagefright/MediaBuffer.h> 38#include <media/stagefright/MediaBufferGroup.h> 39#include <media/stagefright/MediaDefs.h> 40#include <media/stagefright/MediaSource.h> 41#include <media/stagefright/MetaData.h> 42#include <utils/String8.h> 43 44#include <byteswap.h> 45#include "include/ID3.h" 46 47#ifndef UINT32_MAX 48#define UINT32_MAX (4294967295U) 49#endif 50 51namespace android { 52 53class MPEG4Source : public MediaSource { 54public: 55 // Caller retains ownership of both "dataSource" and "sampleTable". 56 MPEG4Source(const sp<MPEG4Extractor> &owner, 57 const sp<MetaData> &format, 58 const sp<DataSource> &dataSource, 59 int32_t timeScale, 60 const sp<SampleTable> &sampleTable, 61 Vector<SidxEntry> &sidx, 62 const Trex *trex, 63 off64_t firstMoofOffset); 64 65 virtual status_t start(MetaData *params = NULL); 66 virtual status_t stop(); 67 68 virtual sp<MetaData> getFormat(); 69 70 virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL); 71 virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL); 72 73protected: 74 virtual ~MPEG4Source(); 75 76private: 77 Mutex mLock; 78 79 // keep the MPEG4Extractor around, since we're referencing its data 80 sp<MPEG4Extractor> mOwner; 81 sp<MetaData> mFormat; 82 sp<DataSource> mDataSource; 83 int32_t mTimescale; 84 sp<SampleTable> mSampleTable; 85 uint32_t mCurrentSampleIndex; 86 uint32_t mCurrentFragmentIndex; 87 Vector<SidxEntry> &mSegments; 88 const Trex *mTrex; 89 off64_t mFirstMoofOffset; 90 off64_t mCurrentMoofOffset; 91 off64_t mNextMoofOffset; 92 uint32_t mCurrentTime; 93 int32_t mLastParsedTrackId; 94 int32_t mTrackId; 95 96 int32_t mCryptoMode; // passed in from extractor 97 int32_t mDefaultIVSize; // passed in from extractor 98 uint8_t mCryptoKey[16]; // passed in from extractor 99 uint32_t mCurrentAuxInfoType; 100 uint32_t mCurrentAuxInfoTypeParameter; 101 int32_t mCurrentDefaultSampleInfoSize; 102 uint32_t mCurrentSampleInfoCount; 103 uint32_t mCurrentSampleInfoAllocSize; 104 uint8_t* mCurrentSampleInfoSizes; 105 uint32_t mCurrentSampleInfoOffsetCount; 106 uint32_t mCurrentSampleInfoOffsetsAllocSize; 107 uint64_t* mCurrentSampleInfoOffsets; 108 109 bool mIsAVC; 110 bool mIsHEVC; 111 size_t mNALLengthSize; 112 113 bool mStarted; 114 115 MediaBufferGroup *mGroup; 116 117 MediaBuffer *mBuffer; 118 119 bool mWantsNALFragments; 120 121 uint8_t *mSrcBuffer; 122 123 size_t parseNALSize(const uint8_t *data) const; 124 status_t parseChunk(off64_t *offset); 125 status_t parseTrackFragmentHeader(off64_t offset, off64_t size); 126 status_t parseTrackFragmentRun(off64_t offset, off64_t size); 127 status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size); 128 status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size); 129 130 struct TrackFragmentHeaderInfo { 131 enum Flags { 132 kBaseDataOffsetPresent = 0x01, 133 kSampleDescriptionIndexPresent = 0x02, 134 kDefaultSampleDurationPresent = 0x08, 135 kDefaultSampleSizePresent = 0x10, 136 kDefaultSampleFlagsPresent = 0x20, 137 kDurationIsEmpty = 0x10000, 138 }; 139 140 uint32_t mTrackID; 141 uint32_t mFlags; 142 uint64_t mBaseDataOffset; 143 uint32_t mSampleDescriptionIndex; 144 uint32_t mDefaultSampleDuration; 145 uint32_t mDefaultSampleSize; 146 uint32_t mDefaultSampleFlags; 147 148 uint64_t mDataOffset; 149 }; 150 TrackFragmentHeaderInfo mTrackFragmentHeaderInfo; 151 152 struct Sample { 153 off64_t offset; 154 size_t size; 155 uint32_t duration; 156 int32_t compositionOffset; 157 uint8_t iv[16]; 158 Vector<size_t> clearsizes; 159 Vector<size_t> encryptedsizes; 160 }; 161 Vector<Sample> mCurrentSamples; 162 163 MPEG4Source(const MPEG4Source &); 164 MPEG4Source &operator=(const MPEG4Source &); 165}; 166 167// This custom data source wraps an existing one and satisfies requests 168// falling entirely within a cached range from the cache while forwarding 169// all remaining requests to the wrapped datasource. 170// This is used to cache the full sampletable metadata for a single track, 171// possibly wrapping multiple times to cover all tracks, i.e. 172// Each MPEG4DataSource caches the sampletable metadata for a single track. 173 174struct MPEG4DataSource : public DataSource { 175 MPEG4DataSource(const sp<DataSource> &source); 176 177 virtual status_t initCheck() const; 178 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 179 virtual status_t getSize(off64_t *size); 180 virtual uint32_t flags(); 181 182 status_t setCachedRange(off64_t offset, size_t size); 183 184protected: 185 virtual ~MPEG4DataSource(); 186 187private: 188 Mutex mLock; 189 190 sp<DataSource> mSource; 191 off64_t mCachedOffset; 192 size_t mCachedSize; 193 uint8_t *mCache; 194 195 void clearCache(); 196 197 MPEG4DataSource(const MPEG4DataSource &); 198 MPEG4DataSource &operator=(const MPEG4DataSource &); 199}; 200 201MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source) 202 : mSource(source), 203 mCachedOffset(0), 204 mCachedSize(0), 205 mCache(NULL) { 206} 207 208MPEG4DataSource::~MPEG4DataSource() { 209 clearCache(); 210} 211 212void MPEG4DataSource::clearCache() { 213 if (mCache) { 214 free(mCache); 215 mCache = NULL; 216 } 217 218 mCachedOffset = 0; 219 mCachedSize = 0; 220} 221 222status_t MPEG4DataSource::initCheck() const { 223 return mSource->initCheck(); 224} 225 226ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { 227 Mutex::Autolock autoLock(mLock); 228 229 if (isInRange(mCachedOffset, mCachedSize, offset, size)) { 230 memcpy(data, &mCache[offset - mCachedOffset], size); 231 return size; 232 } 233 234 return mSource->readAt(offset, data, size); 235} 236 237status_t MPEG4DataSource::getSize(off64_t *size) { 238 return mSource->getSize(size); 239} 240 241uint32_t MPEG4DataSource::flags() { 242 return mSource->flags(); 243} 244 245status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { 246 Mutex::Autolock autoLock(mLock); 247 248 clearCache(); 249 250 mCache = (uint8_t *)malloc(size); 251 252 if (mCache == NULL) { 253 return -ENOMEM; 254 } 255 256 mCachedOffset = offset; 257 mCachedSize = size; 258 259 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize); 260 261 if (err < (ssize_t)size) { 262 clearCache(); 263 264 return ERROR_IO; 265 } 266 267 return OK; 268} 269 270//////////////////////////////////////////////////////////////////////////////// 271 272static void hexdump(const void *_data, size_t size) { 273 const uint8_t *data = (const uint8_t *)_data; 274 size_t offset = 0; 275 while (offset < size) { 276 printf("0x%04zx ", offset); 277 278 size_t n = size - offset; 279 if (n > 16) { 280 n = 16; 281 } 282 283 for (size_t i = 0; i < 16; ++i) { 284 if (i == 8) { 285 printf(" "); 286 } 287 288 if (offset + i < size) { 289 printf("%02x ", data[offset + i]); 290 } else { 291 printf(" "); 292 } 293 } 294 295 printf(" "); 296 297 for (size_t i = 0; i < n; ++i) { 298 if (isprint(data[offset + i])) { 299 printf("%c", data[offset + i]); 300 } else { 301 printf("."); 302 } 303 } 304 305 printf("\n"); 306 307 offset += 16; 308 } 309} 310 311static const char *FourCC2MIME(uint32_t fourcc) { 312 switch (fourcc) { 313 case FOURCC('m', 'p', '4', 'a'): 314 return MEDIA_MIMETYPE_AUDIO_AAC; 315 316 case FOURCC('s', 'a', 'm', 'r'): 317 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 318 319 case FOURCC('s', 'a', 'w', 'b'): 320 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 321 322 case FOURCC('m', 'p', '4', 'v'): 323 return MEDIA_MIMETYPE_VIDEO_MPEG4; 324 325 case FOURCC('s', '2', '6', '3'): 326 case FOURCC('h', '2', '6', '3'): 327 case FOURCC('H', '2', '6', '3'): 328 return MEDIA_MIMETYPE_VIDEO_H263; 329 330 case FOURCC('a', 'v', 'c', '1'): 331 return MEDIA_MIMETYPE_VIDEO_AVC; 332 333 case FOURCC('h', 'v', 'c', '1'): 334 case FOURCC('h', 'e', 'v', '1'): 335 return MEDIA_MIMETYPE_VIDEO_HEVC; 336 default: 337 CHECK(!"should not be here."); 338 return NULL; 339 } 340} 341 342static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 343 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 344 // AMR NB audio is always mono, 8kHz 345 *channels = 1; 346 *rate = 8000; 347 return true; 348 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 349 // AMR WB audio is always mono, 16kHz 350 *channels = 1; 351 *rate = 16000; 352 return true; 353 } 354 return false; 355} 356 357MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source) 358 : mMoofOffset(0), 359 mDataSource(source), 360 mInitCheck(NO_INIT), 361 mHasVideo(false), 362 mHeaderTimescale(0), 363 mFirstTrack(NULL), 364 mLastTrack(NULL), 365 mFileMetaData(new MetaData), 366 mFirstSINF(NULL), 367 mIsDrm(false) { 368} 369 370MPEG4Extractor::~MPEG4Extractor() { 371 Track *track = mFirstTrack; 372 while (track) { 373 Track *next = track->next; 374 375 delete track; 376 track = next; 377 } 378 mFirstTrack = mLastTrack = NULL; 379 380 SINF *sinf = mFirstSINF; 381 while (sinf) { 382 SINF *next = sinf->next; 383 delete[] sinf->IPMPData; 384 delete sinf; 385 sinf = next; 386 } 387 mFirstSINF = NULL; 388 389 for (size_t i = 0; i < mPssh.size(); i++) { 390 delete [] mPssh[i].data; 391 } 392} 393 394uint32_t MPEG4Extractor::flags() const { 395 return CAN_PAUSE | 396 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 397 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 398} 399 400sp<MetaData> MPEG4Extractor::getMetaData() { 401 status_t err; 402 if ((err = readMetaData()) != OK) { 403 return new MetaData; 404 } 405 406 return mFileMetaData; 407} 408 409size_t MPEG4Extractor::countTracks() { 410 status_t err; 411 if ((err = readMetaData()) != OK) { 412 ALOGV("MPEG4Extractor::countTracks: no tracks"); 413 return 0; 414 } 415 416 size_t n = 0; 417 Track *track = mFirstTrack; 418 while (track) { 419 ++n; 420 track = track->next; 421 } 422 423 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n); 424 return n; 425} 426 427sp<MetaData> MPEG4Extractor::getTrackMetaData( 428 size_t index, uint32_t flags) { 429 status_t err; 430 if ((err = readMetaData()) != OK) { 431 return NULL; 432 } 433 434 Track *track = mFirstTrack; 435 while (index > 0) { 436 if (track == NULL) { 437 return NULL; 438 } 439 440 track = track->next; 441 --index; 442 } 443 444 if (track == NULL) { 445 return NULL; 446 } 447 448 if ((flags & kIncludeExtensiveMetaData) 449 && !track->includes_expensive_metadata) { 450 track->includes_expensive_metadata = true; 451 452 const char *mime; 453 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 454 if (!strncasecmp("video/", mime, 6)) { 455 if (mMoofOffset > 0) { 456 int64_t duration; 457 if (track->meta->findInt64(kKeyDuration, &duration)) { 458 // nothing fancy, just pick a frame near 1/4th of the duration 459 track->meta->setInt64( 460 kKeyThumbnailTime, duration / 4); 461 } 462 } else { 463 uint32_t sampleIndex; 464 uint32_t sampleTime; 465 if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK 466 && track->sampleTable->getMetaDataForSample( 467 sampleIndex, NULL /* offset */, NULL /* size */, 468 &sampleTime) == OK) { 469 track->meta->setInt64( 470 kKeyThumbnailTime, 471 ((int64_t)sampleTime * 1000000) / track->timescale); 472 } 473 } 474 } 475 } 476 477 return track->meta; 478} 479 480static void MakeFourCCString(uint32_t x, char *s) { 481 s[0] = x >> 24; 482 s[1] = (x >> 16) & 0xff; 483 s[2] = (x >> 8) & 0xff; 484 s[3] = x & 0xff; 485 s[4] = '\0'; 486} 487 488status_t MPEG4Extractor::readMetaData() { 489 if (mInitCheck != NO_INIT) { 490 return mInitCheck; 491 } 492 493 off64_t offset = 0; 494 status_t err; 495 while (true) { 496 off64_t orig_offset = offset; 497 err = parseChunk(&offset, 0); 498 499 if (err != OK && err != UNKNOWN_ERROR) { 500 break; 501 } else if (offset <= orig_offset) { 502 // only continue parsing if the offset was advanced, 503 // otherwise we might end up in an infinite loop 504 ALOGE("did not advance: 0x%lld->0x%lld", orig_offset, offset); 505 err = ERROR_MALFORMED; 506 break; 507 } else if (err == OK) { 508 continue; 509 } 510 511 uint32_t hdr[2]; 512 if (mDataSource->readAt(offset, hdr, 8) < 8) { 513 break; 514 } 515 uint32_t chunk_type = ntohl(hdr[1]); 516 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 517 // store the offset of the first segment 518 mMoofOffset = offset; 519 } else if (chunk_type != FOURCC('m', 'd', 'a', 't')) { 520 // keep parsing until we get to the data 521 continue; 522 } 523 break; 524 } 525 526 if (mInitCheck == OK) { 527 if (mHasVideo) { 528 mFileMetaData->setCString( 529 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 530 } else { 531 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); 532 } 533 } else { 534 mInitCheck = err; 535 } 536 537 CHECK_NE(err, (status_t)NO_INIT); 538 539 // copy pssh data into file metadata 540 int psshsize = 0; 541 for (size_t i = 0; i < mPssh.size(); i++) { 542 psshsize += 20 + mPssh[i].datalen; 543 } 544 if (psshsize) { 545 char *buf = (char*)malloc(psshsize); 546 char *ptr = buf; 547 for (size_t i = 0; i < mPssh.size(); i++) { 548 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 549 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 550 ptr += (20 + mPssh[i].datalen); 551 } 552 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize); 553 free(buf); 554 } 555 return mInitCheck; 556} 557 558char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { 559 if (mFirstSINF == NULL) { 560 return NULL; 561 } 562 563 SINF *sinf = mFirstSINF; 564 while (sinf && (trackID != sinf->trackID)) { 565 sinf = sinf->next; 566 } 567 568 if (sinf == NULL) { 569 return NULL; 570 } 571 572 *len = sinf->len; 573 return sinf->IPMPData; 574} 575 576// Reads an encoded integer 7 bits at a time until it encounters the high bit clear. 577static int32_t readSize(off64_t offset, 578 const sp<DataSource> DataSource, uint8_t *numOfBytes) { 579 uint32_t size = 0; 580 uint8_t data; 581 bool moreData = true; 582 *numOfBytes = 0; 583 584 while (moreData) { 585 if (DataSource->readAt(offset, &data, 1) < 1) { 586 return -1; 587 } 588 offset ++; 589 moreData = (data >= 128) ? true : false; 590 size = (size << 7) | (data & 0x7f); // Take last 7 bits 591 (*numOfBytes) ++; 592 } 593 594 return size; 595} 596 597status_t MPEG4Extractor::parseDrmSINF( 598 off64_t * /* offset */, off64_t data_offset) { 599 uint8_t updateIdTag; 600 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 601 return ERROR_IO; 602 } 603 data_offset ++; 604 605 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 606 return ERROR_MALFORMED; 607 } 608 609 uint8_t numOfBytes; 610 int32_t size = readSize(data_offset, mDataSource, &numOfBytes); 611 if (size < 0) { 612 return ERROR_IO; 613 } 614 int32_t classSize = size; 615 data_offset += numOfBytes; 616 617 while(size >= 11 ) { 618 uint8_t descriptorTag; 619 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { 620 return ERROR_IO; 621 } 622 data_offset ++; 623 624 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { 625 return ERROR_MALFORMED; 626 } 627 628 uint8_t buffer[8]; 629 //ObjectDescriptorID and ObjectDescriptor url flag 630 if (mDataSource->readAt(data_offset, buffer, 2) < 2) { 631 return ERROR_IO; 632 } 633 data_offset += 2; 634 635 if ((buffer[1] >> 5) & 0x0001) { //url flag is set 636 return ERROR_MALFORMED; 637 } 638 639 if (mDataSource->readAt(data_offset, buffer, 8) < 8) { 640 return ERROR_IO; 641 } 642 data_offset += 8; 643 644 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) 645 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { 646 return ERROR_MALFORMED; 647 } 648 649 SINF *sinf = new SINF; 650 sinf->trackID = U16_AT(&buffer[3]); 651 sinf->IPMPDescriptorID = buffer[7]; 652 sinf->next = mFirstSINF; 653 mFirstSINF = sinf; 654 655 size -= (8 + 2 + 1); 656 } 657 658 if (size != 0) { 659 return ERROR_MALFORMED; 660 } 661 662 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 663 return ERROR_IO; 664 } 665 data_offset ++; 666 667 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 668 return ERROR_MALFORMED; 669 } 670 671 size = readSize(data_offset, mDataSource, &numOfBytes); 672 if (size < 0) { 673 return ERROR_IO; 674 } 675 classSize = size; 676 data_offset += numOfBytes; 677 678 while (size > 0) { 679 uint8_t tag; 680 int32_t dataLen; 681 if (mDataSource->readAt(data_offset, &tag, 1) < 1) { 682 return ERROR_IO; 683 } 684 data_offset ++; 685 686 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { 687 uint8_t id; 688 dataLen = readSize(data_offset, mDataSource, &numOfBytes); 689 if (dataLen < 0) { 690 return ERROR_IO; 691 } else if (dataLen < 4) { 692 return ERROR_MALFORMED; 693 } 694 data_offset += numOfBytes; 695 696 if (mDataSource->readAt(data_offset, &id, 1) < 1) { 697 return ERROR_IO; 698 } 699 data_offset ++; 700 701 SINF *sinf = mFirstSINF; 702 while (sinf && (sinf->IPMPDescriptorID != id)) { 703 sinf = sinf->next; 704 } 705 if (sinf == NULL) { 706 return ERROR_MALFORMED; 707 } 708 sinf->len = dataLen - 3; 709 sinf->IPMPData = new (std::nothrow) char[sinf->len]; 710 if (sinf->IPMPData == NULL) { 711 return ERROR_MALFORMED; 712 } 713 data_offset += 2; 714 715 if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) { 716 return ERROR_IO; 717 } 718 data_offset += sinf->len; 719 720 size -= (dataLen + numOfBytes + 1); 721 } 722 } 723 724 if (size != 0) { 725 return ERROR_MALFORMED; 726 } 727 728 return UNKNOWN_ERROR; // Return a dummy error. 729} 730 731struct PathAdder { 732 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 733 : mPath(path) { 734 mPath->push(chunkType); 735 } 736 737 ~PathAdder() { 738 mPath->pop(); 739 } 740 741private: 742 Vector<uint32_t> *mPath; 743 744 PathAdder(const PathAdder &); 745 PathAdder &operator=(const PathAdder &); 746}; 747 748static bool underMetaDataPath(const Vector<uint32_t> &path) { 749 return path.size() >= 5 750 && path[0] == FOURCC('m', 'o', 'o', 'v') 751 && path[1] == FOURCC('u', 'd', 't', 'a') 752 && path[2] == FOURCC('m', 'e', 't', 'a') 753 && path[3] == FOURCC('i', 'l', 's', 't'); 754} 755 756// Given a time in seconds since Jan 1 1904, produce a human-readable string. 757static void convertTimeToDate(int64_t time_1904, String8 *s) { 758 time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600); 759 760 char tmp[32]; 761 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970)); 762 763 s->setTo(tmp); 764} 765 766status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 767 ALOGV("entering parseChunk %lld/%d", *offset, depth); 768 uint32_t hdr[2]; 769 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 770 return ERROR_IO; 771 } 772 uint64_t chunk_size = ntohl(hdr[0]); 773 uint32_t chunk_type = ntohl(hdr[1]); 774 off64_t data_offset = *offset + 8; 775 776 if (chunk_size == 1) { 777 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 778 return ERROR_IO; 779 } 780 chunk_size = ntoh64(chunk_size); 781 data_offset += 8; 782 783 if (chunk_size < 16) { 784 // The smallest valid chunk is 16 bytes long in this case. 785 return ERROR_MALFORMED; 786 } 787 } else if (chunk_size == 0) { 788 if (depth == 0) { 789 // atom extends to end of file 790 off64_t sourceSize; 791 if (mDataSource->getSize(&sourceSize) == OK) { 792 chunk_size = (sourceSize - *offset); 793 } else { 794 // XXX could we just pick a "sufficiently large" value here? 795 ALOGE("atom size is 0, and data source has no size"); 796 return ERROR_MALFORMED; 797 } 798 } else { 799 // not allowed for non-toplevel atoms, skip it 800 *offset += 4; 801 return OK; 802 } 803 } else if (chunk_size < 8) { 804 // The smallest valid chunk is 8 bytes long. 805 ALOGE("invalid chunk size: %" PRIu64, chunk_size); 806 return ERROR_MALFORMED; 807 } 808 809 char chunk[5]; 810 MakeFourCCString(chunk_type, chunk); 811 ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth); 812 813#if 0 814 static const char kWhitespace[] = " "; 815 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 816 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size); 817 818 char buffer[256]; 819 size_t n = chunk_size; 820 if (n > sizeof(buffer)) { 821 n = sizeof(buffer); 822 } 823 if (mDataSource->readAt(*offset, buffer, n) 824 < (ssize_t)n) { 825 return ERROR_IO; 826 } 827 828 hexdump(buffer, n); 829#endif 830 831 PathAdder autoAdder(&mPath, chunk_type); 832 833 off64_t chunk_data_size = *offset + chunk_size - data_offset; 834 835 if (chunk_type != FOURCC('c', 'p', 'r', 't') 836 && chunk_type != FOURCC('c', 'o', 'v', 'r') 837 && mPath.size() == 5 && underMetaDataPath(mPath)) { 838 off64_t stop_offset = *offset + chunk_size; 839 *offset = data_offset; 840 while (*offset < stop_offset) { 841 status_t err = parseChunk(offset, depth + 1); 842 if (err != OK) { 843 return err; 844 } 845 } 846 847 if (*offset != stop_offset) { 848 return ERROR_MALFORMED; 849 } 850 851 return OK; 852 } 853 854 switch(chunk_type) { 855 case FOURCC('m', 'o', 'o', 'v'): 856 case FOURCC('t', 'r', 'a', 'k'): 857 case FOURCC('m', 'd', 'i', 'a'): 858 case FOURCC('m', 'i', 'n', 'f'): 859 case FOURCC('d', 'i', 'n', 'f'): 860 case FOURCC('s', 't', 'b', 'l'): 861 case FOURCC('m', 'v', 'e', 'x'): 862 case FOURCC('m', 'o', 'o', 'f'): 863 case FOURCC('t', 'r', 'a', 'f'): 864 case FOURCC('m', 'f', 'r', 'a'): 865 case FOURCC('u', 'd', 't', 'a'): 866 case FOURCC('i', 'l', 's', 't'): 867 case FOURCC('s', 'i', 'n', 'f'): 868 case FOURCC('s', 'c', 'h', 'i'): 869 case FOURCC('e', 'd', 't', 's'): 870 { 871 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 872 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size); 873 874 if (mDataSource->flags() 875 & (DataSource::kWantsPrefetching 876 | DataSource::kIsCachingDataSource)) { 877 sp<MPEG4DataSource> cachedSource = 878 new MPEG4DataSource(mDataSource); 879 880 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { 881 mDataSource = cachedSource; 882 } 883 } 884 885 mLastTrack->sampleTable = new SampleTable(mDataSource); 886 } 887 888 bool isTrack = false; 889 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 890 isTrack = true; 891 892 Track *track = new Track; 893 track->next = NULL; 894 if (mLastTrack) { 895 mLastTrack->next = track; 896 } else { 897 mFirstTrack = track; 898 } 899 mLastTrack = track; 900 901 track->meta = new MetaData; 902 track->includes_expensive_metadata = false; 903 track->skipTrack = false; 904 track->timescale = 0; 905 track->meta->setCString(kKeyMIMEType, "application/octet-stream"); 906 } 907 908 off64_t stop_offset = *offset + chunk_size; 909 *offset = data_offset; 910 while (*offset < stop_offset) { 911 status_t err = parseChunk(offset, depth + 1); 912 if (err != OK) { 913 return err; 914 } 915 } 916 917 if (*offset != stop_offset) { 918 return ERROR_MALFORMED; 919 } 920 921 if (isTrack) { 922 if (mLastTrack->skipTrack) { 923 Track *cur = mFirstTrack; 924 925 if (cur == mLastTrack) { 926 delete cur; 927 mFirstTrack = mLastTrack = NULL; 928 } else { 929 while (cur && cur->next != mLastTrack) { 930 cur = cur->next; 931 } 932 cur->next = NULL; 933 delete mLastTrack; 934 mLastTrack = cur; 935 } 936 937 return OK; 938 } 939 940 status_t err = verifyTrack(mLastTrack); 941 942 if (err != OK) { 943 return err; 944 } 945 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 946 mInitCheck = OK; 947 948 if (!mIsDrm) { 949 return UNKNOWN_ERROR; // Return a dummy error. 950 } else { 951 return OK; 952 } 953 } 954 break; 955 } 956 957 case FOURCC('e', 'l', 's', 't'): 958 { 959 *offset += chunk_size; 960 961 // See 14496-12 8.6.6 962 uint8_t version; 963 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 964 return ERROR_IO; 965 } 966 967 uint32_t entry_count; 968 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 969 return ERROR_IO; 970 } 971 972 if (entry_count != 1) { 973 // we only support a single entry at the moment, for gapless playback 974 ALOGW("ignoring edit list with %d entries", entry_count); 975 } else if (mHeaderTimescale == 0) { 976 ALOGW("ignoring edit list because timescale is 0"); 977 } else { 978 off64_t entriesoffset = data_offset + 8; 979 uint64_t segment_duration; 980 int64_t media_time; 981 982 if (version == 1) { 983 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 984 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 985 return ERROR_IO; 986 } 987 } else if (version == 0) { 988 uint32_t sd; 989 int32_t mt; 990 if (!mDataSource->getUInt32(entriesoffset, &sd) || 991 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 992 return ERROR_IO; 993 } 994 segment_duration = sd; 995 media_time = mt; 996 } else { 997 return ERROR_IO; 998 } 999 1000 uint64_t halfscale = mHeaderTimescale / 2; 1001 segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale; 1002 media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale; 1003 1004 int64_t duration; 1005 int32_t samplerate; 1006 if (mLastTrack->meta->findInt64(kKeyDuration, &duration) && 1007 mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) { 1008 1009 int64_t delay = (media_time * samplerate + 500000) / 1000000; 1010 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 1011 1012 int64_t paddingus = duration - (segment_duration + media_time); 1013 if (paddingus < 0) { 1014 // track duration from media header (which is what kKeyDuration is) might 1015 // be slightly shorter than the segment duration, which would make the 1016 // padding negative. Clamp to zero. 1017 paddingus = 0; 1018 } 1019 int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000; 1020 mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples); 1021 } 1022 } 1023 break; 1024 } 1025 1026 case FOURCC('f', 'r', 'm', 'a'): 1027 { 1028 *offset += chunk_size; 1029 1030 uint32_t original_fourcc; 1031 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 1032 return ERROR_IO; 1033 } 1034 original_fourcc = ntohl(original_fourcc); 1035 ALOGV("read original format: %d", original_fourcc); 1036 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 1037 uint32_t num_channels = 0; 1038 uint32_t sample_rate = 0; 1039 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 1040 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1041 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1042 } 1043 break; 1044 } 1045 1046 case FOURCC('t', 'e', 'n', 'c'): 1047 { 1048 *offset += chunk_size; 1049 1050 if (chunk_size < 32) { 1051 return ERROR_MALFORMED; 1052 } 1053 1054 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1055 // default IV size, 16 bytes default KeyID 1056 // (ISO 23001-7) 1057 char buf[4]; 1058 memset(buf, 0, 4); 1059 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1060 return ERROR_IO; 1061 } 1062 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1063 if (defaultAlgorithmId > 1) { 1064 // only 0 (clear) and 1 (AES-128) are valid 1065 return ERROR_MALFORMED; 1066 } 1067 1068 memset(buf, 0, 4); 1069 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1070 return ERROR_IO; 1071 } 1072 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 1073 1074 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 1075 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 1076 // only unencrypted data must have 0 IV size 1077 return ERROR_MALFORMED; 1078 } else if (defaultIVSize != 0 && 1079 defaultIVSize != 8 && 1080 defaultIVSize != 16) { 1081 // only supported sizes are 0, 8 and 16 1082 return ERROR_MALFORMED; 1083 } 1084 1085 uint8_t defaultKeyId[16]; 1086 1087 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1088 return ERROR_IO; 1089 } 1090 1091 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId); 1092 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1093 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1094 break; 1095 } 1096 1097 case FOURCC('t', 'k', 'h', 'd'): 1098 { 1099 *offset += chunk_size; 1100 1101 status_t err; 1102 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1103 return err; 1104 } 1105 1106 break; 1107 } 1108 1109 case FOURCC('p', 's', 's', 'h'): 1110 { 1111 *offset += chunk_size; 1112 1113 PsshInfo pssh; 1114 1115 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1116 return ERROR_IO; 1117 } 1118 1119 uint32_t psshdatalen = 0; 1120 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1121 return ERROR_IO; 1122 } 1123 pssh.datalen = ntohl(psshdatalen); 1124 ALOGV("pssh data size: %d", pssh.datalen); 1125 if (pssh.datalen + 20 > chunk_size) { 1126 // pssh data length exceeds size of containing box 1127 return ERROR_MALFORMED; 1128 } 1129 1130 pssh.data = new (std::nothrow) uint8_t[pssh.datalen]; 1131 if (pssh.data == NULL) { 1132 return ERROR_MALFORMED; 1133 } 1134 ALOGV("allocated pssh @ %p", pssh.data); 1135 ssize_t requested = (ssize_t) pssh.datalen; 1136 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1137 return ERROR_IO; 1138 } 1139 mPssh.push_back(pssh); 1140 1141 break; 1142 } 1143 1144 case FOURCC('m', 'd', 'h', 'd'): 1145 { 1146 *offset += chunk_size; 1147 1148 if (chunk_data_size < 4) { 1149 return ERROR_MALFORMED; 1150 } 1151 1152 uint8_t version; 1153 if (mDataSource->readAt( 1154 data_offset, &version, sizeof(version)) 1155 < (ssize_t)sizeof(version)) { 1156 return ERROR_IO; 1157 } 1158 1159 off64_t timescale_offset; 1160 1161 if (version == 1) { 1162 timescale_offset = data_offset + 4 + 16; 1163 } else if (version == 0) { 1164 timescale_offset = data_offset + 4 + 8; 1165 } else { 1166 return ERROR_IO; 1167 } 1168 1169 uint32_t timescale; 1170 if (mDataSource->readAt( 1171 timescale_offset, ×cale, sizeof(timescale)) 1172 < (ssize_t)sizeof(timescale)) { 1173 return ERROR_IO; 1174 } 1175 1176 mLastTrack->timescale = ntohl(timescale); 1177 1178 // 14496-12 says all ones means indeterminate, but some files seem to use 1179 // 0 instead. We treat both the same. 1180 int64_t duration = 0; 1181 if (version == 1) { 1182 if (mDataSource->readAt( 1183 timescale_offset + 4, &duration, sizeof(duration)) 1184 < (ssize_t)sizeof(duration)) { 1185 return ERROR_IO; 1186 } 1187 if (duration != -1) { 1188 duration = ntoh64(duration); 1189 } 1190 } else { 1191 uint32_t duration32; 1192 if (mDataSource->readAt( 1193 timescale_offset + 4, &duration32, sizeof(duration32)) 1194 < (ssize_t)sizeof(duration32)) { 1195 return ERROR_IO; 1196 } 1197 if (duration32 != 0xffffffff) { 1198 duration = ntohl(duration32); 1199 } 1200 } 1201 if (duration != 0) { 1202 mLastTrack->meta->setInt64( 1203 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1204 } 1205 1206 uint8_t lang[2]; 1207 off64_t lang_offset; 1208 if (version == 1) { 1209 lang_offset = timescale_offset + 4 + 8; 1210 } else if (version == 0) { 1211 lang_offset = timescale_offset + 4 + 4; 1212 } else { 1213 return ERROR_IO; 1214 } 1215 1216 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1217 < (ssize_t)sizeof(lang)) { 1218 return ERROR_IO; 1219 } 1220 1221 // To get the ISO-639-2/T three character language code 1222 // 1 bit pad followed by 3 5-bits characters. Each character 1223 // is packed as the difference between its ASCII value and 0x60. 1224 char lang_code[4]; 1225 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1226 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1227 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1228 lang_code[3] = '\0'; 1229 1230 mLastTrack->meta->setCString( 1231 kKeyMediaLanguage, lang_code); 1232 1233 break; 1234 } 1235 1236 case FOURCC('s', 't', 's', 'd'): 1237 { 1238 if (chunk_data_size < 8) { 1239 return ERROR_MALFORMED; 1240 } 1241 1242 uint8_t buffer[8]; 1243 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1244 return ERROR_MALFORMED; 1245 } 1246 1247 if (mDataSource->readAt( 1248 data_offset, buffer, 8) < 8) { 1249 return ERROR_IO; 1250 } 1251 1252 if (U32_AT(buffer) != 0) { 1253 // Should be version 0, flags 0. 1254 return ERROR_MALFORMED; 1255 } 1256 1257 uint32_t entry_count = U32_AT(&buffer[4]); 1258 1259 if (entry_count > 1) { 1260 // For 3GPP timed text, there could be multiple tx3g boxes contain 1261 // multiple text display formats. These formats will be used to 1262 // display the timed text. 1263 // For encrypted files, there may also be more than one entry. 1264 const char *mime; 1265 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1266 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1267 strcasecmp(mime, "application/octet-stream")) { 1268 // For now we only support a single type of media per track. 1269 mLastTrack->skipTrack = true; 1270 *offset += chunk_size; 1271 break; 1272 } 1273 } 1274 off64_t stop_offset = *offset + chunk_size; 1275 *offset = data_offset + 8; 1276 for (uint32_t i = 0; i < entry_count; ++i) { 1277 status_t err = parseChunk(offset, depth + 1); 1278 if (err != OK) { 1279 return err; 1280 } 1281 } 1282 1283 if (*offset != stop_offset) { 1284 return ERROR_MALFORMED; 1285 } 1286 break; 1287 } 1288 1289 case FOURCC('m', 'p', '4', 'a'): 1290 case FOURCC('e', 'n', 'c', 'a'): 1291 case FOURCC('s', 'a', 'm', 'r'): 1292 case FOURCC('s', 'a', 'w', 'b'): 1293 { 1294 uint8_t buffer[8 + 20]; 1295 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1296 // Basic AudioSampleEntry size. 1297 return ERROR_MALFORMED; 1298 } 1299 1300 if (mDataSource->readAt( 1301 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1302 return ERROR_IO; 1303 } 1304 1305 uint16_t data_ref_index = U16_AT(&buffer[6]); 1306 uint32_t num_channels = U16_AT(&buffer[16]); 1307 1308 uint16_t sample_size = U16_AT(&buffer[18]); 1309 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1310 1311 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1312 // if the chunk type is enca, we'll get the type from the sinf/frma box later 1313 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1314 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1315 } 1316 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1317 chunk, num_channels, sample_size, sample_rate); 1318 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1319 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1320 1321 off64_t stop_offset = *offset + chunk_size; 1322 *offset = data_offset + sizeof(buffer); 1323 while (*offset < stop_offset) { 1324 status_t err = parseChunk(offset, depth + 1); 1325 if (err != OK) { 1326 return err; 1327 } 1328 } 1329 1330 if (*offset != stop_offset) { 1331 return ERROR_MALFORMED; 1332 } 1333 break; 1334 } 1335 1336 case FOURCC('m', 'p', '4', 'v'): 1337 case FOURCC('e', 'n', 'c', 'v'): 1338 case FOURCC('s', '2', '6', '3'): 1339 case FOURCC('H', '2', '6', '3'): 1340 case FOURCC('h', '2', '6', '3'): 1341 case FOURCC('a', 'v', 'c', '1'): 1342 case FOURCC('h', 'v', 'c', '1'): 1343 case FOURCC('h', 'e', 'v', '1'): 1344 { 1345 mHasVideo = true; 1346 1347 uint8_t buffer[78]; 1348 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1349 // Basic VideoSampleEntry size. 1350 return ERROR_MALFORMED; 1351 } 1352 1353 if (mDataSource->readAt( 1354 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1355 return ERROR_IO; 1356 } 1357 1358 uint16_t data_ref_index = U16_AT(&buffer[6]); 1359 uint16_t width = U16_AT(&buffer[6 + 18]); 1360 uint16_t height = U16_AT(&buffer[6 + 20]); 1361 1362 // The video sample is not standard-compliant if it has invalid dimension. 1363 // Use some default width and height value, and 1364 // let the decoder figure out the actual width and height (and thus 1365 // be prepared for INFO_FOMRAT_CHANGED event). 1366 if (width == 0) width = 352; 1367 if (height == 0) height = 288; 1368 1369 // printf("*** coding='%s' width=%d height=%d\n", 1370 // chunk, width, height); 1371 1372 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1373 // if the chunk type is encv, we'll get the type from the sinf/frma box later 1374 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1375 } 1376 mLastTrack->meta->setInt32(kKeyWidth, width); 1377 mLastTrack->meta->setInt32(kKeyHeight, height); 1378 1379 off64_t stop_offset = *offset + chunk_size; 1380 *offset = data_offset + sizeof(buffer); 1381 while (*offset < stop_offset) { 1382 status_t err = parseChunk(offset, depth + 1); 1383 if (err != OK) { 1384 return err; 1385 } 1386 } 1387 1388 if (*offset != stop_offset) { 1389 return ERROR_MALFORMED; 1390 } 1391 break; 1392 } 1393 1394 case FOURCC('s', 't', 'c', 'o'): 1395 case FOURCC('c', 'o', '6', '4'): 1396 { 1397 status_t err = 1398 mLastTrack->sampleTable->setChunkOffsetParams( 1399 chunk_type, data_offset, chunk_data_size); 1400 1401 *offset += chunk_size; 1402 1403 if (err != OK) { 1404 return err; 1405 } 1406 1407 break; 1408 } 1409 1410 case FOURCC('s', 't', 's', 'c'): 1411 { 1412 status_t err = 1413 mLastTrack->sampleTable->setSampleToChunkParams( 1414 data_offset, chunk_data_size); 1415 1416 *offset += chunk_size; 1417 1418 if (err != OK) { 1419 return err; 1420 } 1421 1422 break; 1423 } 1424 1425 case FOURCC('s', 't', 's', 'z'): 1426 case FOURCC('s', 't', 'z', '2'): 1427 { 1428 status_t err = 1429 mLastTrack->sampleTable->setSampleSizeParams( 1430 chunk_type, data_offset, chunk_data_size); 1431 1432 *offset += chunk_size; 1433 1434 if (err != OK) { 1435 return err; 1436 } 1437 1438 size_t max_size; 1439 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1440 1441 if (err != OK) { 1442 return err; 1443 } 1444 1445 if (max_size != 0) { 1446 // Assume that a given buffer only contains at most 10 chunks, 1447 // each chunk originally prefixed with a 2 byte length will 1448 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1449 // and thus will grow by 2 bytes per chunk. 1450 if (max_size > SIZE_MAX - 10 * 2) { 1451 ALOGE("max sample size too big: %zu", max_size); 1452 return ERROR_MALFORMED; 1453 } 1454 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1455 } else { 1456 // No size was specified. Pick a conservatively large size. 1457 uint32_t width, height; 1458 if (!mLastTrack->meta->findInt32(kKeyWidth, (int32_t*)&width) || 1459 !mLastTrack->meta->findInt32(kKeyHeight,(int32_t*) &height)) { 1460 ALOGE("No width or height, assuming worst case 1080p"); 1461 width = 1920; 1462 height = 1080; 1463 } else { 1464 // A resolution was specified, check that it's not too big. The values below 1465 // were chosen so that the calculations below don't cause overflows, they're 1466 // not indicating that resolutions up to 32kx32k are actually supported. 1467 if (width > 32768 || height > 32768) { 1468 ALOGE("can't support %u x %u video", width, height); 1469 return ERROR_MALFORMED; 1470 } 1471 } 1472 1473 const char *mime; 1474 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1475 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 1476 // AVC requires compression ratio of at least 2, and uses 1477 // macroblocks 1478 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1479 } else { 1480 // For all other formats there is no minimum compression 1481 // ratio. Use compression ratio of 1. 1482 max_size = width * height * 3 / 2; 1483 } 1484 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size); 1485 } 1486 1487 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1488 // mimetype) previously obtained, so don't cache them. 1489 const char *mime; 1490 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1491 // Calculate average frame rate. 1492 if (!strncasecmp("video/", mime, 6)) { 1493 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1494 int64_t durationUs; 1495 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { 1496 if (durationUs > 0) { 1497 int32_t frameRate = (nSamples * 1000000LL + 1498 (durationUs >> 1)) / durationUs; 1499 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1500 } 1501 } 1502 } 1503 1504 break; 1505 } 1506 1507 case FOURCC('s', 't', 't', 's'): 1508 { 1509 *offset += chunk_size; 1510 1511 status_t err = 1512 mLastTrack->sampleTable->setTimeToSampleParams( 1513 data_offset, chunk_data_size); 1514 1515 if (err != OK) { 1516 return err; 1517 } 1518 1519 break; 1520 } 1521 1522 case FOURCC('c', 't', 't', 's'): 1523 { 1524 *offset += chunk_size; 1525 1526 status_t err = 1527 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1528 data_offset, chunk_data_size); 1529 1530 if (err != OK) { 1531 return err; 1532 } 1533 1534 break; 1535 } 1536 1537 case FOURCC('s', 't', 's', 's'): 1538 { 1539 *offset += chunk_size; 1540 1541 status_t err = 1542 mLastTrack->sampleTable->setSyncSampleParams( 1543 data_offset, chunk_data_size); 1544 1545 if (err != OK) { 1546 return err; 1547 } 1548 1549 break; 1550 } 1551 1552 // @xyz 1553 case FOURCC('\xA9', 'x', 'y', 'z'): 1554 { 1555 *offset += chunk_size; 1556 1557 // Best case the total data length inside "@xyz" box 1558 // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/", 1559 // where "\x00\x04" is the text string length with value = 4, 1560 // "\0x15\xc7" is the language code = en, and "0+0" is a 1561 // location (string) value with longitude = 0 and latitude = 0. 1562 if (chunk_data_size < 8) { 1563 return ERROR_MALFORMED; 1564 } 1565 1566 // Worst case the location string length would be 18, 1567 // for instance +90.0000-180.0000, without the trailing "/" and 1568 // the string length + language code. 1569 char buffer[18]; 1570 1571 // Substracting 5 from the data size is because the text string length + 1572 // language code takes 4 bytes, and the trailing slash "/" takes 1 byte. 1573 off64_t location_length = chunk_data_size - 5; 1574 if (location_length >= (off64_t) sizeof(buffer)) { 1575 return ERROR_MALFORMED; 1576 } 1577 1578 if (mDataSource->readAt( 1579 data_offset + 4, buffer, location_length) < location_length) { 1580 return ERROR_IO; 1581 } 1582 1583 buffer[location_length] = '\0'; 1584 mFileMetaData->setCString(kKeyLocation, buffer); 1585 break; 1586 } 1587 1588 case FOURCC('e', 's', 'd', 's'): 1589 { 1590 *offset += chunk_size; 1591 1592 if (chunk_data_size < 4) { 1593 return ERROR_MALFORMED; 1594 } 1595 1596 uint8_t buffer[256]; 1597 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1598 return ERROR_BUFFER_TOO_SMALL; 1599 } 1600 1601 if (mDataSource->readAt( 1602 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1603 return ERROR_IO; 1604 } 1605 1606 if (U32_AT(buffer) != 0) { 1607 // Should be version 0, flags 0. 1608 return ERROR_MALFORMED; 1609 } 1610 1611 mLastTrack->meta->setData( 1612 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1613 1614 if (mPath.size() >= 2 1615 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1616 // Information from the ESDS must be relied on for proper 1617 // setup of sample rate and channel count for MPEG4 Audio. 1618 // The generic header appears to only contain generic 1619 // information... 1620 1621 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1622 &buffer[4], chunk_data_size - 4); 1623 1624 if (err != OK) { 1625 return err; 1626 } 1627 } 1628 1629 break; 1630 } 1631 1632 case FOURCC('a', 'v', 'c', 'C'): 1633 { 1634 *offset += chunk_size; 1635 1636 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1637 1638 if (mDataSource->readAt( 1639 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1640 return ERROR_IO; 1641 } 1642 1643 mLastTrack->meta->setData( 1644 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size); 1645 1646 break; 1647 } 1648 case FOURCC('h', 'v', 'c', 'C'): 1649 { 1650 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1651 1652 if (mDataSource->readAt( 1653 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1654 return ERROR_IO; 1655 } 1656 1657 mLastTrack->meta->setData( 1658 kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size); 1659 1660 *offset += chunk_size; 1661 break; 1662 } 1663 1664 case FOURCC('d', '2', '6', '3'): 1665 { 1666 *offset += chunk_size; 1667 /* 1668 * d263 contains a fixed 7 bytes part: 1669 * vendor - 4 bytes 1670 * version - 1 byte 1671 * level - 1 byte 1672 * profile - 1 byte 1673 * optionally, "d263" box itself may contain a 16-byte 1674 * bit rate box (bitr) 1675 * average bit rate - 4 bytes 1676 * max bit rate - 4 bytes 1677 */ 1678 char buffer[23]; 1679 if (chunk_data_size != 7 && 1680 chunk_data_size != 23) { 1681 ALOGE("Incorrect D263 box size %lld", chunk_data_size); 1682 return ERROR_MALFORMED; 1683 } 1684 1685 if (mDataSource->readAt( 1686 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1687 return ERROR_IO; 1688 } 1689 1690 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 1691 1692 break; 1693 } 1694 1695 case FOURCC('m', 'e', 't', 'a'): 1696 { 1697 uint8_t buffer[4]; 1698 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1699 *offset += chunk_size; 1700 return ERROR_MALFORMED; 1701 } 1702 1703 if (mDataSource->readAt( 1704 data_offset, buffer, 4) < 4) { 1705 *offset += chunk_size; 1706 return ERROR_IO; 1707 } 1708 1709 if (U32_AT(buffer) != 0) { 1710 // Should be version 0, flags 0. 1711 1712 // If it's not, let's assume this is one of those 1713 // apparently malformed chunks that don't have flags 1714 // and completely different semantics than what's 1715 // in the MPEG4 specs and skip it. 1716 *offset += chunk_size; 1717 return OK; 1718 } 1719 1720 off64_t stop_offset = *offset + chunk_size; 1721 *offset = data_offset + sizeof(buffer); 1722 while (*offset < stop_offset) { 1723 status_t err = parseChunk(offset, depth + 1); 1724 if (err != OK) { 1725 return err; 1726 } 1727 } 1728 1729 if (*offset != stop_offset) { 1730 return ERROR_MALFORMED; 1731 } 1732 break; 1733 } 1734 1735 case FOURCC('m', 'e', 'a', 'n'): 1736 case FOURCC('n', 'a', 'm', 'e'): 1737 case FOURCC('d', 'a', 't', 'a'): 1738 { 1739 *offset += chunk_size; 1740 1741 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 1742 status_t err = parseITunesMetaData(data_offset, chunk_data_size); 1743 1744 if (err != OK) { 1745 return err; 1746 } 1747 } 1748 1749 break; 1750 } 1751 1752 case FOURCC('m', 'v', 'h', 'd'): 1753 { 1754 *offset += chunk_size; 1755 1756 if (chunk_data_size < 32) { 1757 return ERROR_MALFORMED; 1758 } 1759 1760 uint8_t header[32]; 1761 if (mDataSource->readAt( 1762 data_offset, header, sizeof(header)) 1763 < (ssize_t)sizeof(header)) { 1764 return ERROR_IO; 1765 } 1766 1767 uint64_t creationTime; 1768 uint64_t duration = 0; 1769 if (header[0] == 1) { 1770 creationTime = U64_AT(&header[4]); 1771 mHeaderTimescale = U32_AT(&header[20]); 1772 duration = U64_AT(&header[24]); 1773 if (duration == 0xffffffffffffffff) { 1774 duration = 0; 1775 } 1776 } else if (header[0] != 0) { 1777 return ERROR_MALFORMED; 1778 } else { 1779 creationTime = U32_AT(&header[4]); 1780 mHeaderTimescale = U32_AT(&header[12]); 1781 uint32_t d32 = U32_AT(&header[16]); 1782 if (d32 == 0xffffffff) { 1783 d32 = 0; 1784 } 1785 duration = d32; 1786 } 1787 if (duration != 0) { 1788 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 1789 } 1790 1791 String8 s; 1792 convertTimeToDate(creationTime, &s); 1793 1794 mFileMetaData->setCString(kKeyDate, s.string()); 1795 1796 break; 1797 } 1798 1799 case FOURCC('m', 'e', 'h', 'd'): 1800 { 1801 *offset += chunk_size; 1802 1803 if (chunk_data_size < 8) { 1804 return ERROR_MALFORMED; 1805 } 1806 1807 uint8_t flags[4]; 1808 if (mDataSource->readAt( 1809 data_offset, flags, sizeof(flags)) 1810 < (ssize_t)sizeof(flags)) { 1811 return ERROR_IO; 1812 } 1813 1814 uint64_t duration = 0; 1815 if (flags[0] == 1) { 1816 // 64 bit 1817 if (chunk_data_size < 12) { 1818 return ERROR_MALFORMED; 1819 } 1820 mDataSource->getUInt64(data_offset + 4, &duration); 1821 if (duration == 0xffffffffffffffff) { 1822 duration = 0; 1823 } 1824 } else if (flags[0] == 0) { 1825 // 32 bit 1826 uint32_t d32; 1827 mDataSource->getUInt32(data_offset + 4, &d32); 1828 if (d32 == 0xffffffff) { 1829 d32 = 0; 1830 } 1831 duration = d32; 1832 } else { 1833 return ERROR_MALFORMED; 1834 } 1835 1836 if (duration != 0) { 1837 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 1838 } 1839 1840 break; 1841 } 1842 1843 case FOURCC('m', 'd', 'a', 't'): 1844 { 1845 ALOGV("mdat chunk, drm: %d", mIsDrm); 1846 if (!mIsDrm) { 1847 *offset += chunk_size; 1848 break; 1849 } 1850 1851 if (chunk_size < 8) { 1852 return ERROR_MALFORMED; 1853 } 1854 1855 return parseDrmSINF(offset, data_offset); 1856 } 1857 1858 case FOURCC('h', 'd', 'l', 'r'): 1859 { 1860 *offset += chunk_size; 1861 1862 uint32_t buffer; 1863 if (mDataSource->readAt( 1864 data_offset + 8, &buffer, 4) < 4) { 1865 return ERROR_IO; 1866 } 1867 1868 uint32_t type = ntohl(buffer); 1869 // For the 3GPP file format, the handler-type within the 'hdlr' box 1870 // shall be 'text'. We also want to support 'sbtl' handler type 1871 // for a practical reason as various MPEG4 containers use it. 1872 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 1873 if (mLastTrack != NULL) { 1874 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 1875 } 1876 } 1877 1878 break; 1879 } 1880 1881 case FOURCC('t', 'r', 'e', 'x'): 1882 { 1883 *offset += chunk_size; 1884 1885 if (chunk_data_size < 24) { 1886 return ERROR_IO; 1887 } 1888 uint32_t duration; 1889 Trex trex; 1890 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) || 1891 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) || 1892 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) || 1893 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) || 1894 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) { 1895 return ERROR_IO; 1896 } 1897 mTrex.add(trex); 1898 break; 1899 } 1900 1901 case FOURCC('t', 'x', '3', 'g'): 1902 { 1903 uint32_t type; 1904 const void *data; 1905 size_t size = 0; 1906 if (!mLastTrack->meta->findData( 1907 kKeyTextFormatData, &type, &data, &size)) { 1908 size = 0; 1909 } 1910 1911 if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) { 1912 return ERROR_MALFORMED; 1913 } 1914 1915 uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size]; 1916 if (buffer == NULL) { 1917 return ERROR_MALFORMED; 1918 } 1919 1920 if (size > 0) { 1921 memcpy(buffer, data, size); 1922 } 1923 1924 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 1925 < chunk_size) { 1926 delete[] buffer; 1927 buffer = NULL; 1928 1929 // advance read pointer so we don't end up reading this again 1930 *offset += chunk_size; 1931 return ERROR_IO; 1932 } 1933 1934 mLastTrack->meta->setData( 1935 kKeyTextFormatData, 0, buffer, size + chunk_size); 1936 1937 delete[] buffer; 1938 1939 *offset += chunk_size; 1940 break; 1941 } 1942 1943 case FOURCC('c', 'o', 'v', 'r'): 1944 { 1945 *offset += chunk_size; 1946 1947 if (mFileMetaData != NULL) { 1948 ALOGV("chunk_data_size = %lld and data_offset = %lld", 1949 chunk_data_size, data_offset); 1950 1951 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) { 1952 return ERROR_MALFORMED; 1953 } 1954 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1); 1955 if (mDataSource->readAt( 1956 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) { 1957 return ERROR_IO; 1958 } 1959 const int kSkipBytesOfDataBox = 16; 1960 if (chunk_data_size <= kSkipBytesOfDataBox) { 1961 return ERROR_MALFORMED; 1962 } 1963 1964 mFileMetaData->setData( 1965 kKeyAlbumArt, MetaData::TYPE_NONE, 1966 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 1967 } 1968 1969 break; 1970 } 1971 1972 case FOURCC('t', 'i', 't', 'l'): 1973 case FOURCC('p', 'e', 'r', 'f'): 1974 case FOURCC('a', 'u', 't', 'h'): 1975 case FOURCC('g', 'n', 'r', 'e'): 1976 case FOURCC('a', 'l', 'b', 'm'): 1977 case FOURCC('y', 'r', 'r', 'c'): 1978 { 1979 *offset += chunk_size; 1980 1981 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth); 1982 1983 if (err != OK) { 1984 return err; 1985 } 1986 1987 break; 1988 } 1989 1990 case FOURCC('I', 'D', '3', '2'): 1991 { 1992 *offset += chunk_size; 1993 1994 if (chunk_data_size < 6) { 1995 return ERROR_MALFORMED; 1996 } 1997 1998 parseID3v2MetaData(data_offset + 6); 1999 2000 break; 2001 } 2002 2003 case FOURCC('-', '-', '-', '-'): 2004 { 2005 mLastCommentMean.clear(); 2006 mLastCommentName.clear(); 2007 mLastCommentData.clear(); 2008 *offset += chunk_size; 2009 break; 2010 } 2011 2012 case FOURCC('s', 'i', 'd', 'x'): 2013 { 2014 parseSegmentIndex(data_offset, chunk_data_size); 2015 *offset += chunk_size; 2016 return UNKNOWN_ERROR; // stop parsing after sidx 2017 } 2018 2019 default: 2020 { 2021 *offset += chunk_size; 2022 break; 2023 } 2024 } 2025 2026 return OK; 2027} 2028 2029status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 2030 ALOGV("MPEG4Extractor::parseSegmentIndex"); 2031 2032 if (size < 12) { 2033 return -EINVAL; 2034 } 2035 2036 uint32_t flags; 2037 if (!mDataSource->getUInt32(offset, &flags)) { 2038 return ERROR_MALFORMED; 2039 } 2040 2041 uint32_t version = flags >> 24; 2042 flags &= 0xffffff; 2043 2044 ALOGV("sidx version %d", version); 2045 2046 uint32_t referenceId; 2047 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 2048 return ERROR_MALFORMED; 2049 } 2050 2051 uint32_t timeScale; 2052 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 2053 return ERROR_MALFORMED; 2054 } 2055 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 2056 2057 uint64_t earliestPresentationTime; 2058 uint64_t firstOffset; 2059 2060 offset += 12; 2061 size -= 12; 2062 2063 if (version == 0) { 2064 if (size < 8) { 2065 return -EINVAL; 2066 } 2067 uint32_t tmp; 2068 if (!mDataSource->getUInt32(offset, &tmp)) { 2069 return ERROR_MALFORMED; 2070 } 2071 earliestPresentationTime = tmp; 2072 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 2073 return ERROR_MALFORMED; 2074 } 2075 firstOffset = tmp; 2076 offset += 8; 2077 size -= 8; 2078 } else { 2079 if (size < 16) { 2080 return -EINVAL; 2081 } 2082 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 2083 return ERROR_MALFORMED; 2084 } 2085 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 2086 return ERROR_MALFORMED; 2087 } 2088 offset += 16; 2089 size -= 16; 2090 } 2091 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset); 2092 2093 if (size < 4) { 2094 return -EINVAL; 2095 } 2096 2097 uint16_t referenceCount; 2098 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 2099 return ERROR_MALFORMED; 2100 } 2101 offset += 4; 2102 size -= 4; 2103 ALOGV("refcount: %d", referenceCount); 2104 2105 if (size < referenceCount * 12) { 2106 return -EINVAL; 2107 } 2108 2109 uint64_t total_duration = 0; 2110 for (unsigned int i = 0; i < referenceCount; i++) { 2111 uint32_t d1, d2, d3; 2112 2113 if (!mDataSource->getUInt32(offset, &d1) || // size 2114 !mDataSource->getUInt32(offset + 4, &d2) || // duration 2115 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 2116 return ERROR_MALFORMED; 2117 } 2118 2119 if (d1 & 0x80000000) { 2120 ALOGW("sub-sidx boxes not supported yet"); 2121 } 2122 bool sap = d3 & 0x80000000; 2123 uint32_t saptype = (d3 >> 28) & 7; 2124 if (!sap || (saptype != 1 && saptype != 2)) { 2125 // type 1 and 2 are sync samples 2126 ALOGW("not a stream access point, or unsupported type: %08x", d3); 2127 } 2128 total_duration += d2; 2129 offset += 12; 2130 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 2131 SidxEntry se; 2132 se.mSize = d1 & 0x7fffffff; 2133 se.mDurationUs = 1000000LL * d2 / timeScale; 2134 mSidxEntries.add(se); 2135 } 2136 2137 uint64_t sidxDuration = total_duration * 1000000 / timeScale; 2138 2139 int64_t metaDuration; 2140 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 2141 mLastTrack->meta->setInt64(kKeyDuration, sidxDuration); 2142 } 2143 return OK; 2144} 2145 2146 2147 2148status_t MPEG4Extractor::parseTrackHeader( 2149 off64_t data_offset, off64_t data_size) { 2150 if (data_size < 4) { 2151 return ERROR_MALFORMED; 2152 } 2153 2154 uint8_t version; 2155 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 2156 return ERROR_IO; 2157 } 2158 2159 size_t dynSize = (version == 1) ? 36 : 24; 2160 2161 uint8_t buffer[36 + 60]; 2162 2163 if (data_size != (off64_t)dynSize + 60) { 2164 return ERROR_MALFORMED; 2165 } 2166 2167 if (mDataSource->readAt( 2168 data_offset, buffer, data_size) < (ssize_t)data_size) { 2169 return ERROR_IO; 2170 } 2171 2172 uint64_t ctime, mtime, duration; 2173 int32_t id; 2174 2175 if (version == 1) { 2176 ctime = U64_AT(&buffer[4]); 2177 mtime = U64_AT(&buffer[12]); 2178 id = U32_AT(&buffer[20]); 2179 duration = U64_AT(&buffer[28]); 2180 } else if (version == 0) { 2181 ctime = U32_AT(&buffer[4]); 2182 mtime = U32_AT(&buffer[8]); 2183 id = U32_AT(&buffer[12]); 2184 duration = U32_AT(&buffer[20]); 2185 } else { 2186 return ERROR_UNSUPPORTED; 2187 } 2188 2189 mLastTrack->meta->setInt32(kKeyTrackID, id); 2190 2191 size_t matrixOffset = dynSize + 16; 2192 int32_t a00 = U32_AT(&buffer[matrixOffset]); 2193 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 2194 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 2195 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 2196 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 2197 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 2198 2199#if 0 2200 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 2201 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 2202 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 2203 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 2204#endif 2205 2206 uint32_t rotationDegrees; 2207 2208 static const int32_t kFixedOne = 0x10000; 2209 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 2210 // Identity, no rotation 2211 rotationDegrees = 0; 2212 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 2213 rotationDegrees = 90; 2214 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 2215 rotationDegrees = 270; 2216 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { 2217 rotationDegrees = 180; 2218 } else { 2219 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 2220 rotationDegrees = 0; 2221 } 2222 2223 if (rotationDegrees != 0) { 2224 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); 2225 } 2226 2227 // Handle presentation display size, which could be different 2228 // from the image size indicated by kKeyWidth and kKeyHeight. 2229 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2230 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2231 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); 2232 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); 2233 2234 return OK; 2235} 2236 2237status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) { 2238 if (size < 4 || size == SIZE_MAX) { 2239 return ERROR_MALFORMED; 2240 } 2241 2242 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2243 if (buffer == NULL) { 2244 return ERROR_MALFORMED; 2245 } 2246 if (mDataSource->readAt( 2247 offset, buffer, size) != (ssize_t)size) { 2248 delete[] buffer; 2249 buffer = NULL; 2250 2251 return ERROR_IO; 2252 } 2253 2254 uint32_t flags = U32_AT(buffer); 2255 2256 uint32_t metadataKey = 0; 2257 char chunk[5]; 2258 MakeFourCCString(mPath[4], chunk); 2259 ALOGV("meta: %s @ %lld", chunk, offset); 2260 switch (mPath[4]) { 2261 case FOURCC(0xa9, 'a', 'l', 'b'): 2262 { 2263 metadataKey = kKeyAlbum; 2264 break; 2265 } 2266 case FOURCC(0xa9, 'A', 'R', 'T'): 2267 { 2268 metadataKey = kKeyArtist; 2269 break; 2270 } 2271 case FOURCC('a', 'A', 'R', 'T'): 2272 { 2273 metadataKey = kKeyAlbumArtist; 2274 break; 2275 } 2276 case FOURCC(0xa9, 'd', 'a', 'y'): 2277 { 2278 metadataKey = kKeyYear; 2279 break; 2280 } 2281 case FOURCC(0xa9, 'n', 'a', 'm'): 2282 { 2283 metadataKey = kKeyTitle; 2284 break; 2285 } 2286 case FOURCC(0xa9, 'w', 'r', 't'): 2287 { 2288 metadataKey = kKeyWriter; 2289 break; 2290 } 2291 case FOURCC('c', 'o', 'v', 'r'): 2292 { 2293 metadataKey = kKeyAlbumArt; 2294 break; 2295 } 2296 case FOURCC('g', 'n', 'r', 'e'): 2297 { 2298 metadataKey = kKeyGenre; 2299 break; 2300 } 2301 case FOURCC(0xa9, 'g', 'e', 'n'): 2302 { 2303 metadataKey = kKeyGenre; 2304 break; 2305 } 2306 case FOURCC('c', 'p', 'i', 'l'): 2307 { 2308 if (size == 9 && flags == 21) { 2309 char tmp[16]; 2310 sprintf(tmp, "%d", 2311 (int)buffer[size - 1]); 2312 2313 mFileMetaData->setCString(kKeyCompilation, tmp); 2314 } 2315 break; 2316 } 2317 case FOURCC('t', 'r', 'k', 'n'): 2318 { 2319 if (size == 16 && flags == 0) { 2320 char tmp[16]; 2321 uint16_t* pTrack = (uint16_t*)&buffer[10]; 2322 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 2323 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 2324 2325 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2326 } 2327 break; 2328 } 2329 case FOURCC('d', 'i', 's', 'k'): 2330 { 2331 if ((size == 14 || size == 16) && flags == 0) { 2332 char tmp[16]; 2333 uint16_t* pDisc = (uint16_t*)&buffer[10]; 2334 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 2335 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 2336 2337 mFileMetaData->setCString(kKeyDiscNumber, tmp); 2338 } 2339 break; 2340 } 2341 case FOURCC('-', '-', '-', '-'): 2342 { 2343 buffer[size] = '\0'; 2344 switch (mPath[5]) { 2345 case FOURCC('m', 'e', 'a', 'n'): 2346 mLastCommentMean.setTo((const char *)buffer + 4); 2347 break; 2348 case FOURCC('n', 'a', 'm', 'e'): 2349 mLastCommentName.setTo((const char *)buffer + 4); 2350 break; 2351 case FOURCC('d', 'a', 't', 'a'): 2352 if (size < 8) { 2353 delete[] buffer; 2354 buffer = NULL; 2355 ALOGE("b/24346430"); 2356 return ERROR_MALFORMED; 2357 } 2358 mLastCommentData.setTo((const char *)buffer + 8); 2359 break; 2360 } 2361 2362 // Once we have a set of mean/name/data info, go ahead and process 2363 // it to see if its something we are interested in. Whether or not 2364 // were are interested in the specific tag, make sure to clear out 2365 // the set so we can be ready to process another tuple should one 2366 // show up later in the file. 2367 if ((mLastCommentMean.length() != 0) && 2368 (mLastCommentName.length() != 0) && 2369 (mLastCommentData.length() != 0)) { 2370 2371 if (mLastCommentMean == "com.apple.iTunes" 2372 && mLastCommentName == "iTunSMPB") { 2373 int32_t delay, padding; 2374 if (sscanf(mLastCommentData, 2375 " %*x %x %x %*x", &delay, &padding) == 2) { 2376 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 2377 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); 2378 } 2379 } 2380 2381 mLastCommentMean.clear(); 2382 mLastCommentName.clear(); 2383 mLastCommentData.clear(); 2384 } 2385 break; 2386 } 2387 2388 default: 2389 break; 2390 } 2391 2392 if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) { 2393 if (metadataKey == kKeyAlbumArt) { 2394 mFileMetaData->setData( 2395 kKeyAlbumArt, MetaData::TYPE_NONE, 2396 buffer + 8, size - 8); 2397 } else if (metadataKey == kKeyGenre) { 2398 if (flags == 0) { 2399 // uint8_t genre code, iTunes genre codes are 2400 // the standard id3 codes, except they start 2401 // at 1 instead of 0 (e.g. Pop is 14, not 13) 2402 // We use standard id3 numbering, so subtract 1. 2403 int genrecode = (int)buffer[size - 1]; 2404 genrecode--; 2405 if (genrecode < 0) { 2406 genrecode = 255; // reserved for 'unknown genre' 2407 } 2408 char genre[10]; 2409 sprintf(genre, "%d", genrecode); 2410 2411 mFileMetaData->setCString(metadataKey, genre); 2412 } else if (flags == 1) { 2413 // custom genre string 2414 buffer[size] = '\0'; 2415 2416 mFileMetaData->setCString( 2417 metadataKey, (const char *)buffer + 8); 2418 } 2419 } else { 2420 buffer[size] = '\0'; 2421 2422 mFileMetaData->setCString( 2423 metadataKey, (const char *)buffer + 8); 2424 } 2425 } 2426 2427 delete[] buffer; 2428 buffer = NULL; 2429 2430 return OK; 2431} 2432 2433status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) { 2434 if (size < 4 || size == SIZE_MAX) { 2435 return ERROR_MALFORMED; 2436 } 2437 2438 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2439 if (buffer == NULL) { 2440 return ERROR_MALFORMED; 2441 } 2442 if (mDataSource->readAt( 2443 offset, buffer, size) != (ssize_t)size) { 2444 delete[] buffer; 2445 buffer = NULL; 2446 2447 return ERROR_IO; 2448 } 2449 2450 uint32_t metadataKey = 0; 2451 switch (mPath[depth]) { 2452 case FOURCC('t', 'i', 't', 'l'): 2453 { 2454 metadataKey = kKeyTitle; 2455 break; 2456 } 2457 case FOURCC('p', 'e', 'r', 'f'): 2458 { 2459 metadataKey = kKeyArtist; 2460 break; 2461 } 2462 case FOURCC('a', 'u', 't', 'h'): 2463 { 2464 metadataKey = kKeyWriter; 2465 break; 2466 } 2467 case FOURCC('g', 'n', 'r', 'e'): 2468 { 2469 metadataKey = kKeyGenre; 2470 break; 2471 } 2472 case FOURCC('a', 'l', 'b', 'm'): 2473 { 2474 if (buffer[size - 1] != '\0') { 2475 char tmp[4]; 2476 sprintf(tmp, "%u", buffer[size - 1]); 2477 2478 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2479 } 2480 2481 metadataKey = kKeyAlbum; 2482 break; 2483 } 2484 case FOURCC('y', 'r', 'r', 'c'): 2485 { 2486 char tmp[5]; 2487 uint16_t year = U16_AT(&buffer[4]); 2488 2489 if (year < 10000) { 2490 sprintf(tmp, "%u", year); 2491 2492 mFileMetaData->setCString(kKeyYear, tmp); 2493 } 2494 break; 2495 } 2496 2497 default: 2498 break; 2499 } 2500 2501 if (metadataKey > 0) { 2502 bool isUTF8 = true; // Common case 2503 char16_t *framedata = NULL; 2504 int len16 = 0; // Number of UTF-16 characters 2505 2506 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00 2507 if (size < 6) { 2508 return ERROR_MALFORMED; 2509 } 2510 2511 if (size - 6 >= 4) { 2512 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator 2513 framedata = (char16_t *)(buffer + 6); 2514 if (0xfffe == *framedata) { 2515 // endianness marker (BOM) doesn't match host endianness 2516 for (int i = 0; i < len16; i++) { 2517 framedata[i] = bswap_16(framedata[i]); 2518 } 2519 // BOM is now swapped to 0xfeff, we will execute next block too 2520 } 2521 2522 if (0xfeff == *framedata) { 2523 // Remove the BOM 2524 framedata++; 2525 len16--; 2526 isUTF8 = false; 2527 } 2528 // else normal non-zero-length UTF-8 string 2529 // we can't handle UTF-16 without BOM as there is no other 2530 // indication of encoding. 2531 } 2532 2533 if (isUTF8) { 2534 buffer[size] = 0; 2535 mFileMetaData->setCString(metadataKey, (const char *)buffer + 6); 2536 } else { 2537 // Convert from UTF-16 string to UTF-8 string. 2538 String8 tmpUTF8str(framedata, len16); 2539 mFileMetaData->setCString(metadataKey, tmpUTF8str.string()); 2540 } 2541 } 2542 2543 delete[] buffer; 2544 buffer = NULL; 2545 2546 return OK; 2547} 2548 2549void MPEG4Extractor::parseID3v2MetaData(off64_t offset) { 2550 ID3 id3(mDataSource, true /* ignorev1 */, offset); 2551 2552 if (id3.isValid()) { 2553 struct Map { 2554 int key; 2555 const char *tag1; 2556 const char *tag2; 2557 }; 2558 static const Map kMap[] = { 2559 { kKeyAlbum, "TALB", "TAL" }, 2560 { kKeyArtist, "TPE1", "TP1" }, 2561 { kKeyAlbumArtist, "TPE2", "TP2" }, 2562 { kKeyComposer, "TCOM", "TCM" }, 2563 { kKeyGenre, "TCON", "TCO" }, 2564 { kKeyTitle, "TIT2", "TT2" }, 2565 { kKeyYear, "TYE", "TYER" }, 2566 { kKeyAuthor, "TXT", "TEXT" }, 2567 { kKeyCDTrackNumber, "TRK", "TRCK" }, 2568 { kKeyDiscNumber, "TPA", "TPOS" }, 2569 { kKeyCompilation, "TCP", "TCMP" }, 2570 }; 2571 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 2572 2573 for (size_t i = 0; i < kNumMapEntries; ++i) { 2574 if (!mFileMetaData->hasData(kMap[i].key)) { 2575 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 2576 if (it->done()) { 2577 delete it; 2578 it = new ID3::Iterator(id3, kMap[i].tag2); 2579 } 2580 2581 if (it->done()) { 2582 delete it; 2583 continue; 2584 } 2585 2586 String8 s; 2587 it->getString(&s); 2588 delete it; 2589 2590 mFileMetaData->setCString(kMap[i].key, s); 2591 } 2592 } 2593 2594 size_t dataSize; 2595 String8 mime; 2596 const void *data = id3.getAlbumArt(&dataSize, &mime); 2597 2598 if (data) { 2599 mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 2600 mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string()); 2601 } 2602 } 2603} 2604 2605sp<MediaSource> MPEG4Extractor::getTrack(size_t index) { 2606 status_t err; 2607 if ((err = readMetaData()) != OK) { 2608 return NULL; 2609 } 2610 2611 Track *track = mFirstTrack; 2612 while (index > 0) { 2613 if (track == NULL) { 2614 return NULL; 2615 } 2616 2617 track = track->next; 2618 --index; 2619 } 2620 2621 if (track == NULL) { 2622 return NULL; 2623 } 2624 2625 2626 Trex *trex = NULL; 2627 int32_t trackId; 2628 if (track->meta->findInt32(kKeyTrackID, &trackId)) { 2629 for (size_t i = 0; i < mTrex.size(); i++) { 2630 Trex *t = &mTrex.editItemAt(index); 2631 if (t->track_ID == (uint32_t) trackId) { 2632 trex = t; 2633 break; 2634 } 2635 } 2636 } 2637 2638 ALOGV("getTrack called, pssh: %zu", mPssh.size()); 2639 2640 return new MPEG4Source(this, 2641 track->meta, mDataSource, track->timescale, track->sampleTable, 2642 mSidxEntries, trex, mMoofOffset); 2643} 2644 2645// static 2646status_t MPEG4Extractor::verifyTrack(Track *track) { 2647 const char *mime; 2648 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 2649 2650 uint32_t type; 2651 const void *data; 2652 size_t size; 2653 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 2654 if (!track->meta->findData(kKeyAVCC, &type, &data, &size) 2655 || type != kTypeAVCC) { 2656 return ERROR_MALFORMED; 2657 } 2658 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 2659 if (!track->meta->findData(kKeyHVCC, &type, &data, &size) 2660 || type != kTypeHVCC) { 2661 return ERROR_MALFORMED; 2662 } 2663 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 2664 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 2665 if (!track->meta->findData(kKeyESDS, &type, &data, &size) 2666 || type != kTypeESDS) { 2667 return ERROR_MALFORMED; 2668 } 2669 } 2670 2671 if (track->sampleTable == NULL || !track->sampleTable->isValid()) { 2672 // Make sure we have all the metadata we need. 2673 ALOGE("stbl atom missing/invalid."); 2674 return ERROR_MALFORMED; 2675 } 2676 2677 return OK; 2678} 2679 2680typedef enum { 2681 //AOT_NONE = -1, 2682 //AOT_NULL_OBJECT = 0, 2683 //AOT_AAC_MAIN = 1, /**< Main profile */ 2684 AOT_AAC_LC = 2, /**< Low Complexity object */ 2685 //AOT_AAC_SSR = 3, 2686 //AOT_AAC_LTP = 4, 2687 AOT_SBR = 5, 2688 //AOT_AAC_SCAL = 6, 2689 //AOT_TWIN_VQ = 7, 2690 //AOT_CELP = 8, 2691 //AOT_HVXC = 9, 2692 //AOT_RSVD_10 = 10, /**< (reserved) */ 2693 //AOT_RSVD_11 = 11, /**< (reserved) */ 2694 //AOT_TTSI = 12, /**< TTSI Object */ 2695 //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */ 2696 //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */ 2697 //AOT_GEN_MIDI = 15, /**< General MIDI object */ 2698 //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */ 2699 AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */ 2700 //AOT_RSVD_18 = 18, /**< (reserved) */ 2701 //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */ 2702 AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */ 2703 //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */ 2704 AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */ 2705 AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */ 2706 //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */ 2707 //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */ 2708 //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */ 2709 //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */ 2710 //AOT_RSVD_28 = 28, /**< might become SSC */ 2711 AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */ 2712 //AOT_MPEGS = 30, /**< MPEG Surround */ 2713 2714 AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */ 2715 2716 //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */ 2717 //AOT_MP3ONMP4_L2 = 33, /**< MPEG-Layer2 in mp4 */ 2718 //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */ 2719 //AOT_RSVD_35 = 35, /**< might become DST */ 2720 //AOT_RSVD_36 = 36, /**< might become ALS */ 2721 //AOT_AAC_SLS = 37, /**< AAC + SLS */ 2722 //AOT_SLS = 38, /**< SLS */ 2723 //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */ 2724 2725 //AOT_USAC = 42, /**< USAC */ 2726 //AOT_SAOC = 43, /**< SAOC */ 2727 //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */ 2728 2729 //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */ 2730} AUDIO_OBJECT_TYPE; 2731 2732status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 2733 const void *esds_data, size_t esds_size) { 2734 ESDS esds(esds_data, esds_size); 2735 2736 uint8_t objectTypeIndication; 2737 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 2738 return ERROR_MALFORMED; 2739 } 2740 2741 if (objectTypeIndication == 0xe1) { 2742 // This isn't MPEG4 audio at all, it's QCELP 14k... 2743 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 2744 return OK; 2745 } 2746 2747 if (objectTypeIndication == 0x6b) { 2748 // The media subtype is MP3 audio 2749 // Our software MP3 audio decoder may not be able to handle 2750 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 2751 ALOGE("MP3 track in MP4/3GPP file is not supported"); 2752 return ERROR_UNSUPPORTED; 2753 } 2754 2755 const uint8_t *csd; 2756 size_t csd_size; 2757 if (esds.getCodecSpecificInfo( 2758 (const void **)&csd, &csd_size) != OK) { 2759 return ERROR_MALFORMED; 2760 } 2761 2762#if 0 2763 printf("ESD of size %d\n", csd_size); 2764 hexdump(csd, csd_size); 2765#endif 2766 2767 if (csd_size == 0) { 2768 // There's no further information, i.e. no codec specific data 2769 // Let's assume that the information provided in the mpeg4 headers 2770 // is accurate and hope for the best. 2771 2772 return OK; 2773 } 2774 2775 if (csd_size < 2) { 2776 return ERROR_MALFORMED; 2777 } 2778 2779 static uint32_t kSamplingRate[] = { 2780 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 2781 16000, 12000, 11025, 8000, 7350 2782 }; 2783 2784 ABitReader br(csd, csd_size); 2785 uint32_t objectType = br.getBits(5); 2786 2787 if (objectType == 31) { // AAC-ELD => additional 6 bits 2788 objectType = 32 + br.getBits(6); 2789 } 2790 2791 //keep AOT type 2792 mLastTrack->meta->setInt32(kKeyAACAOT, objectType); 2793 2794 uint32_t freqIndex = br.getBits(4); 2795 2796 int32_t sampleRate = 0; 2797 int32_t numChannels = 0; 2798 if (freqIndex == 15) { 2799 if (csd_size < 5) { 2800 return ERROR_MALFORMED; 2801 } 2802 sampleRate = br.getBits(24); 2803 numChannels = br.getBits(4); 2804 } else { 2805 numChannels = br.getBits(4); 2806 2807 if (freqIndex == 13 || freqIndex == 14) { 2808 return ERROR_MALFORMED; 2809 } 2810 2811 sampleRate = kSamplingRate[freqIndex]; 2812 } 2813 2814 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 2815 uint32_t extFreqIndex = br.getBits(4); 2816 int32_t extSampleRate; 2817 if (extFreqIndex == 15) { 2818 if (csd_size < 8) { 2819 return ERROR_MALFORMED; 2820 } 2821 extSampleRate = br.getBits(24); 2822 } else { 2823 if (extFreqIndex == 13 || extFreqIndex == 14) { 2824 return ERROR_MALFORMED; 2825 } 2826 extSampleRate = kSamplingRate[extFreqIndex]; 2827 } 2828 //TODO: save the extension sampling rate value in meta data => 2829 // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate); 2830 } 2831 2832 switch (numChannels) { 2833 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration 2834 case 0: 2835 case 1:// FC 2836 case 2:// FL FR 2837 case 3:// FC, FL FR 2838 case 4:// FC, FL FR, RC 2839 case 5:// FC, FL FR, SL SR 2840 case 6:// FC, FL FR, SL SR, LFE 2841 //numChannels already contains the right value 2842 break; 2843 case 11:// FC, FL FR, SL SR, RC, LFE 2844 numChannels = 7; 2845 break; 2846 case 7: // FC, FCL FCR, FL FR, SL SR, LFE 2847 case 12:// FC, FL FR, SL SR, RL RR, LFE 2848 case 14:// FC, FL FR, SL SR, LFE, FHL FHR 2849 numChannels = 8; 2850 break; 2851 default: 2852 return ERROR_UNSUPPORTED; 2853 } 2854 2855 { 2856 if (objectType == AOT_SBR || objectType == AOT_PS) { 2857 objectType = br.getBits(5); 2858 2859 if (objectType == AOT_ESCAPE) { 2860 objectType = 32 + br.getBits(6); 2861 } 2862 } 2863 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || 2864 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || 2865 objectType == AOT_ER_BSAC) { 2866 const int32_t frameLengthFlag = br.getBits(1); 2867 2868 const int32_t dependsOnCoreCoder = br.getBits(1); 2869 2870 if (dependsOnCoreCoder ) { 2871 const int32_t coreCoderDelay = br.getBits(14); 2872 } 2873 2874 int32_t extensionFlag = -1; 2875 if (br.numBitsLeft() > 0) { 2876 extensionFlag = br.getBits(1); 2877 } else { 2878 switch (objectType) { 2879 // 14496-3 4.5.1.1 extensionFlag 2880 case AOT_AAC_LC: 2881 extensionFlag = 0; 2882 break; 2883 case AOT_ER_AAC_LC: 2884 case AOT_ER_AAC_SCAL: 2885 case AOT_ER_BSAC: 2886 case AOT_ER_AAC_LD: 2887 extensionFlag = 1; 2888 break; 2889 default: 2890 TRESPASS(); 2891 break; 2892 } 2893 ALOGW("csd missing extension flag; assuming %d for object type %u.", 2894 extensionFlag, objectType); 2895 } 2896 2897 if (numChannels == 0) { 2898 int32_t channelsEffectiveNum = 0; 2899 int32_t channelsNum = 0; 2900 const int32_t ElementInstanceTag = br.getBits(4); 2901 const int32_t Profile = br.getBits(2); 2902 const int32_t SamplingFrequencyIndex = br.getBits(4); 2903 const int32_t NumFrontChannelElements = br.getBits(4); 2904 const int32_t NumSideChannelElements = br.getBits(4); 2905 const int32_t NumBackChannelElements = br.getBits(4); 2906 const int32_t NumLfeChannelElements = br.getBits(2); 2907 const int32_t NumAssocDataElements = br.getBits(3); 2908 const int32_t NumValidCcElements = br.getBits(4); 2909 2910 const int32_t MonoMixdownPresent = br.getBits(1); 2911 if (MonoMixdownPresent != 0) { 2912 const int32_t MonoMixdownElementNumber = br.getBits(4); 2913 } 2914 2915 const int32_t StereoMixdownPresent = br.getBits(1); 2916 if (StereoMixdownPresent != 0) { 2917 const int32_t StereoMixdownElementNumber = br.getBits(4); 2918 } 2919 2920 const int32_t MatrixMixdownIndexPresent = br.getBits(1); 2921 if (MatrixMixdownIndexPresent != 0) { 2922 const int32_t MatrixMixdownIndex = br.getBits(2); 2923 const int32_t PseudoSurroundEnable = br.getBits(1); 2924 } 2925 2926 int i; 2927 for (i=0; i < NumFrontChannelElements; i++) { 2928 const int32_t FrontElementIsCpe = br.getBits(1); 2929 const int32_t FrontElementTagSelect = br.getBits(4); 2930 channelsNum += FrontElementIsCpe ? 2 : 1; 2931 } 2932 2933 for (i=0; i < NumSideChannelElements; i++) { 2934 const int32_t SideElementIsCpe = br.getBits(1); 2935 const int32_t SideElementTagSelect = br.getBits(4); 2936 channelsNum += SideElementIsCpe ? 2 : 1; 2937 } 2938 2939 for (i=0; i < NumBackChannelElements; i++) { 2940 const int32_t BackElementIsCpe = br.getBits(1); 2941 const int32_t BackElementTagSelect = br.getBits(4); 2942 channelsNum += BackElementIsCpe ? 2 : 1; 2943 } 2944 channelsEffectiveNum = channelsNum; 2945 2946 for (i=0; i < NumLfeChannelElements; i++) { 2947 const int32_t LfeElementTagSelect = br.getBits(4); 2948 channelsNum += 1; 2949 } 2950 ALOGV("mpeg4 audio channelsNum = %d", channelsNum); 2951 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); 2952 numChannels = channelsNum; 2953 } 2954 } 2955 } 2956 2957 if (numChannels == 0) { 2958 return ERROR_UNSUPPORTED; 2959 } 2960 2961 int32_t prevSampleRate; 2962 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); 2963 2964 if (prevSampleRate != sampleRate) { 2965 ALOGV("mpeg4 audio sample rate different from previous setting. " 2966 "was: %d, now: %d", prevSampleRate, sampleRate); 2967 } 2968 2969 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 2970 2971 int32_t prevChannelCount; 2972 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); 2973 2974 if (prevChannelCount != numChannels) { 2975 ALOGV("mpeg4 audio channel count different from previous setting. " 2976 "was: %d, now: %d", prevChannelCount, numChannels); 2977 } 2978 2979 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); 2980 2981 return OK; 2982} 2983 2984//////////////////////////////////////////////////////////////////////////////// 2985 2986MPEG4Source::MPEG4Source( 2987 const sp<MPEG4Extractor> &owner, 2988 const sp<MetaData> &format, 2989 const sp<DataSource> &dataSource, 2990 int32_t timeScale, 2991 const sp<SampleTable> &sampleTable, 2992 Vector<SidxEntry> &sidx, 2993 const Trex *trex, 2994 off64_t firstMoofOffset) 2995 : mOwner(owner), 2996 mFormat(format), 2997 mDataSource(dataSource), 2998 mTimescale(timeScale), 2999 mSampleTable(sampleTable), 3000 mCurrentSampleIndex(0), 3001 mCurrentFragmentIndex(0), 3002 mSegments(sidx), 3003 mTrex(trex), 3004 mFirstMoofOffset(firstMoofOffset), 3005 mCurrentMoofOffset(firstMoofOffset), 3006 mCurrentTime(0), 3007 mCurrentSampleInfoAllocSize(0), 3008 mCurrentSampleInfoSizes(NULL), 3009 mCurrentSampleInfoOffsetsAllocSize(0), 3010 mCurrentSampleInfoOffsets(NULL), 3011 mIsAVC(false), 3012 mIsHEVC(false), 3013 mNALLengthSize(0), 3014 mStarted(false), 3015 mGroup(NULL), 3016 mBuffer(NULL), 3017 mWantsNALFragments(false), 3018 mSrcBuffer(NULL) { 3019 3020 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo)); 3021 3022 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode); 3023 mDefaultIVSize = 0; 3024 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 3025 uint32_t keytype; 3026 const void *key; 3027 size_t keysize; 3028 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 3029 CHECK(keysize <= 16); 3030 memset(mCryptoKey, 0, 16); 3031 memcpy(mCryptoKey, key, keysize); 3032 } 3033 3034 const char *mime; 3035 bool success = mFormat->findCString(kKeyMIMEType, &mime); 3036 CHECK(success); 3037 3038 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 3039 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC); 3040 3041 if (mIsAVC) { 3042 uint32_t type; 3043 const void *data; 3044 size_t size; 3045 CHECK(format->findData(kKeyAVCC, &type, &data, &size)); 3046 3047 const uint8_t *ptr = (const uint8_t *)data; 3048 3049 CHECK(size >= 7); 3050 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3051 3052 // The number of bytes used to encode the length of a NAL unit. 3053 mNALLengthSize = 1 + (ptr[4] & 3); 3054 } else if (mIsHEVC) { 3055 uint32_t type; 3056 const void *data; 3057 size_t size; 3058 CHECK(format->findData(kKeyHVCC, &type, &data, &size)); 3059 3060 const uint8_t *ptr = (const uint8_t *)data; 3061 3062 CHECK(size >= 7); 3063 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3064 3065 mNALLengthSize = 1 + (ptr[14 + 7] & 3); 3066 } 3067 3068 CHECK(format->findInt32(kKeyTrackID, &mTrackId)); 3069 3070 if (mFirstMoofOffset != 0) { 3071 off64_t offset = mFirstMoofOffset; 3072 parseChunk(&offset); 3073 } 3074} 3075 3076MPEG4Source::~MPEG4Source() { 3077 if (mStarted) { 3078 stop(); 3079 } 3080 free(mCurrentSampleInfoSizes); 3081 free(mCurrentSampleInfoOffsets); 3082} 3083 3084status_t MPEG4Source::start(MetaData *params) { 3085 Mutex::Autolock autoLock(mLock); 3086 3087 CHECK(!mStarted); 3088 3089 int32_t val; 3090 if (params && params->findInt32(kKeyWantsNALFragments, &val) 3091 && val != 0) { 3092 mWantsNALFragments = true; 3093 } else { 3094 mWantsNALFragments = false; 3095 } 3096 3097 int32_t tmp; 3098 CHECK(mFormat->findInt32(kKeyMaxInputSize, &tmp)); 3099 size_t max_size = tmp; 3100 3101 // A somewhat arbitrary limit that should be sufficient for 8k video frames 3102 // If you see the message below for a valid input stream: increase the limit 3103 if (max_size > 64 * 1024 * 1024) { 3104 ALOGE("bogus max input size: %zu", max_size); 3105 return ERROR_MALFORMED; 3106 } 3107 mGroup = new MediaBufferGroup; 3108 mGroup->add_buffer(new MediaBuffer(max_size)); 3109 3110 mSrcBuffer = new (std::nothrow) uint8_t[max_size]; 3111 if (mSrcBuffer == NULL) { 3112 // file probably specified a bad max size 3113 delete mGroup; 3114 mGroup = NULL; 3115 return ERROR_MALFORMED; 3116 } 3117 3118 mStarted = true; 3119 3120 return OK; 3121} 3122 3123status_t MPEG4Source::stop() { 3124 Mutex::Autolock autoLock(mLock); 3125 3126 CHECK(mStarted); 3127 3128 if (mBuffer != NULL) { 3129 mBuffer->release(); 3130 mBuffer = NULL; 3131 } 3132 3133 delete[] mSrcBuffer; 3134 mSrcBuffer = NULL; 3135 3136 delete mGroup; 3137 mGroup = NULL; 3138 3139 mStarted = false; 3140 mCurrentSampleIndex = 0; 3141 3142 return OK; 3143} 3144 3145status_t MPEG4Source::parseChunk(off64_t *offset) { 3146 uint32_t hdr[2]; 3147 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3148 return ERROR_IO; 3149 } 3150 uint64_t chunk_size = ntohl(hdr[0]); 3151 uint32_t chunk_type = ntohl(hdr[1]); 3152 off64_t data_offset = *offset + 8; 3153 3154 if (chunk_size == 1) { 3155 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 3156 return ERROR_IO; 3157 } 3158 chunk_size = ntoh64(chunk_size); 3159 data_offset += 8; 3160 3161 if (chunk_size < 16) { 3162 // The smallest valid chunk is 16 bytes long in this case. 3163 return ERROR_MALFORMED; 3164 } 3165 } else if (chunk_size < 8) { 3166 // The smallest valid chunk is 8 bytes long. 3167 return ERROR_MALFORMED; 3168 } 3169 3170 char chunk[5]; 3171 MakeFourCCString(chunk_type, chunk); 3172 ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset); 3173 3174 off64_t chunk_data_size = *offset + chunk_size - data_offset; 3175 3176 switch(chunk_type) { 3177 3178 case FOURCC('t', 'r', 'a', 'f'): 3179 case FOURCC('m', 'o', 'o', 'f'): { 3180 off64_t stop_offset = *offset + chunk_size; 3181 *offset = data_offset; 3182 while (*offset < stop_offset) { 3183 status_t err = parseChunk(offset); 3184 if (err != OK) { 3185 return err; 3186 } 3187 } 3188 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3189 // *offset points to the box following this moof. Find the next moof from there. 3190 3191 while (true) { 3192 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3193 return ERROR_END_OF_STREAM; 3194 } 3195 chunk_size = ntohl(hdr[0]); 3196 chunk_type = ntohl(hdr[1]); 3197 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3198 mNextMoofOffset = *offset; 3199 break; 3200 } 3201 *offset += chunk_size; 3202 } 3203 } 3204 break; 3205 } 3206 3207 case FOURCC('t', 'f', 'h', 'd'): { 3208 status_t err; 3209 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 3210 return err; 3211 } 3212 *offset += chunk_size; 3213 break; 3214 } 3215 3216 case FOURCC('t', 'r', 'u', 'n'): { 3217 status_t err; 3218 if (mLastParsedTrackId == mTrackId) { 3219 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 3220 return err; 3221 } 3222 } 3223 3224 *offset += chunk_size; 3225 break; 3226 } 3227 3228 case FOURCC('s', 'a', 'i', 'z'): { 3229 status_t err; 3230 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 3231 return err; 3232 } 3233 *offset += chunk_size; 3234 break; 3235 } 3236 case FOURCC('s', 'a', 'i', 'o'): { 3237 status_t err; 3238 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 3239 return err; 3240 } 3241 *offset += chunk_size; 3242 break; 3243 } 3244 3245 case FOURCC('m', 'd', 'a', 't'): { 3246 // parse DRM info if present 3247 ALOGV("MPEG4Source::parseChunk mdat"); 3248 // if saiz/saoi was previously observed, do something with the sampleinfos 3249 *offset += chunk_size; 3250 break; 3251 } 3252 3253 default: { 3254 *offset += chunk_size; 3255 break; 3256 } 3257 } 3258 return OK; 3259} 3260 3261status_t MPEG4Source::parseSampleAuxiliaryInformationSizes( 3262 off64_t offset, off64_t /* size */) { 3263 ALOGV("parseSampleAuxiliaryInformationSizes"); 3264 // 14496-12 8.7.12 3265 uint8_t version; 3266 if (mDataSource->readAt( 3267 offset, &version, sizeof(version)) 3268 < (ssize_t)sizeof(version)) { 3269 return ERROR_IO; 3270 } 3271 3272 if (version != 0) { 3273 return ERROR_UNSUPPORTED; 3274 } 3275 offset++; 3276 3277 uint32_t flags; 3278 if (!mDataSource->getUInt24(offset, &flags)) { 3279 return ERROR_IO; 3280 } 3281 offset += 3; 3282 3283 if (flags & 1) { 3284 uint32_t tmp; 3285 if (!mDataSource->getUInt32(offset, &tmp)) { 3286 return ERROR_MALFORMED; 3287 } 3288 mCurrentAuxInfoType = tmp; 3289 offset += 4; 3290 if (!mDataSource->getUInt32(offset, &tmp)) { 3291 return ERROR_MALFORMED; 3292 } 3293 mCurrentAuxInfoTypeParameter = tmp; 3294 offset += 4; 3295 } 3296 3297 uint8_t defsize; 3298 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 3299 return ERROR_MALFORMED; 3300 } 3301 mCurrentDefaultSampleInfoSize = defsize; 3302 offset++; 3303 3304 uint32_t smplcnt; 3305 if (!mDataSource->getUInt32(offset, &smplcnt)) { 3306 return ERROR_MALFORMED; 3307 } 3308 mCurrentSampleInfoCount = smplcnt; 3309 offset += 4; 3310 3311 if (mCurrentDefaultSampleInfoSize != 0) { 3312 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 3313 return OK; 3314 } 3315 if (smplcnt > mCurrentSampleInfoAllocSize) { 3316 mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 3317 mCurrentSampleInfoAllocSize = smplcnt; 3318 } 3319 3320 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 3321 return OK; 3322} 3323 3324status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets( 3325 off64_t offset, off64_t /* size */) { 3326 ALOGV("parseSampleAuxiliaryInformationOffsets"); 3327 // 14496-12 8.7.13 3328 uint8_t version; 3329 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 3330 return ERROR_IO; 3331 } 3332 offset++; 3333 3334 uint32_t flags; 3335 if (!mDataSource->getUInt24(offset, &flags)) { 3336 return ERROR_IO; 3337 } 3338 offset += 3; 3339 3340 uint32_t entrycount; 3341 if (!mDataSource->getUInt32(offset, &entrycount)) { 3342 return ERROR_IO; 3343 } 3344 offset += 4; 3345 if (entrycount == 0) { 3346 return OK; 3347 } 3348 if (entrycount > UINT32_MAX / 8) { 3349 return ERROR_MALFORMED; 3350 } 3351 3352 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 3353 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8); 3354 if (newPtr == NULL) { 3355 return NO_MEMORY; 3356 } 3357 mCurrentSampleInfoOffsets = newPtr; 3358 mCurrentSampleInfoOffsetsAllocSize = entrycount; 3359 } 3360 mCurrentSampleInfoOffsetCount = entrycount; 3361 3362 if (mCurrentSampleInfoOffsets == NULL) { 3363 return OK; 3364 } 3365 3366 for (size_t i = 0; i < entrycount; i++) { 3367 if (version == 0) { 3368 uint32_t tmp; 3369 if (!mDataSource->getUInt32(offset, &tmp)) { 3370 return ERROR_IO; 3371 } 3372 mCurrentSampleInfoOffsets[i] = tmp; 3373 offset += 4; 3374 } else { 3375 uint64_t tmp; 3376 if (!mDataSource->getUInt64(offset, &tmp)) { 3377 return ERROR_IO; 3378 } 3379 mCurrentSampleInfoOffsets[i] = tmp; 3380 offset += 8; 3381 } 3382 } 3383 3384 // parse clear/encrypted data 3385 3386 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 3387 3388 drmoffset += mCurrentMoofOffset; 3389 int ivlength; 3390 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 3391 3392 // only 0, 8 and 16 byte initialization vectors are supported 3393 if (ivlength != 0 && ivlength != 8 && ivlength != 16) { 3394 ALOGW("unsupported IV length: %d", ivlength); 3395 return ERROR_MALFORMED; 3396 } 3397 // read CencSampleAuxiliaryDataFormats 3398 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) { 3399 if (i >= mCurrentSamples.size()) { 3400 ALOGW("too few samples"); 3401 break; 3402 } 3403 Sample *smpl = &mCurrentSamples.editItemAt(i); 3404 3405 memset(smpl->iv, 0, 16); 3406 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) { 3407 return ERROR_IO; 3408 } 3409 3410 drmoffset += ivlength; 3411 3412 int32_t smplinfosize = mCurrentDefaultSampleInfoSize; 3413 if (smplinfosize == 0) { 3414 smplinfosize = mCurrentSampleInfoSizes[i]; 3415 } 3416 if (smplinfosize > ivlength) { 3417 uint16_t numsubsamples; 3418 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) { 3419 return ERROR_IO; 3420 } 3421 drmoffset += 2; 3422 for (size_t j = 0; j < numsubsamples; j++) { 3423 uint16_t numclear; 3424 uint32_t numencrypted; 3425 if (!mDataSource->getUInt16(drmoffset, &numclear)) { 3426 return ERROR_IO; 3427 } 3428 drmoffset += 2; 3429 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) { 3430 return ERROR_IO; 3431 } 3432 drmoffset += 4; 3433 smpl->clearsizes.add(numclear); 3434 smpl->encryptedsizes.add(numencrypted); 3435 } 3436 } else { 3437 smpl->clearsizes.add(0); 3438 smpl->encryptedsizes.add(smpl->size); 3439 } 3440 } 3441 3442 3443 return OK; 3444} 3445 3446status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 3447 3448 if (size < 8) { 3449 return -EINVAL; 3450 } 3451 3452 uint32_t flags; 3453 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 3454 return ERROR_MALFORMED; 3455 } 3456 3457 if (flags & 0xff000000) { 3458 return -EINVAL; 3459 } 3460 3461 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 3462 return ERROR_MALFORMED; 3463 } 3464 3465 if (mLastParsedTrackId != mTrackId) { 3466 // this is not the right track, skip it 3467 return OK; 3468 } 3469 3470 mTrackFragmentHeaderInfo.mFlags = flags; 3471 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 3472 offset += 8; 3473 size -= 8; 3474 3475 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 3476 3477 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 3478 if (size < 8) { 3479 return -EINVAL; 3480 } 3481 3482 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) { 3483 return ERROR_MALFORMED; 3484 } 3485 offset += 8; 3486 size -= 8; 3487 } 3488 3489 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 3490 if (size < 4) { 3491 return -EINVAL; 3492 } 3493 3494 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 3495 return ERROR_MALFORMED; 3496 } 3497 offset += 4; 3498 size -= 4; 3499 } 3500 3501 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 3502 if (size < 4) { 3503 return -EINVAL; 3504 } 3505 3506 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 3507 return ERROR_MALFORMED; 3508 } 3509 offset += 4; 3510 size -= 4; 3511 } 3512 3513 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 3514 if (size < 4) { 3515 return -EINVAL; 3516 } 3517 3518 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 3519 return ERROR_MALFORMED; 3520 } 3521 offset += 4; 3522 size -= 4; 3523 } 3524 3525 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 3526 if (size < 4) { 3527 return -EINVAL; 3528 } 3529 3530 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 3531 return ERROR_MALFORMED; 3532 } 3533 offset += 4; 3534 size -= 4; 3535 } 3536 3537 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 3538 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 3539 } 3540 3541 mTrackFragmentHeaderInfo.mDataOffset = 0; 3542 return OK; 3543} 3544 3545status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 3546 3547 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 3548 if (size < 8) { 3549 return -EINVAL; 3550 } 3551 3552 enum { 3553 kDataOffsetPresent = 0x01, 3554 kFirstSampleFlagsPresent = 0x04, 3555 kSampleDurationPresent = 0x100, 3556 kSampleSizePresent = 0x200, 3557 kSampleFlagsPresent = 0x400, 3558 kSampleCompositionTimeOffsetPresent = 0x800, 3559 }; 3560 3561 uint32_t flags; 3562 if (!mDataSource->getUInt32(offset, &flags)) { 3563 return ERROR_MALFORMED; 3564 } 3565 ALOGV("fragment run flags: %08x", flags); 3566 3567 if (flags & 0xff000000) { 3568 return -EINVAL; 3569 } 3570 3571 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 3572 // These two shall not be used together. 3573 return -EINVAL; 3574 } 3575 3576 uint32_t sampleCount; 3577 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 3578 return ERROR_MALFORMED; 3579 } 3580 offset += 8; 3581 size -= 8; 3582 3583 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 3584 3585 uint32_t firstSampleFlags = 0; 3586 3587 if (flags & kDataOffsetPresent) { 3588 if (size < 4) { 3589 return -EINVAL; 3590 } 3591 3592 int32_t dataOffsetDelta; 3593 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 3594 return ERROR_MALFORMED; 3595 } 3596 3597 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 3598 3599 offset += 4; 3600 size -= 4; 3601 } 3602 3603 if (flags & kFirstSampleFlagsPresent) { 3604 if (size < 4) { 3605 return -EINVAL; 3606 } 3607 3608 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 3609 return ERROR_MALFORMED; 3610 } 3611 offset += 4; 3612 size -= 4; 3613 } 3614 3615 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 3616 sampleCtsOffset = 0; 3617 3618 size_t bytesPerSample = 0; 3619 if (flags & kSampleDurationPresent) { 3620 bytesPerSample += 4; 3621 } else if (mTrackFragmentHeaderInfo.mFlags 3622 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 3623 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 3624 } else if (mTrex) { 3625 sampleDuration = mTrex->default_sample_duration; 3626 } 3627 3628 if (flags & kSampleSizePresent) { 3629 bytesPerSample += 4; 3630 } else if (mTrackFragmentHeaderInfo.mFlags 3631 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 3632 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 3633 } else { 3634 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 3635 } 3636 3637 if (flags & kSampleFlagsPresent) { 3638 bytesPerSample += 4; 3639 } else if (mTrackFragmentHeaderInfo.mFlags 3640 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 3641 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 3642 } else { 3643 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 3644 } 3645 3646 if (flags & kSampleCompositionTimeOffsetPresent) { 3647 bytesPerSample += 4; 3648 } else { 3649 sampleCtsOffset = 0; 3650 } 3651 3652 if (size < (off64_t)sampleCount * bytesPerSample) { 3653 return -EINVAL; 3654 } 3655 3656 Sample tmp; 3657 for (uint32_t i = 0; i < sampleCount; ++i) { 3658 if (flags & kSampleDurationPresent) { 3659 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 3660 return ERROR_MALFORMED; 3661 } 3662 offset += 4; 3663 } 3664 3665 if (flags & kSampleSizePresent) { 3666 if (!mDataSource->getUInt32(offset, &sampleSize)) { 3667 return ERROR_MALFORMED; 3668 } 3669 offset += 4; 3670 } 3671 3672 if (flags & kSampleFlagsPresent) { 3673 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 3674 return ERROR_MALFORMED; 3675 } 3676 offset += 4; 3677 } 3678 3679 if (flags & kSampleCompositionTimeOffsetPresent) { 3680 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 3681 return ERROR_MALFORMED; 3682 } 3683 offset += 4; 3684 } 3685 3686 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, " 3687 " flags 0x%08x", i + 1, 3688 dataOffset, sampleSize, sampleDuration, 3689 (flags & kFirstSampleFlagsPresent) && i == 0 3690 ? firstSampleFlags : sampleFlags); 3691 tmp.offset = dataOffset; 3692 tmp.size = sampleSize; 3693 tmp.duration = sampleDuration; 3694 tmp.compositionOffset = sampleCtsOffset; 3695 mCurrentSamples.add(tmp); 3696 3697 dataOffset += sampleSize; 3698 } 3699 3700 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 3701 3702 return OK; 3703} 3704 3705sp<MetaData> MPEG4Source::getFormat() { 3706 Mutex::Autolock autoLock(mLock); 3707 3708 return mFormat; 3709} 3710 3711size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 3712 switch (mNALLengthSize) { 3713 case 1: 3714 return *data; 3715 case 2: 3716 return U16_AT(data); 3717 case 3: 3718 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 3719 case 4: 3720 return U32_AT(data); 3721 } 3722 3723 // This cannot happen, mNALLengthSize springs to life by adding 1 to 3724 // a 2-bit integer. 3725 CHECK(!"Should not be here."); 3726 3727 return 0; 3728} 3729 3730status_t MPEG4Source::read( 3731 MediaBuffer **out, const ReadOptions *options) { 3732 Mutex::Autolock autoLock(mLock); 3733 3734 CHECK(mStarted); 3735 3736 if (mFirstMoofOffset > 0) { 3737 return fragmentedRead(out, options); 3738 } 3739 3740 *out = NULL; 3741 3742 int64_t targetSampleTimeUs = -1; 3743 3744 int64_t seekTimeUs; 3745 ReadOptions::SeekMode mode; 3746 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3747 uint32_t findFlags = 0; 3748 switch (mode) { 3749 case ReadOptions::SEEK_PREVIOUS_SYNC: 3750 findFlags = SampleTable::kFlagBefore; 3751 break; 3752 case ReadOptions::SEEK_NEXT_SYNC: 3753 findFlags = SampleTable::kFlagAfter; 3754 break; 3755 case ReadOptions::SEEK_CLOSEST_SYNC: 3756 case ReadOptions::SEEK_CLOSEST: 3757 findFlags = SampleTable::kFlagClosest; 3758 break; 3759 default: 3760 CHECK(!"Should not be here."); 3761 break; 3762 } 3763 3764 uint32_t sampleIndex; 3765 status_t err = mSampleTable->findSampleAtTime( 3766 seekTimeUs, 1000000, mTimescale, 3767 &sampleIndex, findFlags); 3768 3769 if (mode == ReadOptions::SEEK_CLOSEST) { 3770 // We found the closest sample already, now we want the sync 3771 // sample preceding it (or the sample itself of course), even 3772 // if the subsequent sync sample is closer. 3773 findFlags = SampleTable::kFlagBefore; 3774 } 3775 3776 uint32_t syncSampleIndex; 3777 if (err == OK) { 3778 err = mSampleTable->findSyncSampleNear( 3779 sampleIndex, &syncSampleIndex, findFlags); 3780 } 3781 3782 uint32_t sampleTime; 3783 if (err == OK) { 3784 err = mSampleTable->getMetaDataForSample( 3785 sampleIndex, NULL, NULL, &sampleTime); 3786 } 3787 3788 if (err != OK) { 3789 if (err == ERROR_OUT_OF_RANGE) { 3790 // An attempt to seek past the end of the stream would 3791 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 3792 // this all the way to the MediaPlayer would cause abnormal 3793 // termination. Legacy behaviour appears to be to behave as if 3794 // we had seeked to the end of stream, ending normally. 3795 err = ERROR_END_OF_STREAM; 3796 } 3797 ALOGV("end of stream"); 3798 return err; 3799 } 3800 3801 if (mode == ReadOptions::SEEK_CLOSEST) { 3802 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 3803 } 3804 3805#if 0 3806 uint32_t syncSampleTime; 3807 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 3808 syncSampleIndex, NULL, NULL, &syncSampleTime)); 3809 3810 ALOGI("seek to time %lld us => sample at time %lld us, " 3811 "sync sample at time %lld us", 3812 seekTimeUs, 3813 sampleTime * 1000000ll / mTimescale, 3814 syncSampleTime * 1000000ll / mTimescale); 3815#endif 3816 3817 mCurrentSampleIndex = syncSampleIndex; 3818 if (mBuffer != NULL) { 3819 mBuffer->release(); 3820 mBuffer = NULL; 3821 } 3822 3823 // fall through 3824 } 3825 3826 off64_t offset; 3827 size_t size; 3828 uint32_t cts, stts; 3829 bool isSyncSample; 3830 bool newBuffer = false; 3831 if (mBuffer == NULL) { 3832 newBuffer = true; 3833 3834 status_t err = 3835 mSampleTable->getMetaDataForSample( 3836 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts); 3837 3838 if (err != OK) { 3839 return err; 3840 } 3841 3842 err = mGroup->acquire_buffer(&mBuffer); 3843 3844 if (err != OK) { 3845 CHECK(mBuffer == NULL); 3846 return err; 3847 } 3848 if (size > mBuffer->size()) { 3849 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 3850 return ERROR_BUFFER_TOO_SMALL; 3851 } 3852 } 3853 3854 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) { 3855 if (newBuffer) { 3856 ssize_t num_bytes_read = 3857 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 3858 3859 if (num_bytes_read < (ssize_t)size) { 3860 mBuffer->release(); 3861 mBuffer = NULL; 3862 3863 return ERROR_IO; 3864 } 3865 3866 CHECK(mBuffer != NULL); 3867 mBuffer->set_range(0, size); 3868 mBuffer->meta_data()->clear(); 3869 mBuffer->meta_data()->setInt64( 3870 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3871 mBuffer->meta_data()->setInt64( 3872 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 3873 3874 if (targetSampleTimeUs >= 0) { 3875 mBuffer->meta_data()->setInt64( 3876 kKeyTargetTime, targetSampleTimeUs); 3877 } 3878 3879 if (isSyncSample) { 3880 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3881 } 3882 3883 ++mCurrentSampleIndex; 3884 } 3885 3886 if (!mIsAVC && !mIsHEVC) { 3887 *out = mBuffer; 3888 mBuffer = NULL; 3889 3890 return OK; 3891 } 3892 3893 // Each NAL unit is split up into its constituent fragments and 3894 // each one of them returned in its own buffer. 3895 3896 CHECK(mBuffer->range_length() >= mNALLengthSize); 3897 3898 const uint8_t *src = 3899 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 3900 3901 size_t nal_size = parseNALSize(src); 3902 if (mNALLengthSize > SIZE_MAX - nal_size) { 3903 ALOGE("b/24441553, b/24445122"); 3904 } 3905 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 3906 ALOGE("incomplete NAL unit."); 3907 3908 mBuffer->release(); 3909 mBuffer = NULL; 3910 3911 return ERROR_MALFORMED; 3912 } 3913 3914 MediaBuffer *clone = mBuffer->clone(); 3915 CHECK(clone != NULL); 3916 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 3917 3918 CHECK(mBuffer != NULL); 3919 mBuffer->set_range( 3920 mBuffer->range_offset() + mNALLengthSize + nal_size, 3921 mBuffer->range_length() - mNALLengthSize - nal_size); 3922 3923 if (mBuffer->range_length() == 0) { 3924 mBuffer->release(); 3925 mBuffer = NULL; 3926 } 3927 3928 *out = clone; 3929 3930 return OK; 3931 } else { 3932 // Whole NAL units are returned but each fragment is prefixed by 3933 // the start code (0x00 00 00 01). 3934 ssize_t num_bytes_read = 0; 3935 int32_t drm = 0; 3936 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 3937 if (usesDRM) { 3938 num_bytes_read = 3939 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 3940 } else { 3941 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 3942 } 3943 3944 if (num_bytes_read < (ssize_t)size) { 3945 mBuffer->release(); 3946 mBuffer = NULL; 3947 3948 return ERROR_IO; 3949 } 3950 3951 if (usesDRM) { 3952 CHECK(mBuffer != NULL); 3953 mBuffer->set_range(0, size); 3954 3955 } else { 3956 uint8_t *dstData = (uint8_t *)mBuffer->data(); 3957 size_t srcOffset = 0; 3958 size_t dstOffset = 0; 3959 3960 while (srcOffset < size) { 3961 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 3962 size_t nalLength = 0; 3963 if (!isMalFormed) { 3964 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 3965 srcOffset += mNALLengthSize; 3966 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength); 3967 } 3968 3969 if (isMalFormed) { 3970 ALOGE("Video is malformed"); 3971 mBuffer->release(); 3972 mBuffer = NULL; 3973 return ERROR_MALFORMED; 3974 } 3975 3976 if (nalLength == 0) { 3977 continue; 3978 } 3979 3980 CHECK(dstOffset + 4 <= mBuffer->size()); 3981 3982 dstData[dstOffset++] = 0; 3983 dstData[dstOffset++] = 0; 3984 dstData[dstOffset++] = 0; 3985 dstData[dstOffset++] = 1; 3986 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 3987 srcOffset += nalLength; 3988 dstOffset += nalLength; 3989 } 3990 CHECK_EQ(srcOffset, size); 3991 CHECK(mBuffer != NULL); 3992 mBuffer->set_range(0, dstOffset); 3993 } 3994 3995 mBuffer->meta_data()->clear(); 3996 mBuffer->meta_data()->setInt64( 3997 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3998 mBuffer->meta_data()->setInt64( 3999 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 4000 4001 if (targetSampleTimeUs >= 0) { 4002 mBuffer->meta_data()->setInt64( 4003 kKeyTargetTime, targetSampleTimeUs); 4004 } 4005 4006 if (isSyncSample) { 4007 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4008 } 4009 4010 ++mCurrentSampleIndex; 4011 4012 *out = mBuffer; 4013 mBuffer = NULL; 4014 4015 return OK; 4016 } 4017} 4018 4019status_t MPEG4Source::fragmentedRead( 4020 MediaBuffer **out, const ReadOptions *options) { 4021 4022 ALOGV("MPEG4Source::fragmentedRead"); 4023 4024 CHECK(mStarted); 4025 4026 *out = NULL; 4027 4028 int64_t targetSampleTimeUs = -1; 4029 4030 int64_t seekTimeUs; 4031 ReadOptions::SeekMode mode; 4032 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 4033 4034 int numSidxEntries = mSegments.size(); 4035 if (numSidxEntries != 0) { 4036 int64_t totalTime = 0; 4037 off64_t totalOffset = mFirstMoofOffset; 4038 for (int i = 0; i < numSidxEntries; i++) { 4039 const SidxEntry *se = &mSegments[i]; 4040 if (totalTime + se->mDurationUs > seekTimeUs) { 4041 // The requested time is somewhere in this segment 4042 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) || 4043 (mode == ReadOptions::SEEK_CLOSEST_SYNC && 4044 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 4045 // requested next sync, or closest sync and it was closer to the end of 4046 // this segment 4047 totalTime += se->mDurationUs; 4048 totalOffset += se->mSize; 4049 } 4050 break; 4051 } 4052 totalTime += se->mDurationUs; 4053 totalOffset += se->mSize; 4054 } 4055 mCurrentMoofOffset = totalOffset; 4056 mCurrentSamples.clear(); 4057 mCurrentSampleIndex = 0; 4058 parseChunk(&totalOffset); 4059 mCurrentTime = totalTime * mTimescale / 1000000ll; 4060 } else { 4061 // without sidx boxes, we can only seek to 0 4062 mCurrentMoofOffset = mFirstMoofOffset; 4063 mCurrentSamples.clear(); 4064 mCurrentSampleIndex = 0; 4065 off64_t tmp = mCurrentMoofOffset; 4066 parseChunk(&tmp); 4067 mCurrentTime = 0; 4068 } 4069 4070 if (mBuffer != NULL) { 4071 mBuffer->release(); 4072 mBuffer = NULL; 4073 } 4074 4075 // fall through 4076 } 4077 4078 off64_t offset = 0; 4079 size_t size = 0; 4080 uint32_t cts = 0; 4081 bool isSyncSample = false; 4082 bool newBuffer = false; 4083 if (mBuffer == NULL) { 4084 newBuffer = true; 4085 4086 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4087 // move to next fragment if there is one 4088 if (mNextMoofOffset <= mCurrentMoofOffset) { 4089 return ERROR_END_OF_STREAM; 4090 } 4091 off64_t nextMoof = mNextMoofOffset; 4092 mCurrentMoofOffset = nextMoof; 4093 mCurrentSamples.clear(); 4094 mCurrentSampleIndex = 0; 4095 parseChunk(&nextMoof); 4096 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4097 return ERROR_END_OF_STREAM; 4098 } 4099 } 4100 4101 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4102 offset = smpl->offset; 4103 size = smpl->size; 4104 cts = mCurrentTime + smpl->compositionOffset; 4105 mCurrentTime += smpl->duration; 4106 isSyncSample = (mCurrentSampleIndex == 0); // XXX 4107 4108 status_t err = mGroup->acquire_buffer(&mBuffer); 4109 4110 if (err != OK) { 4111 CHECK(mBuffer == NULL); 4112 ALOGV("acquire_buffer returned %d", err); 4113 return err; 4114 } 4115 if (size > mBuffer->size()) { 4116 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 4117 return ERROR_BUFFER_TOO_SMALL; 4118 } 4119 } 4120 4121 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4122 const sp<MetaData> bufmeta = mBuffer->meta_data(); 4123 bufmeta->clear(); 4124 if (smpl->encryptedsizes.size()) { 4125 // store clear/encrypted lengths in metadata 4126 bufmeta->setData(kKeyPlainSizes, 0, 4127 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 4128 bufmeta->setData(kKeyEncryptedSizes, 0, 4129 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 4130 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size? 4131 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 4132 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode); 4133 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16); 4134 } 4135 4136 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) { 4137 if (newBuffer) { 4138 if (!isInRange((size_t)0u, mBuffer->size(), size)) { 4139 mBuffer->release(); 4140 mBuffer = NULL; 4141 4142 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size); 4143 return ERROR_MALFORMED; 4144 } 4145 4146 ssize_t num_bytes_read = 4147 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 4148 4149 if (num_bytes_read < (ssize_t)size) { 4150 mBuffer->release(); 4151 mBuffer = NULL; 4152 4153 ALOGE("i/o error"); 4154 return ERROR_IO; 4155 } 4156 4157 CHECK(mBuffer != NULL); 4158 mBuffer->set_range(0, size); 4159 mBuffer->meta_data()->setInt64( 4160 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4161 mBuffer->meta_data()->setInt64( 4162 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4163 4164 if (targetSampleTimeUs >= 0) { 4165 mBuffer->meta_data()->setInt64( 4166 kKeyTargetTime, targetSampleTimeUs); 4167 } 4168 4169 if (isSyncSample) { 4170 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4171 } 4172 4173 ++mCurrentSampleIndex; 4174 } 4175 4176 if (!mIsAVC && !mIsHEVC) { 4177 *out = mBuffer; 4178 mBuffer = NULL; 4179 4180 return OK; 4181 } 4182 4183 // Each NAL unit is split up into its constituent fragments and 4184 // each one of them returned in its own buffer. 4185 4186 CHECK(mBuffer->range_length() >= mNALLengthSize); 4187 4188 const uint8_t *src = 4189 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 4190 4191 size_t nal_size = parseNALSize(src); 4192 if (mNALLengthSize > SIZE_MAX - nal_size) { 4193 ALOGE("b/24441553, b/24445122"); 4194 } 4195 4196 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 4197 ALOGE("incomplete NAL unit."); 4198 4199 mBuffer->release(); 4200 mBuffer = NULL; 4201 4202 return ERROR_MALFORMED; 4203 } 4204 4205 MediaBuffer *clone = mBuffer->clone(); 4206 CHECK(clone != NULL); 4207 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 4208 4209 CHECK(mBuffer != NULL); 4210 mBuffer->set_range( 4211 mBuffer->range_offset() + mNALLengthSize + nal_size, 4212 mBuffer->range_length() - mNALLengthSize - nal_size); 4213 4214 if (mBuffer->range_length() == 0) { 4215 mBuffer->release(); 4216 mBuffer = NULL; 4217 } 4218 4219 *out = clone; 4220 4221 return OK; 4222 } else { 4223 ALOGV("whole NAL"); 4224 // Whole NAL units are returned but each fragment is prefixed by 4225 // the start code (0x00 00 00 01). 4226 ssize_t num_bytes_read = 0; 4227 int32_t drm = 0; 4228 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 4229 void *data = NULL; 4230 bool isMalFormed = false; 4231 if (usesDRM) { 4232 if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) { 4233 isMalFormed = true; 4234 } else { 4235 data = mBuffer->data(); 4236 } 4237 } else { 4238 int32_t max_size; 4239 if (mFormat == NULL 4240 || !mFormat->findInt32(kKeyMaxInputSize, &max_size) 4241 || !isInRange((size_t)0u, (size_t)max_size, size)) { 4242 isMalFormed = true; 4243 } else { 4244 data = mSrcBuffer; 4245 } 4246 } 4247 4248 if (isMalFormed || data == NULL) { 4249 ALOGE("isMalFormed size %zu", size); 4250 if (mBuffer != NULL) { 4251 mBuffer->release(); 4252 mBuffer = NULL; 4253 } 4254 return ERROR_MALFORMED; 4255 } 4256 num_bytes_read = mDataSource->readAt(offset, data, size); 4257 4258 if (num_bytes_read < (ssize_t)size) { 4259 mBuffer->release(); 4260 mBuffer = NULL; 4261 4262 ALOGE("i/o error"); 4263 return ERROR_IO; 4264 } 4265 4266 if (usesDRM) { 4267 CHECK(mBuffer != NULL); 4268 mBuffer->set_range(0, size); 4269 4270 } else { 4271 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4272 size_t srcOffset = 0; 4273 size_t dstOffset = 0; 4274 4275 while (srcOffset < size) { 4276 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 4277 size_t nalLength = 0; 4278 if (!isMalFormed) { 4279 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4280 srcOffset += mNALLengthSize; 4281 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength) 4282 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u) 4283 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength); 4284 } 4285 4286 if (isMalFormed) { 4287 ALOGE("Video is malformed; nalLength %zu", nalLength); 4288 mBuffer->release(); 4289 mBuffer = NULL; 4290 return ERROR_MALFORMED; 4291 } 4292 4293 if (nalLength == 0) { 4294 continue; 4295 } 4296 4297 CHECK(dstOffset + 4 <= mBuffer->size()); 4298 4299 dstData[dstOffset++] = 0; 4300 dstData[dstOffset++] = 0; 4301 dstData[dstOffset++] = 0; 4302 dstData[dstOffset++] = 1; 4303 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4304 srcOffset += nalLength; 4305 dstOffset += nalLength; 4306 } 4307 CHECK_EQ(srcOffset, size); 4308 CHECK(mBuffer != NULL); 4309 mBuffer->set_range(0, dstOffset); 4310 } 4311 4312 mBuffer->meta_data()->setInt64( 4313 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4314 mBuffer->meta_data()->setInt64( 4315 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4316 4317 if (targetSampleTimeUs >= 0) { 4318 mBuffer->meta_data()->setInt64( 4319 kKeyTargetTime, targetSampleTimeUs); 4320 } 4321 4322 if (isSyncSample) { 4323 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4324 } 4325 4326 ++mCurrentSampleIndex; 4327 4328 *out = mBuffer; 4329 mBuffer = NULL; 4330 4331 return OK; 4332 } 4333} 4334 4335MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 4336 const char *mimePrefix) { 4337 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 4338 const char *mime; 4339 if (track->meta != NULL 4340 && track->meta->findCString(kKeyMIMEType, &mime) 4341 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 4342 return track; 4343 } 4344 } 4345 4346 return NULL; 4347} 4348 4349static bool LegacySniffMPEG4( 4350 const sp<DataSource> &source, String8 *mimeType, float *confidence) { 4351 uint8_t header[8]; 4352 4353 ssize_t n = source->readAt(4, header, sizeof(header)); 4354 if (n < (ssize_t)sizeof(header)) { 4355 return false; 4356 } 4357 4358 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 4359 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) 4360 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 4361 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 4362 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 4363 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) { 4364 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 4365 *confidence = 0.4; 4366 4367 return true; 4368 } 4369 4370 return false; 4371} 4372 4373static bool isCompatibleBrand(uint32_t fourcc) { 4374 static const uint32_t kCompatibleBrands[] = { 4375 FOURCC('i', 's', 'o', 'm'), 4376 FOURCC('i', 's', 'o', '2'), 4377 FOURCC('a', 'v', 'c', '1'), 4378 FOURCC('h', 'v', 'c', '1'), 4379 FOURCC('h', 'e', 'v', '1'), 4380 FOURCC('3', 'g', 'p', '4'), 4381 FOURCC('m', 'p', '4', '1'), 4382 FOURCC('m', 'p', '4', '2'), 4383 4384 // Won't promise that the following file types can be played. 4385 // Just give these file types a chance. 4386 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 4387 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 4388 4389 FOURCC('3', 'g', '2', 'a'), // 3GPP2 4390 FOURCC('3', 'g', '2', 'b'), 4391 }; 4392 4393 for (size_t i = 0; 4394 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 4395 ++i) { 4396 if (kCompatibleBrands[i] == fourcc) { 4397 return true; 4398 } 4399 } 4400 4401 return false; 4402} 4403 4404// Attempt to actually parse the 'ftyp' atom and determine if a suitable 4405// compatible brand is present. 4406// Also try to identify where this file's metadata ends 4407// (end of the 'moov' atom) and report it to the caller as part of 4408// the metadata. 4409static bool BetterSniffMPEG4( 4410 const sp<DataSource> &source, String8 *mimeType, float *confidence, 4411 sp<AMessage> *meta) { 4412 // We scan up to 128 bytes to identify this file as an MP4. 4413 static const off64_t kMaxScanOffset = 128ll; 4414 4415 off64_t offset = 0ll; 4416 bool foundGoodFileType = false; 4417 off64_t moovAtomEndOffset = -1ll; 4418 bool done = false; 4419 4420 while (!done && offset < kMaxScanOffset) { 4421 uint32_t hdr[2]; 4422 if (source->readAt(offset, hdr, 8) < 8) { 4423 return false; 4424 } 4425 4426 uint64_t chunkSize = ntohl(hdr[0]); 4427 uint32_t chunkType = ntohl(hdr[1]); 4428 off64_t chunkDataOffset = offset + 8; 4429 4430 if (chunkSize == 1) { 4431 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 4432 return false; 4433 } 4434 4435 chunkSize = ntoh64(chunkSize); 4436 chunkDataOffset += 8; 4437 4438 if (chunkSize < 16) { 4439 // The smallest valid chunk is 16 bytes long in this case. 4440 return false; 4441 } 4442 } else if (chunkSize < 8) { 4443 // The smallest valid chunk is 8 bytes long. 4444 return false; 4445 } 4446 4447 off64_t chunkDataSize = offset + chunkSize - chunkDataOffset; 4448 4449 char chunkstring[5]; 4450 MakeFourCCString(chunkType, chunkstring); 4451 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, offset); 4452 switch (chunkType) { 4453 case FOURCC('f', 't', 'y', 'p'): 4454 { 4455 if (chunkDataSize < 8) { 4456 return false; 4457 } 4458 4459 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 4460 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 4461 if (i == 1) { 4462 // Skip this index, it refers to the minorVersion, 4463 // not a brand. 4464 continue; 4465 } 4466 4467 uint32_t brand; 4468 if (source->readAt( 4469 chunkDataOffset + 4 * i, &brand, 4) < 4) { 4470 return false; 4471 } 4472 4473 brand = ntohl(brand); 4474 4475 if (isCompatibleBrand(brand)) { 4476 foundGoodFileType = true; 4477 break; 4478 } 4479 } 4480 4481 if (!foundGoodFileType) { 4482 return false; 4483 } 4484 4485 break; 4486 } 4487 4488 case FOURCC('m', 'o', 'o', 'v'): 4489 { 4490 moovAtomEndOffset = offset + chunkSize; 4491 4492 done = true; 4493 break; 4494 } 4495 4496 default: 4497 break; 4498 } 4499 4500 offset += chunkSize; 4501 } 4502 4503 if (!foundGoodFileType) { 4504 return false; 4505 } 4506 4507 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 4508 *confidence = 0.4f; 4509 4510 if (moovAtomEndOffset >= 0) { 4511 *meta = new AMessage; 4512 (*meta)->setInt64("meta-data-size", moovAtomEndOffset); 4513 4514 ALOGV("found metadata size: %lld", moovAtomEndOffset); 4515 } 4516 4517 return true; 4518} 4519 4520bool SniffMPEG4( 4521 const sp<DataSource> &source, String8 *mimeType, float *confidence, 4522 sp<AMessage> *meta) { 4523 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) { 4524 return true; 4525 } 4526 4527 if (LegacySniffMPEG4(source, mimeType, confidence)) { 4528 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 4529 return true; 4530 } 4531 4532 return false; 4533} 4534 4535} // namespace android 4536