MPEG4Extractor.cpp revision f8f0e0b756b0f96eccc94af89a0087c146232b26
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//#define LOG_NDEBUG 0
#define LOG_TAG "MPEG4Extractor"

#include <ctype.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <utils/Log.h>

#include "include/MPEG4Extractor.h"
#include "include/SampleTable.h"
#include "include/ESDS.h"

#include <media/stagefright/foundation/ABitReader.h>
#include <media/stagefright/foundation/ABuffer.h>
#include <media/stagefright/foundation/ADebug.h>
#include <media/stagefright/foundation/AMessage.h>
#include <media/stagefright/foundation/AUtils.h>
#include <media/stagefright/MediaBuffer.h>
#include <media/stagefright/MediaBufferGroup.h>
#include <media/stagefright/MediaDefs.h>
#include <media/stagefright/MediaSource.h>
#include <media/stagefright/MetaData.h>
#include <utils/String8.h>

#include <byteswap.h>
#include "include/ID3.h"

// Fallback for toolchains whose <stdint.h> does not expose UINT32_MAX.
#ifndef UINT32_MAX
#define UINT32_MAX       (4294967295U)
#endif

namespace android {

// MediaSource subclass declared privately in this file. It is constructed
// from an MPEG4Extractor plus per-track state (format, sample table, sidx
// entries, trex defaults, offset of the first moof) and exposes the usual
// start/stop/getFormat/read entry points, plus a separate fragmentedRead().
// NOTE(review): the member definitions are not part of this chunk, so the
// comments below only describe what the declaration itself shows.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(const sp<MPEG4Extractor> &owner,
                const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset);

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    // Read path for fragmented files -- presumably used when a moof offset
    // was supplied; confirm against the definition.
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    // Protected destructor: instances are not meant to be deleted directly
    // by arbitrary callers.
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    // keep the MPEG4Extractor around, since we're referencing its data
    sp<MPEG4Extractor> mOwner;
    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Held by reference: the referenced vector must outlive this object
    // (mOwner above keeps the extractor alive).
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    // Sample auxiliary information bookkeeping; filled in by the
    // parseSampleAuxiliaryInformation*() helpers declared below
    // (presumably from saiz/saio boxes -- TODO confirm).
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // Values read from a track fragment header.
    struct TrackFragmentHeaderInfo {
        // Bit masks tested against mFlags. NOTE(review): the values look
        // like the tf_flags of the ISO 14496-12 'tfhd' box -- confirm
        // against parseTrackFragmentHeader().
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record kept in mCurrentSamples: location, size and timing
    // of one sample plus its crypto metadata (IV and clear/encrypted
    // subsample sizes).
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Declared private to disallow copying and assignment.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};

// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache while forwarding
// all remaining requests to the wrapped datasource.
// This is used to cache the full sampletable metadata for a single track,
// possibly wrapping multiple times to cover all tracks, i.e.
// Each MPEG4DataSource caches the sampletable metadata for a single track.
173 174struct MPEG4DataSource : public DataSource { 175 MPEG4DataSource(const sp<DataSource> &source); 176 177 virtual status_t initCheck() const; 178 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 179 virtual status_t getSize(off64_t *size); 180 virtual uint32_t flags(); 181 182 status_t setCachedRange(off64_t offset, size_t size); 183 184protected: 185 virtual ~MPEG4DataSource(); 186 187private: 188 Mutex mLock; 189 190 sp<DataSource> mSource; 191 off64_t mCachedOffset; 192 size_t mCachedSize; 193 uint8_t *mCache; 194 195 void clearCache(); 196 197 MPEG4DataSource(const MPEG4DataSource &); 198 MPEG4DataSource &operator=(const MPEG4DataSource &); 199}; 200 201MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source) 202 : mSource(source), 203 mCachedOffset(0), 204 mCachedSize(0), 205 mCache(NULL) { 206} 207 208MPEG4DataSource::~MPEG4DataSource() { 209 clearCache(); 210} 211 212void MPEG4DataSource::clearCache() { 213 if (mCache) { 214 free(mCache); 215 mCache = NULL; 216 } 217 218 mCachedOffset = 0; 219 mCachedSize = 0; 220} 221 222status_t MPEG4DataSource::initCheck() const { 223 return mSource->initCheck(); 224} 225 226ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { 227 Mutex::Autolock autoLock(mLock); 228 229 if (isInRange(mCachedOffset, mCachedSize, offset, size)) { 230 memcpy(data, &mCache[offset - mCachedOffset], size); 231 return size; 232 } 233 234 return mSource->readAt(offset, data, size); 235} 236 237status_t MPEG4DataSource::getSize(off64_t *size) { 238 return mSource->getSize(size); 239} 240 241uint32_t MPEG4DataSource::flags() { 242 return mSource->flags(); 243} 244 245status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { 246 Mutex::Autolock autoLock(mLock); 247 248 clearCache(); 249 250 mCache = (uint8_t *)malloc(size); 251 252 if (mCache == NULL) { 253 return -ENOMEM; 254 } 255 256 mCachedOffset = offset; 257 mCachedSize = size; 258 259 ssize_t err = mSource->readAt(mCachedOffset, 
mCache, mCachedSize); 260 261 if (err < (ssize_t)size) { 262 clearCache(); 263 264 return ERROR_IO; 265 } 266 267 return OK; 268} 269 270//////////////////////////////////////////////////////////////////////////////// 271 272static const bool kUseHexDump = false; 273 274static void hexdump(const void *_data, size_t size) { 275 const uint8_t *data = (const uint8_t *)_data; 276 size_t offset = 0; 277 while (offset < size) { 278 printf("0x%04zx ", offset); 279 280 size_t n = size - offset; 281 if (n > 16) { 282 n = 16; 283 } 284 285 for (size_t i = 0; i < 16; ++i) { 286 if (i == 8) { 287 printf(" "); 288 } 289 290 if (offset + i < size) { 291 printf("%02x ", data[offset + i]); 292 } else { 293 printf(" "); 294 } 295 } 296 297 printf(" "); 298 299 for (size_t i = 0; i < n; ++i) { 300 if (isprint(data[offset + i])) { 301 printf("%c", data[offset + i]); 302 } else { 303 printf("."); 304 } 305 } 306 307 printf("\n"); 308 309 offset += 16; 310 } 311} 312 313static const char *FourCC2MIME(uint32_t fourcc) { 314 switch (fourcc) { 315 case FOURCC('m', 'p', '4', 'a'): 316 return MEDIA_MIMETYPE_AUDIO_AAC; 317 318 case FOURCC('s', 'a', 'm', 'r'): 319 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 320 321 case FOURCC('s', 'a', 'w', 'b'): 322 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 323 324 case FOURCC('m', 'p', '4', 'v'): 325 return MEDIA_MIMETYPE_VIDEO_MPEG4; 326 327 case FOURCC('s', '2', '6', '3'): 328 case FOURCC('h', '2', '6', '3'): 329 case FOURCC('H', '2', '6', '3'): 330 return MEDIA_MIMETYPE_VIDEO_H263; 331 332 case FOURCC('a', 'v', 'c', '1'): 333 return MEDIA_MIMETYPE_VIDEO_AVC; 334 335 case FOURCC('h', 'v', 'c', '1'): 336 case FOURCC('h', 'e', 'v', '1'): 337 return MEDIA_MIMETYPE_VIDEO_HEVC; 338 default: 339 CHECK(!"should not be here."); 340 return NULL; 341 } 342} 343 344static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 345 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 346 // AMR NB audio is always mono, 8kHz 347 *channels 
= 1; 348 *rate = 8000; 349 return true; 350 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 351 // AMR WB audio is always mono, 16kHz 352 *channels = 1; 353 *rate = 16000; 354 return true; 355 } 356 return false; 357} 358 359MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source) 360 : mMoofOffset(0), 361 mDataSource(source), 362 mInitCheck(NO_INIT), 363 mHasVideo(false), 364 mHeaderTimescale(0), 365 mFirstTrack(NULL), 366 mLastTrack(NULL), 367 mFileMetaData(new MetaData), 368 mFirstSINF(NULL), 369 mIsDrm(false) { 370} 371 372MPEG4Extractor::~MPEG4Extractor() { 373 Track *track = mFirstTrack; 374 while (track) { 375 Track *next = track->next; 376 377 delete track; 378 track = next; 379 } 380 mFirstTrack = mLastTrack = NULL; 381 382 SINF *sinf = mFirstSINF; 383 while (sinf) { 384 SINF *next = sinf->next; 385 delete[] sinf->IPMPData; 386 delete sinf; 387 sinf = next; 388 } 389 mFirstSINF = NULL; 390 391 for (size_t i = 0; i < mPssh.size(); i++) { 392 delete [] mPssh[i].data; 393 } 394} 395 396uint32_t MPEG4Extractor::flags() const { 397 return CAN_PAUSE | 398 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 
399 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 400} 401 402sp<MetaData> MPEG4Extractor::getMetaData() { 403 status_t err; 404 if ((err = readMetaData()) != OK) { 405 return new MetaData; 406 } 407 408 return mFileMetaData; 409} 410 411size_t MPEG4Extractor::countTracks() { 412 status_t err; 413 if ((err = readMetaData()) != OK) { 414 ALOGV("MPEG4Extractor::countTracks: no tracks"); 415 return 0; 416 } 417 418 size_t n = 0; 419 Track *track = mFirstTrack; 420 while (track) { 421 ++n; 422 track = track->next; 423 } 424 425 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n); 426 return n; 427} 428 429sp<MetaData> MPEG4Extractor::getTrackMetaData( 430 size_t index, uint32_t flags) { 431 status_t err; 432 if ((err = readMetaData()) != OK) { 433 return NULL; 434 } 435 436 Track *track = mFirstTrack; 437 while (index > 0) { 438 if (track == NULL) { 439 return NULL; 440 } 441 442 track = track->next; 443 --index; 444 } 445 446 if (track == NULL) { 447 return NULL; 448 } 449 450 if ((flags & kIncludeExtensiveMetaData) 451 && !track->includes_expensive_metadata) { 452 track->includes_expensive_metadata = true; 453 454 const char *mime; 455 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 456 if (!strncasecmp("video/", mime, 6)) { 457 if (mMoofOffset > 0) { 458 int64_t duration; 459 if (track->meta->findInt64(kKeyDuration, &duration)) { 460 // nothing fancy, just pick a frame near 1/4th of the duration 461 track->meta->setInt64( 462 kKeyThumbnailTime, duration / 4); 463 } 464 } else { 465 uint32_t sampleIndex; 466 uint32_t sampleTime; 467 if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK 468 && track->sampleTable->getMetaDataForSample( 469 sampleIndex, NULL /* offset */, NULL /* size */, 470 &sampleTime) == OK) { 471 track->meta->setInt64( 472 kKeyThumbnailTime, 473 ((int64_t)sampleTime * 1000000) / track->timescale); 474 } 475 } 476 } 477 } 478 479 return track->meta; 480} 481 482static void MakeFourCCString(uint32_t x, char *s) { 483 s[0] = 
x >> 24; 484 s[1] = (x >> 16) & 0xff; 485 s[2] = (x >> 8) & 0xff; 486 s[3] = x & 0xff; 487 s[4] = '\0'; 488} 489 490status_t MPEG4Extractor::readMetaData() { 491 if (mInitCheck != NO_INIT) { 492 return mInitCheck; 493 } 494 495 off64_t offset = 0; 496 status_t err; 497 while (true) { 498 off64_t orig_offset = offset; 499 err = parseChunk(&offset, 0); 500 501 if (err != OK && err != UNKNOWN_ERROR) { 502 break; 503 } else if (offset <= orig_offset) { 504 // only continue parsing if the offset was advanced, 505 // otherwise we might end up in an infinite loop 506 ALOGE("did not advance: 0x%lld->0x%lld", orig_offset, offset); 507 err = ERROR_MALFORMED; 508 break; 509 } else if (err == OK) { 510 continue; 511 } 512 513 uint32_t hdr[2]; 514 if (mDataSource->readAt(offset, hdr, 8) < 8) { 515 break; 516 } 517 uint32_t chunk_type = ntohl(hdr[1]); 518 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 519 // store the offset of the first segment 520 mMoofOffset = offset; 521 } else if (chunk_type != FOURCC('m', 'd', 'a', 't')) { 522 // keep parsing until we get to the data 523 continue; 524 } 525 break; 526 } 527 528 if (mInitCheck == OK) { 529 if (mHasVideo) { 530 mFileMetaData->setCString( 531 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 532 } else { 533 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); 534 } 535 } else { 536 mInitCheck = err; 537 } 538 539 CHECK_NE(err, (status_t)NO_INIT); 540 541 // copy pssh data into file metadata 542 int psshsize = 0; 543 for (size_t i = 0; i < mPssh.size(); i++) { 544 psshsize += 20 + mPssh[i].datalen; 545 } 546 if (psshsize) { 547 char *buf = (char*)malloc(psshsize); 548 char *ptr = buf; 549 for (size_t i = 0; i < mPssh.size(); i++) { 550 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 551 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 552 ptr += (20 + mPssh[i].datalen); 553 } 554 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize); 555 free(buf); 556 } 557 return mInitCheck; 558} 559 560char* 
MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { 561 if (mFirstSINF == NULL) { 562 return NULL; 563 } 564 565 SINF *sinf = mFirstSINF; 566 while (sinf && (trackID != sinf->trackID)) { 567 sinf = sinf->next; 568 } 569 570 if (sinf == NULL) { 571 return NULL; 572 } 573 574 *len = sinf->len; 575 return sinf->IPMPData; 576} 577 578// Reads an encoded integer 7 bits at a time until it encounters the high bit clear. 579static int32_t readSize(off64_t offset, 580 const sp<DataSource> DataSource, uint8_t *numOfBytes) { 581 uint32_t size = 0; 582 uint8_t data; 583 bool moreData = true; 584 *numOfBytes = 0; 585 586 while (moreData) { 587 if (DataSource->readAt(offset, &data, 1) < 1) { 588 return -1; 589 } 590 offset ++; 591 moreData = (data >= 128) ? true : false; 592 size = (size << 7) | (data & 0x7f); // Take last 7 bits 593 (*numOfBytes) ++; 594 } 595 596 return size; 597} 598 599status_t MPEG4Extractor::parseDrmSINF( 600 off64_t * /* offset */, off64_t data_offset) { 601 uint8_t updateIdTag; 602 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 603 return ERROR_IO; 604 } 605 data_offset ++; 606 607 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 608 return ERROR_MALFORMED; 609 } 610 611 uint8_t numOfBytes; 612 int32_t size = readSize(data_offset, mDataSource, &numOfBytes); 613 if (size < 0) { 614 return ERROR_IO; 615 } 616 data_offset += numOfBytes; 617 618 while(size >= 11 ) { 619 uint8_t descriptorTag; 620 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { 621 return ERROR_IO; 622 } 623 data_offset ++; 624 625 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { 626 return ERROR_MALFORMED; 627 } 628 629 uint8_t buffer[8]; 630 //ObjectDescriptorID and ObjectDescriptor url flag 631 if (mDataSource->readAt(data_offset, buffer, 2) < 2) { 632 return ERROR_IO; 633 } 634 data_offset += 2; 635 636 if ((buffer[1] >> 5) & 0x0001) { //url flag is set 637 return ERROR_MALFORMED; 638 } 639 640 if (mDataSource->readAt(data_offset, 
buffer, 8) < 8) { 641 return ERROR_IO; 642 } 643 data_offset += 8; 644 645 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) 646 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { 647 return ERROR_MALFORMED; 648 } 649 650 SINF *sinf = new SINF; 651 sinf->trackID = U16_AT(&buffer[3]); 652 sinf->IPMPDescriptorID = buffer[7]; 653 sinf->next = mFirstSINF; 654 mFirstSINF = sinf; 655 656 size -= (8 + 2 + 1); 657 } 658 659 if (size != 0) { 660 return ERROR_MALFORMED; 661 } 662 663 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 664 return ERROR_IO; 665 } 666 data_offset ++; 667 668 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 669 return ERROR_MALFORMED; 670 } 671 672 size = readSize(data_offset, mDataSource, &numOfBytes); 673 if (size < 0) { 674 return ERROR_IO; 675 } 676 data_offset += numOfBytes; 677 678 while (size > 0) { 679 uint8_t tag; 680 int32_t dataLen; 681 if (mDataSource->readAt(data_offset, &tag, 1) < 1) { 682 return ERROR_IO; 683 } 684 data_offset ++; 685 686 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { 687 uint8_t id; 688 dataLen = readSize(data_offset, mDataSource, &numOfBytes); 689 if (dataLen < 0) { 690 return ERROR_IO; 691 } else if (dataLen < 4) { 692 return ERROR_MALFORMED; 693 } 694 data_offset += numOfBytes; 695 696 if (mDataSource->readAt(data_offset, &id, 1) < 1) { 697 return ERROR_IO; 698 } 699 data_offset ++; 700 701 SINF *sinf = mFirstSINF; 702 while (sinf && (sinf->IPMPDescriptorID != id)) { 703 sinf = sinf->next; 704 } 705 if (sinf == NULL) { 706 return ERROR_MALFORMED; 707 } 708 sinf->len = dataLen - 3; 709 sinf->IPMPData = new (std::nothrow) char[sinf->len]; 710 if (sinf->IPMPData == NULL) { 711 return ERROR_MALFORMED; 712 } 713 data_offset += 2; 714 715 if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) { 716 return ERROR_IO; 717 } 718 data_offset += sinf->len; 719 720 size -= (dataLen + numOfBytes + 1); 721 } 722 } 723 724 if (size != 0) { 725 return ERROR_MALFORMED; 726 } 727 728 
return UNKNOWN_ERROR; // Return a dummy error. 729} 730 731struct PathAdder { 732 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 733 : mPath(path) { 734 mPath->push(chunkType); 735 } 736 737 ~PathAdder() { 738 mPath->pop(); 739 } 740 741private: 742 Vector<uint32_t> *mPath; 743 744 PathAdder(const PathAdder &); 745 PathAdder &operator=(const PathAdder &); 746}; 747 748static bool underMetaDataPath(const Vector<uint32_t> &path) { 749 return path.size() >= 5 750 && path[0] == FOURCC('m', 'o', 'o', 'v') 751 && path[1] == FOURCC('u', 'd', 't', 'a') 752 && path[2] == FOURCC('m', 'e', 't', 'a') 753 && path[3] == FOURCC('i', 'l', 's', 't'); 754} 755 756// Given a time in seconds since Jan 1 1904, produce a human-readable string. 757static void convertTimeToDate(int64_t time_1904, String8 *s) { 758 time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600); 759 760 char tmp[32]; 761 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970)); 762 763 s->setTo(tmp); 764} 765 766status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 767 ALOGV("entering parseChunk %lld/%d", *offset, depth); 768 uint32_t hdr[2]; 769 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 770 return ERROR_IO; 771 } 772 uint64_t chunk_size = ntohl(hdr[0]); 773 int32_t chunk_type = ntohl(hdr[1]); 774 off64_t data_offset = *offset + 8; 775 776 if (chunk_size == 1) { 777 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 778 return ERROR_IO; 779 } 780 chunk_size = ntoh64(chunk_size); 781 data_offset += 8; 782 783 if (chunk_size < 16) { 784 // The smallest valid chunk is 16 bytes long in this case. 785 return ERROR_MALFORMED; 786 } 787 } else if (chunk_size == 0) { 788 if (depth == 0) { 789 // atom extends to end of file 790 off64_t sourceSize; 791 if (mDataSource->getSize(&sourceSize) == OK) { 792 chunk_size = (sourceSize - *offset); 793 } else { 794 // XXX could we just pick a "sufficiently large" value here? 
795 ALOGE("atom size is 0, and data source has no size"); 796 return ERROR_MALFORMED; 797 } 798 } else { 799 // not allowed for non-toplevel atoms, skip it 800 *offset += 4; 801 return OK; 802 } 803 } else if (chunk_size < 8) { 804 // The smallest valid chunk is 8 bytes long. 805 ALOGE("invalid chunk size: %" PRIu64, chunk_size); 806 return ERROR_MALFORMED; 807 } 808 809 char chunk[5]; 810 MakeFourCCString(chunk_type, chunk); 811 ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth); 812 813 if (kUseHexDump) { 814 static const char kWhitespace[] = " "; 815 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 816 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size); 817 818 char buffer[256]; 819 size_t n = chunk_size; 820 if (n > sizeof(buffer)) { 821 n = sizeof(buffer); 822 } 823 if (mDataSource->readAt(*offset, buffer, n) 824 < (ssize_t)n) { 825 return ERROR_IO; 826 } 827 828 hexdump(buffer, n); 829 } 830 831 PathAdder autoAdder(&mPath, chunk_type); 832 833 off64_t chunk_data_size = *offset + chunk_size - data_offset; 834 835 if (chunk_type != FOURCC('c', 'p', 'r', 't') 836 && chunk_type != FOURCC('c', 'o', 'v', 'r') 837 && mPath.size() == 5 && underMetaDataPath(mPath)) { 838 off64_t stop_offset = *offset + chunk_size; 839 *offset = data_offset; 840 while (*offset < stop_offset) { 841 status_t err = parseChunk(offset, depth + 1); 842 if (err != OK) { 843 return err; 844 } 845 } 846 847 if (*offset != stop_offset) { 848 return ERROR_MALFORMED; 849 } 850 851 return OK; 852 } 853 854 switch(chunk_type) { 855 case FOURCC('m', 'o', 'o', 'v'): 856 case FOURCC('t', 'r', 'a', 'k'): 857 case FOURCC('m', 'd', 'i', 'a'): 858 case FOURCC('m', 'i', 'n', 'f'): 859 case FOURCC('d', 'i', 'n', 'f'): 860 case FOURCC('s', 't', 'b', 'l'): 861 case FOURCC('m', 'v', 'e', 'x'): 862 case FOURCC('m', 'o', 'o', 'f'): 863 case FOURCC('t', 'r', 'a', 'f'): 864 case FOURCC('m', 'f', 'r', 'a'): 865 case FOURCC('u', 'd', 't', 'a'): 866 case FOURCC('i', 
'l', 's', 't'): 867 case FOURCC('s', 'i', 'n', 'f'): 868 case FOURCC('s', 'c', 'h', 'i'): 869 case FOURCC('e', 'd', 't', 's'): 870 { 871 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 872 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size); 873 874 if (mDataSource->flags() 875 & (DataSource::kWantsPrefetching 876 | DataSource::kIsCachingDataSource)) { 877 sp<MPEG4DataSource> cachedSource = 878 new MPEG4DataSource(mDataSource); 879 880 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { 881 mDataSource = cachedSource; 882 } 883 } 884 885 mLastTrack->sampleTable = new SampleTable(mDataSource); 886 } 887 888 bool isTrack = false; 889 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 890 isTrack = true; 891 892 Track *track = new Track; 893 track->next = NULL; 894 if (mLastTrack) { 895 mLastTrack->next = track; 896 } else { 897 mFirstTrack = track; 898 } 899 mLastTrack = track; 900 901 track->meta = new MetaData; 902 track->includes_expensive_metadata = false; 903 track->skipTrack = false; 904 track->timescale = 0; 905 track->meta->setCString(kKeyMIMEType, "application/octet-stream"); 906 } 907 908 off64_t stop_offset = *offset + chunk_size; 909 *offset = data_offset; 910 while (*offset < stop_offset) { 911 status_t err = parseChunk(offset, depth + 1); 912 if (err != OK) { 913 return err; 914 } 915 } 916 917 if (*offset != stop_offset) { 918 return ERROR_MALFORMED; 919 } 920 921 if (isTrack) { 922 if (mLastTrack->skipTrack) { 923 Track *cur = mFirstTrack; 924 925 if (cur == mLastTrack) { 926 delete cur; 927 mFirstTrack = mLastTrack = NULL; 928 } else { 929 while (cur && cur->next != mLastTrack) { 930 cur = cur->next; 931 } 932 cur->next = NULL; 933 delete mLastTrack; 934 mLastTrack = cur; 935 } 936 937 return OK; 938 } 939 940 status_t err = verifyTrack(mLastTrack); 941 942 if (err != OK) { 943 return err; 944 } 945 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 946 mInitCheck = OK; 947 948 if (!mIsDrm) { 949 return UNKNOWN_ERROR; // 
Return a dummy error. 950 } else { 951 return OK; 952 } 953 } 954 break; 955 } 956 957 case FOURCC('e', 'l', 's', 't'): 958 { 959 *offset += chunk_size; 960 961 // See 14496-12 8.6.6 962 uint8_t version; 963 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 964 return ERROR_IO; 965 } 966 967 uint32_t entry_count; 968 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 969 return ERROR_IO; 970 } 971 972 if (entry_count != 1) { 973 // we only support a single entry at the moment, for gapless playback 974 ALOGW("ignoring edit list with %d entries", entry_count); 975 } else if (mHeaderTimescale == 0) { 976 ALOGW("ignoring edit list because timescale is 0"); 977 } else { 978 off64_t entriesoffset = data_offset + 8; 979 uint64_t segment_duration; 980 int64_t media_time; 981 982 if (version == 1) { 983 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 984 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 985 return ERROR_IO; 986 } 987 } else if (version == 0) { 988 uint32_t sd; 989 int32_t mt; 990 if (!mDataSource->getUInt32(entriesoffset, &sd) || 991 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 992 return ERROR_IO; 993 } 994 segment_duration = sd; 995 media_time = mt; 996 } else { 997 return ERROR_IO; 998 } 999 1000 uint64_t halfscale = mHeaderTimescale / 2; 1001 segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale; 1002 media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale; 1003 1004 int64_t duration; 1005 int32_t samplerate; 1006 if (!mLastTrack) { 1007 return ERROR_MALFORMED; 1008 } 1009 if (mLastTrack->meta->findInt64(kKeyDuration, &duration) && 1010 mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) { 1011 1012 int64_t delay = (media_time * samplerate + 500000) / 1000000; 1013 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 1014 1015 int64_t paddingus = duration - (segment_duration + media_time); 1016 if (paddingus < 0) { 1017 // track duration from 
media header (which is what kKeyDuration is) might 1018 // be slightly shorter than the segment duration, which would make the 1019 // padding negative. Clamp to zero. 1020 paddingus = 0; 1021 } 1022 int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000; 1023 mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples); 1024 } 1025 } 1026 break; 1027 } 1028 1029 case FOURCC('f', 'r', 'm', 'a'): 1030 { 1031 *offset += chunk_size; 1032 1033 uint32_t original_fourcc; 1034 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 1035 return ERROR_IO; 1036 } 1037 original_fourcc = ntohl(original_fourcc); 1038 ALOGV("read original format: %d", original_fourcc); 1039 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 1040 uint32_t num_channels = 0; 1041 uint32_t sample_rate = 0; 1042 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 1043 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1044 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1045 } 1046 break; 1047 } 1048 1049 case FOURCC('t', 'e', 'n', 'c'): 1050 { 1051 *offset += chunk_size; 1052 1053 if (chunk_size < 32) { 1054 return ERROR_MALFORMED; 1055 } 1056 1057 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1058 // default IV size, 16 bytes default KeyID 1059 // (ISO 23001-7) 1060 char buf[4]; 1061 memset(buf, 0, 4); 1062 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1063 return ERROR_IO; 1064 } 1065 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1066 if (defaultAlgorithmId > 1) { 1067 // only 0 (clear) and 1 (AES-128) are valid 1068 return ERROR_MALFORMED; 1069 } 1070 1071 memset(buf, 0, 4); 1072 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1073 return ERROR_IO; 1074 } 1075 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 1076 1077 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 1078 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 1079 
// only unencrypted data must have 0 IV size 1080 return ERROR_MALFORMED; 1081 } else if (defaultIVSize != 0 && 1082 defaultIVSize != 8 && 1083 defaultIVSize != 16) { 1084 // only supported sizes are 0, 8 and 16 1085 return ERROR_MALFORMED; 1086 } 1087 1088 uint8_t defaultKeyId[16]; 1089 1090 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1091 return ERROR_IO; 1092 } 1093 1094 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId); 1095 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1096 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1097 break; 1098 } 1099 1100 case FOURCC('t', 'k', 'h', 'd'): 1101 { 1102 *offset += chunk_size; 1103 1104 status_t err; 1105 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1106 return err; 1107 } 1108 1109 break; 1110 } 1111 1112 case FOURCC('p', 's', 's', 'h'): 1113 { 1114 *offset += chunk_size; 1115 1116 PsshInfo pssh; 1117 1118 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1119 return ERROR_IO; 1120 } 1121 1122 uint32_t psshdatalen = 0; 1123 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1124 return ERROR_IO; 1125 } 1126 pssh.datalen = ntohl(psshdatalen); 1127 ALOGV("pssh data size: %d", pssh.datalen); 1128 if (pssh.datalen + 20 > chunk_size) { 1129 // pssh data length exceeds size of containing box 1130 return ERROR_MALFORMED; 1131 } 1132 1133 pssh.data = new (std::nothrow) uint8_t[pssh.datalen]; 1134 if (pssh.data == NULL) { 1135 return ERROR_MALFORMED; 1136 } 1137 ALOGV("allocated pssh @ %p", pssh.data); 1138 ssize_t requested = (ssize_t) pssh.datalen; 1139 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1140 return ERROR_IO; 1141 } 1142 mPssh.push_back(pssh); 1143 1144 break; 1145 } 1146 1147 case FOURCC('m', 'd', 'h', 'd'): 1148 { 1149 *offset += chunk_size; 1150 1151 if (chunk_data_size < 4 || mLastTrack == NULL) { 1152 return ERROR_MALFORMED; 1153 } 1154 1155 uint8_t 
version; 1156 if (mDataSource->readAt( 1157 data_offset, &version, sizeof(version)) 1158 < (ssize_t)sizeof(version)) { 1159 return ERROR_IO; 1160 } 1161 1162 off64_t timescale_offset; 1163 1164 if (version == 1) { 1165 timescale_offset = data_offset + 4 + 16; 1166 } else if (version == 0) { 1167 timescale_offset = data_offset + 4 + 8; 1168 } else { 1169 return ERROR_IO; 1170 } 1171 1172 uint32_t timescale; 1173 if (mDataSource->readAt( 1174 timescale_offset, ×cale, sizeof(timescale)) 1175 < (ssize_t)sizeof(timescale)) { 1176 return ERROR_IO; 1177 } 1178 1179 if (!timescale) { 1180 ALOGE("timescale should not be ZERO."); 1181 return ERROR_MALFORMED; 1182 } 1183 1184 mLastTrack->timescale = ntohl(timescale); 1185 1186 // 14496-12 says all ones means indeterminate, but some files seem to use 1187 // 0 instead. We treat both the same. 1188 int64_t duration = 0; 1189 if (version == 1) { 1190 if (mDataSource->readAt( 1191 timescale_offset + 4, &duration, sizeof(duration)) 1192 < (ssize_t)sizeof(duration)) { 1193 return ERROR_IO; 1194 } 1195 if (duration != -1) { 1196 duration = ntoh64(duration); 1197 } 1198 } else { 1199 uint32_t duration32; 1200 if (mDataSource->readAt( 1201 timescale_offset + 4, &duration32, sizeof(duration32)) 1202 < (ssize_t)sizeof(duration32)) { 1203 return ERROR_IO; 1204 } 1205 if (duration32 != 0xffffffff) { 1206 duration = ntohl(duration32); 1207 } 1208 } 1209 if (duration != 0) { 1210 mLastTrack->meta->setInt64( 1211 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1212 } 1213 1214 uint8_t lang[2]; 1215 off64_t lang_offset; 1216 if (version == 1) { 1217 lang_offset = timescale_offset + 4 + 8; 1218 } else if (version == 0) { 1219 lang_offset = timescale_offset + 4 + 4; 1220 } else { 1221 return ERROR_IO; 1222 } 1223 1224 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1225 < (ssize_t)sizeof(lang)) { 1226 return ERROR_IO; 1227 } 1228 1229 // To get the ISO-639-2/T three character language code 1230 // 1 bit pad followed by 3 
5-bits characters. Each character 1231 // is packed as the difference between its ASCII value and 0x60. 1232 char lang_code[4]; 1233 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1234 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1235 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1236 lang_code[3] = '\0'; 1237 1238 mLastTrack->meta->setCString( 1239 kKeyMediaLanguage, lang_code); 1240 1241 break; 1242 } 1243 1244 case FOURCC('s', 't', 's', 'd'): 1245 { 1246 if (chunk_data_size < 8) { 1247 return ERROR_MALFORMED; 1248 } 1249 1250 uint8_t buffer[8]; 1251 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1252 return ERROR_MALFORMED; 1253 } 1254 1255 if (mDataSource->readAt( 1256 data_offset, buffer, 8) < 8) { 1257 return ERROR_IO; 1258 } 1259 1260 if (U32_AT(buffer) != 0) { 1261 // Should be version 0, flags 0. 1262 return ERROR_MALFORMED; 1263 } 1264 1265 uint32_t entry_count = U32_AT(&buffer[4]); 1266 1267 if (entry_count > 1) { 1268 // For 3GPP timed text, there could be multiple tx3g boxes contain 1269 // multiple text display formats. These formats will be used to 1270 // display the timed text. 1271 // For encrypted files, there may also be more than one entry. 1272 const char *mime; 1273 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1274 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1275 strcasecmp(mime, "application/octet-stream")) { 1276 // For now we only support a single type of media per track. 
1277 mLastTrack->skipTrack = true; 1278 *offset += chunk_size; 1279 break; 1280 } 1281 } 1282 off64_t stop_offset = *offset + chunk_size; 1283 *offset = data_offset + 8; 1284 for (uint32_t i = 0; i < entry_count; ++i) { 1285 status_t err = parseChunk(offset, depth + 1); 1286 if (err != OK) { 1287 return err; 1288 } 1289 } 1290 1291 if (*offset != stop_offset) { 1292 return ERROR_MALFORMED; 1293 } 1294 break; 1295 } 1296 1297 case FOURCC('m', 'p', '4', 'a'): 1298 case FOURCC('e', 'n', 'c', 'a'): 1299 case FOURCC('s', 'a', 'm', 'r'): 1300 case FOURCC('s', 'a', 'w', 'b'): 1301 { 1302 uint8_t buffer[8 + 20]; 1303 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1304 // Basic AudioSampleEntry size. 1305 return ERROR_MALFORMED; 1306 } 1307 1308 if (mDataSource->readAt( 1309 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1310 return ERROR_IO; 1311 } 1312 1313 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1314 uint32_t num_channels = U16_AT(&buffer[16]); 1315 1316 uint16_t sample_size = U16_AT(&buffer[18]); 1317 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1318 1319 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1320 // if the chunk type is enca, we'll get the type from the sinf/frma box later 1321 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1322 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1323 } 1324 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1325 chunk, num_channels, sample_size, sample_rate); 1326 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1327 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1328 1329 off64_t stop_offset = *offset + chunk_size; 1330 *offset = data_offset + sizeof(buffer); 1331 while (*offset < stop_offset) { 1332 status_t err = parseChunk(offset, depth + 1); 1333 if (err != OK) { 1334 return err; 1335 } 1336 } 1337 1338 if (*offset != stop_offset) { 1339 return ERROR_MALFORMED; 1340 } 1341 break; 1342 } 1343 1344 case FOURCC('m', 'p', 
'4', 'v'): 1345 case FOURCC('e', 'n', 'c', 'v'): 1346 case FOURCC('s', '2', '6', '3'): 1347 case FOURCC('H', '2', '6', '3'): 1348 case FOURCC('h', '2', '6', '3'): 1349 case FOURCC('a', 'v', 'c', '1'): 1350 case FOURCC('h', 'v', 'c', '1'): 1351 case FOURCC('h', 'e', 'v', '1'): 1352 { 1353 mHasVideo = true; 1354 1355 uint8_t buffer[78]; 1356 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1357 // Basic VideoSampleEntry size. 1358 return ERROR_MALFORMED; 1359 } 1360 1361 if (mDataSource->readAt( 1362 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1363 return ERROR_IO; 1364 } 1365 1366 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1367 uint16_t width = U16_AT(&buffer[6 + 18]); 1368 uint16_t height = U16_AT(&buffer[6 + 20]); 1369 1370 // The video sample is not standard-compliant if it has invalid dimension. 1371 // Use some default width and height value, and 1372 // let the decoder figure out the actual width and height (and thus 1373 // be prepared for INFO_FOMRAT_CHANGED event). 
1374 if (width == 0) width = 352; 1375 if (height == 0) height = 288; 1376 1377 // printf("*** coding='%s' width=%d height=%d\n", 1378 // chunk, width, height); 1379 1380 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1381 // if the chunk type is encv, we'll get the type from the sinf/frma box later 1382 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1383 } 1384 mLastTrack->meta->setInt32(kKeyWidth, width); 1385 mLastTrack->meta->setInt32(kKeyHeight, height); 1386 1387 off64_t stop_offset = *offset + chunk_size; 1388 *offset = data_offset + sizeof(buffer); 1389 while (*offset < stop_offset) { 1390 status_t err = parseChunk(offset, depth + 1); 1391 if (err != OK) { 1392 return err; 1393 } 1394 } 1395 1396 if (*offset != stop_offset) { 1397 return ERROR_MALFORMED; 1398 } 1399 break; 1400 } 1401 1402 case FOURCC('s', 't', 'c', 'o'): 1403 case FOURCC('c', 'o', '6', '4'): 1404 { 1405 status_t err = 1406 mLastTrack->sampleTable->setChunkOffsetParams( 1407 chunk_type, data_offset, chunk_data_size); 1408 1409 *offset += chunk_size; 1410 1411 if (err != OK) { 1412 return err; 1413 } 1414 1415 break; 1416 } 1417 1418 case FOURCC('s', 't', 's', 'c'): 1419 { 1420 status_t err = 1421 mLastTrack->sampleTable->setSampleToChunkParams( 1422 data_offset, chunk_data_size); 1423 1424 *offset += chunk_size; 1425 1426 if (err != OK) { 1427 return err; 1428 } 1429 1430 break; 1431 } 1432 1433 case FOURCC('s', 't', 's', 'z'): 1434 case FOURCC('s', 't', 'z', '2'): 1435 { 1436 status_t err = 1437 mLastTrack->sampleTable->setSampleSizeParams( 1438 chunk_type, data_offset, chunk_data_size); 1439 1440 *offset += chunk_size; 1441 1442 if (err != OK) { 1443 return err; 1444 } 1445 1446 size_t max_size; 1447 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1448 1449 if (err != OK) { 1450 return err; 1451 } 1452 1453 if (max_size != 0) { 1454 // Assume that a given buffer only contains at most 10 chunks, 1455 // each chunk originally prefixed with a 2 byte length 
will 1456 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1457 // and thus will grow by 2 bytes per chunk. 1458 if (max_size > SIZE_MAX - 10 * 2) { 1459 ALOGE("max sample size too big: %zu", max_size); 1460 return ERROR_MALFORMED; 1461 } 1462 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1463 } else { 1464 // No size was specified. Pick a conservatively large size. 1465 uint32_t width, height; 1466 if (!mLastTrack->meta->findInt32(kKeyWidth, (int32_t*)&width) || 1467 !mLastTrack->meta->findInt32(kKeyHeight,(int32_t*) &height)) { 1468 ALOGE("No width or height, assuming worst case 1080p"); 1469 width = 1920; 1470 height = 1080; 1471 } else { 1472 // A resolution was specified, check that it's not too big. The values below 1473 // were chosen so that the calculations below don't cause overflows, they're 1474 // not indicating that resolutions up to 32kx32k are actually supported. 1475 if (width > 32768 || height > 32768) { 1476 ALOGE("can't support %u x %u video", width, height); 1477 return ERROR_MALFORMED; 1478 } 1479 } 1480 1481 const char *mime; 1482 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1483 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 1484 // AVC requires compression ratio of at least 2, and uses 1485 // macroblocks 1486 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1487 } else { 1488 // For all other formats there is no minimum compression 1489 // ratio. Use compression ratio of 1. 1490 max_size = width * height * 3 / 2; 1491 } 1492 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size); 1493 } 1494 1495 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1496 // mimetype) previously obtained, so don't cache them. 1497 const char *mime; 1498 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1499 // Calculate average frame rate. 
1500 if (!strncasecmp("video/", mime, 6)) { 1501 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1502 int64_t durationUs; 1503 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { 1504 if (durationUs > 0) { 1505 int32_t frameRate = (nSamples * 1000000LL + 1506 (durationUs >> 1)) / durationUs; 1507 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1508 } 1509 } 1510 } 1511 1512 break; 1513 } 1514 1515 case FOURCC('s', 't', 't', 's'): 1516 { 1517 *offset += chunk_size; 1518 1519 status_t err = 1520 mLastTrack->sampleTable->setTimeToSampleParams( 1521 data_offset, chunk_data_size); 1522 1523 if (err != OK) { 1524 return err; 1525 } 1526 1527 break; 1528 } 1529 1530 case FOURCC('c', 't', 't', 's'): 1531 { 1532 *offset += chunk_size; 1533 1534 status_t err = 1535 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1536 data_offset, chunk_data_size); 1537 1538 if (err != OK) { 1539 return err; 1540 } 1541 1542 break; 1543 } 1544 1545 case FOURCC('s', 't', 's', 's'): 1546 { 1547 *offset += chunk_size; 1548 1549 status_t err = 1550 mLastTrack->sampleTable->setSyncSampleParams( 1551 data_offset, chunk_data_size); 1552 1553 if (err != OK) { 1554 return err; 1555 } 1556 1557 break; 1558 } 1559 1560 // �xyz 1561 case FOURCC(0xA9, 'x', 'y', 'z'): 1562 { 1563 *offset += chunk_size; 1564 1565 // Best case the total data length inside "�xyz" box 1566 // would be 8, for instance "�xyz" + "\x00\x04\x15\xc7" + "0+0/", 1567 // where "\x00\x04" is the text string length with value = 4, 1568 // "\0x15\xc7" is the language code = en, and "0+0" is a 1569 // location (string) value with longitude = 0 and latitude = 0. 1570 if (chunk_data_size < 8) { 1571 return ERROR_MALFORMED; 1572 } 1573 1574 // Worst case the location string length would be 18, 1575 // for instance +90.0000-180.0000, without the trailing "/" and 1576 // the string length + language code. 
1577 char buffer[18]; 1578 1579 // Substracting 5 from the data size is because the text string length + 1580 // language code takes 4 bytes, and the trailing slash "/" takes 1 byte. 1581 off64_t location_length = chunk_data_size - 5; 1582 if (location_length >= (off64_t) sizeof(buffer)) { 1583 return ERROR_MALFORMED; 1584 } 1585 1586 if (mDataSource->readAt( 1587 data_offset + 4, buffer, location_length) < location_length) { 1588 return ERROR_IO; 1589 } 1590 1591 buffer[location_length] = '\0'; 1592 mFileMetaData->setCString(kKeyLocation, buffer); 1593 break; 1594 } 1595 1596 case FOURCC('e', 's', 'd', 's'): 1597 { 1598 *offset += chunk_size; 1599 1600 if (chunk_data_size < 4) { 1601 return ERROR_MALFORMED; 1602 } 1603 1604 uint8_t buffer[256]; 1605 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1606 return ERROR_BUFFER_TOO_SMALL; 1607 } 1608 1609 if (mDataSource->readAt( 1610 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1611 return ERROR_IO; 1612 } 1613 1614 if (U32_AT(buffer) != 0) { 1615 // Should be version 0, flags 0. 1616 return ERROR_MALFORMED; 1617 } 1618 1619 mLastTrack->meta->setData( 1620 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1621 1622 if (mPath.size() >= 2 1623 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1624 // Information from the ESDS must be relied on for proper 1625 // setup of sample rate and channel count for MPEG4 Audio. 1626 // The generic header appears to only contain generic 1627 // information... 
1628 1629 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1630 &buffer[4], chunk_data_size - 4); 1631 1632 if (err != OK) { 1633 return err; 1634 } 1635 } 1636 1637 break; 1638 } 1639 1640 case FOURCC('a', 'v', 'c', 'C'): 1641 { 1642 *offset += chunk_size; 1643 1644 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1645 1646 if (mDataSource->readAt( 1647 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1648 return ERROR_IO; 1649 } 1650 1651 mLastTrack->meta->setData( 1652 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size); 1653 1654 break; 1655 } 1656 case FOURCC('h', 'v', 'c', 'C'): 1657 { 1658 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1659 1660 if (mDataSource->readAt( 1661 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1662 return ERROR_IO; 1663 } 1664 1665 mLastTrack->meta->setData( 1666 kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size); 1667 1668 *offset += chunk_size; 1669 break; 1670 } 1671 1672 case FOURCC('d', '2', '6', '3'): 1673 { 1674 *offset += chunk_size; 1675 /* 1676 * d263 contains a fixed 7 bytes part: 1677 * vendor - 4 bytes 1678 * version - 1 byte 1679 * level - 1 byte 1680 * profile - 1 byte 1681 * optionally, "d263" box itself may contain a 16-byte 1682 * bit rate box (bitr) 1683 * average bit rate - 4 bytes 1684 * max bit rate - 4 bytes 1685 */ 1686 char buffer[23]; 1687 if (chunk_data_size != 7 && 1688 chunk_data_size != 23) { 1689 ALOGE("Incorrect D263 box size %lld", chunk_data_size); 1690 return ERROR_MALFORMED; 1691 } 1692 1693 if (mDataSource->readAt( 1694 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1695 return ERROR_IO; 1696 } 1697 1698 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 1699 1700 break; 1701 } 1702 1703 case FOURCC('m', 'e', 't', 'a'): 1704 { 1705 uint8_t buffer[4]; 1706 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1707 *offset += chunk_size; 1708 return ERROR_MALFORMED; 1709 } 1710 1711 if (mDataSource->readAt( 1712 
data_offset, buffer, 4) < 4) { 1713 *offset += chunk_size; 1714 return ERROR_IO; 1715 } 1716 1717 if (U32_AT(buffer) != 0) { 1718 // Should be version 0, flags 0. 1719 1720 // If it's not, let's assume this is one of those 1721 // apparently malformed chunks that don't have flags 1722 // and completely different semantics than what's 1723 // in the MPEG4 specs and skip it. 1724 *offset += chunk_size; 1725 return OK; 1726 } 1727 1728 off64_t stop_offset = *offset + chunk_size; 1729 *offset = data_offset + sizeof(buffer); 1730 while (*offset < stop_offset) { 1731 status_t err = parseChunk(offset, depth + 1); 1732 if (err != OK) { 1733 return err; 1734 } 1735 } 1736 1737 if (*offset != stop_offset) { 1738 return ERROR_MALFORMED; 1739 } 1740 break; 1741 } 1742 1743 case FOURCC('m', 'e', 'a', 'n'): 1744 case FOURCC('n', 'a', 'm', 'e'): 1745 case FOURCC('d', 'a', 't', 'a'): 1746 { 1747 *offset += chunk_size; 1748 1749 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 1750 status_t err = parseITunesMetaData(data_offset, chunk_data_size); 1751 1752 if (err != OK) { 1753 return err; 1754 } 1755 } 1756 1757 break; 1758 } 1759 1760 case FOURCC('m', 'v', 'h', 'd'): 1761 { 1762 *offset += chunk_size; 1763 1764 if (chunk_data_size < 32) { 1765 return ERROR_MALFORMED; 1766 } 1767 1768 uint8_t header[32]; 1769 if (mDataSource->readAt( 1770 data_offset, header, sizeof(header)) 1771 < (ssize_t)sizeof(header)) { 1772 return ERROR_IO; 1773 } 1774 1775 uint64_t creationTime; 1776 uint64_t duration = 0; 1777 if (header[0] == 1) { 1778 creationTime = U64_AT(&header[4]); 1779 mHeaderTimescale = U32_AT(&header[20]); 1780 duration = U64_AT(&header[24]); 1781 if (duration == 0xffffffffffffffff) { 1782 duration = 0; 1783 } 1784 } else if (header[0] != 0) { 1785 return ERROR_MALFORMED; 1786 } else { 1787 creationTime = U32_AT(&header[4]); 1788 mHeaderTimescale = U32_AT(&header[12]); 1789 uint32_t d32 = U32_AT(&header[16]); 1790 if (d32 == 0xffffffff) { 1791 d32 = 0; 1792 } 1793 duration = 
d32; 1794 } 1795 if (duration != 0) { 1796 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 1797 } 1798 1799 String8 s; 1800 convertTimeToDate(creationTime, &s); 1801 1802 mFileMetaData->setCString(kKeyDate, s.string()); 1803 1804 break; 1805 } 1806 1807 case FOURCC('m', 'e', 'h', 'd'): 1808 { 1809 *offset += chunk_size; 1810 1811 if (chunk_data_size < 8) { 1812 return ERROR_MALFORMED; 1813 } 1814 1815 uint8_t flags[4]; 1816 if (mDataSource->readAt( 1817 data_offset, flags, sizeof(flags)) 1818 < (ssize_t)sizeof(flags)) { 1819 return ERROR_IO; 1820 } 1821 1822 uint64_t duration = 0; 1823 if (flags[0] == 1) { 1824 // 64 bit 1825 if (chunk_data_size < 12) { 1826 return ERROR_MALFORMED; 1827 } 1828 mDataSource->getUInt64(data_offset + 4, &duration); 1829 if (duration == 0xffffffffffffffff) { 1830 duration = 0; 1831 } 1832 } else if (flags[0] == 0) { 1833 // 32 bit 1834 uint32_t d32; 1835 mDataSource->getUInt32(data_offset + 4, &d32); 1836 if (d32 == 0xffffffff) { 1837 d32 = 0; 1838 } 1839 duration = d32; 1840 } else { 1841 return ERROR_MALFORMED; 1842 } 1843 1844 if (duration != 0) { 1845 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 1846 } 1847 1848 break; 1849 } 1850 1851 case FOURCC('m', 'd', 'a', 't'): 1852 { 1853 ALOGV("mdat chunk, drm: %d", mIsDrm); 1854 if (!mIsDrm) { 1855 *offset += chunk_size; 1856 break; 1857 } 1858 1859 if (chunk_size < 8) { 1860 return ERROR_MALFORMED; 1861 } 1862 1863 return parseDrmSINF(offset, data_offset); 1864 } 1865 1866 case FOURCC('h', 'd', 'l', 'r'): 1867 { 1868 *offset += chunk_size; 1869 1870 uint32_t buffer; 1871 if (mDataSource->readAt( 1872 data_offset + 8, &buffer, 4) < 4) { 1873 return ERROR_IO; 1874 } 1875 1876 uint32_t type = ntohl(buffer); 1877 // For the 3GPP file format, the handler-type within the 'hdlr' box 1878 // shall be 'text'. We also want to support 'sbtl' handler type 1879 // for a practical reason as various MPEG4 containers use it. 
1880 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 1881 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 1882 } 1883 1884 break; 1885 } 1886 1887 case FOURCC('t', 'r', 'e', 'x'): 1888 { 1889 *offset += chunk_size; 1890 1891 if (chunk_data_size < 24) { 1892 return ERROR_IO; 1893 } 1894 Trex trex; 1895 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) || 1896 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) || 1897 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) || 1898 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) || 1899 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) { 1900 return ERROR_IO; 1901 } 1902 mTrex.add(trex); 1903 break; 1904 } 1905 1906 case FOURCC('t', 'x', '3', 'g'): 1907 { 1908 uint32_t type; 1909 const void *data; 1910 size_t size = 0; 1911 if (!mLastTrack->meta->findData( 1912 kKeyTextFormatData, &type, &data, &size)) { 1913 size = 0; 1914 } 1915 1916 if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) { 1917 return ERROR_MALFORMED; 1918 } 1919 1920 uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size]; 1921 if (buffer == NULL) { 1922 return ERROR_MALFORMED; 1923 } 1924 1925 if (size > 0) { 1926 memcpy(buffer, data, size); 1927 } 1928 1929 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 1930 < chunk_size) { 1931 delete[] buffer; 1932 buffer = NULL; 1933 1934 // advance read pointer so we don't end up reading this again 1935 *offset += chunk_size; 1936 return ERROR_IO; 1937 } 1938 1939 mLastTrack->meta->setData( 1940 kKeyTextFormatData, 0, buffer, size + chunk_size); 1941 1942 delete[] buffer; 1943 1944 *offset += chunk_size; 1945 break; 1946 } 1947 1948 case FOURCC('c', 'o', 'v', 'r'): 1949 { 1950 *offset += chunk_size; 1951 1952 if (mFileMetaData != NULL) { 1953 ALOGV("chunk_data_size = %lld and data_offset = %lld", 1954 chunk_data_size, 
data_offset); 1955 1956 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) { 1957 return ERROR_MALFORMED; 1958 } 1959 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1); 1960 if (mDataSource->readAt( 1961 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) { 1962 return ERROR_IO; 1963 } 1964 const int kSkipBytesOfDataBox = 16; 1965 if (chunk_data_size <= kSkipBytesOfDataBox) { 1966 return ERROR_MALFORMED; 1967 } 1968 1969 mFileMetaData->setData( 1970 kKeyAlbumArt, MetaData::TYPE_NONE, 1971 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 1972 } 1973 1974 break; 1975 } 1976 1977 case FOURCC('t', 'i', 't', 'l'): 1978 case FOURCC('p', 'e', 'r', 'f'): 1979 case FOURCC('a', 'u', 't', 'h'): 1980 case FOURCC('g', 'n', 'r', 'e'): 1981 case FOURCC('a', 'l', 'b', 'm'): 1982 case FOURCC('y', 'r', 'r', 'c'): 1983 { 1984 *offset += chunk_size; 1985 1986 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth); 1987 1988 if (err != OK) { 1989 return err; 1990 } 1991 1992 break; 1993 } 1994 1995 case FOURCC('I', 'D', '3', '2'): 1996 { 1997 *offset += chunk_size; 1998 1999 if (chunk_data_size < 6) { 2000 return ERROR_MALFORMED; 2001 } 2002 2003 parseID3v2MetaData(data_offset + 6); 2004 2005 break; 2006 } 2007 2008 case FOURCC('-', '-', '-', '-'): 2009 { 2010 mLastCommentMean.clear(); 2011 mLastCommentName.clear(); 2012 mLastCommentData.clear(); 2013 *offset += chunk_size; 2014 break; 2015 } 2016 2017 case FOURCC('s', 'i', 'd', 'x'): 2018 { 2019 parseSegmentIndex(data_offset, chunk_data_size); 2020 *offset += chunk_size; 2021 return UNKNOWN_ERROR; // stop parsing after sidx 2022 } 2023 2024 default: 2025 { 2026 *offset += chunk_size; 2027 break; 2028 } 2029 } 2030 2031 return OK; 2032} 2033 2034status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 2035 ALOGV("MPEG4Extractor::parseSegmentIndex"); 2036 2037 if (size < 12) { 2038 return -EINVAL; 2039 } 2040 2041 
uint32_t flags; 2042 if (!mDataSource->getUInt32(offset, &flags)) { 2043 return ERROR_MALFORMED; 2044 } 2045 2046 uint32_t version = flags >> 24; 2047 flags &= 0xffffff; 2048 2049 ALOGV("sidx version %d", version); 2050 2051 uint32_t referenceId; 2052 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 2053 return ERROR_MALFORMED; 2054 } 2055 2056 uint32_t timeScale; 2057 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 2058 return ERROR_MALFORMED; 2059 } 2060 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 2061 2062 uint64_t earliestPresentationTime; 2063 uint64_t firstOffset; 2064 2065 offset += 12; 2066 size -= 12; 2067 2068 if (version == 0) { 2069 if (size < 8) { 2070 return -EINVAL; 2071 } 2072 uint32_t tmp; 2073 if (!mDataSource->getUInt32(offset, &tmp)) { 2074 return ERROR_MALFORMED; 2075 } 2076 earliestPresentationTime = tmp; 2077 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 2078 return ERROR_MALFORMED; 2079 } 2080 firstOffset = tmp; 2081 offset += 8; 2082 size -= 8; 2083 } else { 2084 if (size < 16) { 2085 return -EINVAL; 2086 } 2087 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 2088 return ERROR_MALFORMED; 2089 } 2090 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 2091 return ERROR_MALFORMED; 2092 } 2093 offset += 16; 2094 size -= 16; 2095 } 2096 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset); 2097 2098 if (size < 4) { 2099 return -EINVAL; 2100 } 2101 2102 uint16_t referenceCount; 2103 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 2104 return ERROR_MALFORMED; 2105 } 2106 offset += 4; 2107 size -= 4; 2108 ALOGV("refcount: %d", referenceCount); 2109 2110 if (size < referenceCount * 12) { 2111 return -EINVAL; 2112 } 2113 2114 uint64_t total_duration = 0; 2115 for (unsigned int i = 0; i < referenceCount; i++) { 2116 uint32_t d1, d2, d3; 2117 2118 if (!mDataSource->getUInt32(offset, &d1) || // size 2119 !mDataSource->getUInt32(offset + 4, &d2) || // 
duration 2120 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 2121 return ERROR_MALFORMED; 2122 } 2123 2124 if (d1 & 0x80000000) { 2125 ALOGW("sub-sidx boxes not supported yet"); 2126 } 2127 bool sap = d3 & 0x80000000; 2128 uint32_t saptype = (d3 >> 28) & 7; 2129 if (!sap || (saptype != 1 && saptype != 2)) { 2130 // type 1 and 2 are sync samples 2131 ALOGW("not a stream access point, or unsupported type: %08x", d3); 2132 } 2133 total_duration += d2; 2134 offset += 12; 2135 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 2136 SidxEntry se; 2137 se.mSize = d1 & 0x7fffffff; 2138 se.mDurationUs = 1000000LL * d2 / timeScale; 2139 mSidxEntries.add(se); 2140 } 2141 2142 uint64_t sidxDuration = total_duration * 1000000 / timeScale; 2143 2144 int64_t metaDuration; 2145 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 2146 mLastTrack->meta->setInt64(kKeyDuration, sidxDuration); 2147 } 2148 return OK; 2149} 2150 2151 2152 2153status_t MPEG4Extractor::parseTrackHeader( 2154 off64_t data_offset, off64_t data_size) { 2155 if (data_size < 4) { 2156 return ERROR_MALFORMED; 2157 } 2158 2159 uint8_t version; 2160 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 2161 return ERROR_IO; 2162 } 2163 2164 size_t dynSize = (version == 1) ? 
36 : 24; 2165 2166 uint8_t buffer[36 + 60]; 2167 2168 if (data_size != (off64_t)dynSize + 60) { 2169 return ERROR_MALFORMED; 2170 } 2171 2172 if (mDataSource->readAt( 2173 data_offset, buffer, data_size) < (ssize_t)data_size) { 2174 return ERROR_IO; 2175 } 2176 2177 uint64_t ctime __unused, mtime __unused, duration __unused; 2178 int32_t id; 2179 2180 if (version == 1) { 2181 ctime = U64_AT(&buffer[4]); 2182 mtime = U64_AT(&buffer[12]); 2183 id = U32_AT(&buffer[20]); 2184 duration = U64_AT(&buffer[28]); 2185 } else if (version == 0) { 2186 ctime = U32_AT(&buffer[4]); 2187 mtime = U32_AT(&buffer[8]); 2188 id = U32_AT(&buffer[12]); 2189 duration = U32_AT(&buffer[20]); 2190 } else { 2191 return ERROR_UNSUPPORTED; 2192 } 2193 2194 mLastTrack->meta->setInt32(kKeyTrackID, id); 2195 2196 size_t matrixOffset = dynSize + 16; 2197 int32_t a00 = U32_AT(&buffer[matrixOffset]); 2198 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 2199 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 2200 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 2201 2202#if 0 2203 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 2204 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 2205 2206 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 2207 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 2208 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 2209 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 2210#endif 2211 2212 uint32_t rotationDegrees; 2213 2214 static const int32_t kFixedOne = 0x10000; 2215 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 2216 // Identity, no rotation 2217 rotationDegrees = 0; 2218 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 2219 rotationDegrees = 90; 2220 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 2221 rotationDegrees = 270; 2222 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { 2223 rotationDegrees = 180; 2224 } else { 2225 ALOGW("We only support 0,90,180,270 degree rotation 
matrices"); 2226 rotationDegrees = 0; 2227 } 2228 2229 if (rotationDegrees != 0) { 2230 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); 2231 } 2232 2233 // Handle presentation display size, which could be different 2234 // from the image size indicated by kKeyWidth and kKeyHeight. 2235 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2236 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2237 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); 2238 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); 2239 2240 return OK; 2241} 2242 2243status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) { 2244 if (size < 4 || size == SIZE_MAX) { 2245 return ERROR_MALFORMED; 2246 } 2247 2248 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2249 if (buffer == NULL) { 2250 return ERROR_MALFORMED; 2251 } 2252 if (mDataSource->readAt( 2253 offset, buffer, size) != (ssize_t)size) { 2254 delete[] buffer; 2255 buffer = NULL; 2256 2257 return ERROR_IO; 2258 } 2259 2260 uint32_t flags = U32_AT(buffer); 2261 2262 uint32_t metadataKey = 0; 2263 char chunk[5]; 2264 MakeFourCCString(mPath[4], chunk); 2265 ALOGV("meta: %s @ %lld", chunk, offset); 2266 switch ((int32_t)mPath[4]) { 2267 case FOURCC(0xa9, 'a', 'l', 'b'): 2268 { 2269 metadataKey = kKeyAlbum; 2270 break; 2271 } 2272 case FOURCC(0xa9, 'A', 'R', 'T'): 2273 { 2274 metadataKey = kKeyArtist; 2275 break; 2276 } 2277 case FOURCC('a', 'A', 'R', 'T'): 2278 { 2279 metadataKey = kKeyAlbumArtist; 2280 break; 2281 } 2282 case FOURCC(0xa9, 'd', 'a', 'y'): 2283 { 2284 metadataKey = kKeyYear; 2285 break; 2286 } 2287 case FOURCC(0xa9, 'n', 'a', 'm'): 2288 { 2289 metadataKey = kKeyTitle; 2290 break; 2291 } 2292 case FOURCC(0xa9, 'w', 'r', 't'): 2293 { 2294 metadataKey = kKeyWriter; 2295 break; 2296 } 2297 case FOURCC('c', 'o', 'v', 'r'): 2298 { 2299 metadataKey = kKeyAlbumArt; 2300 break; 2301 } 2302 case FOURCC('g', 'n', 'r', 'e'): 2303 { 2304 metadataKey = kKeyGenre; 2305 break; 2306 } 2307 case 
FOURCC(0xa9, 'g', 'e', 'n'): 2308 { 2309 metadataKey = kKeyGenre; 2310 break; 2311 } 2312 case FOURCC('c', 'p', 'i', 'l'): 2313 { 2314 if (size == 9 && flags == 21) { 2315 char tmp[16]; 2316 sprintf(tmp, "%d", 2317 (int)buffer[size - 1]); 2318 2319 mFileMetaData->setCString(kKeyCompilation, tmp); 2320 } 2321 break; 2322 } 2323 case FOURCC('t', 'r', 'k', 'n'): 2324 { 2325 if (size == 16 && flags == 0) { 2326 char tmp[16]; 2327 uint16_t* pTrack = (uint16_t*)&buffer[10]; 2328 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 2329 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 2330 2331 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2332 } 2333 break; 2334 } 2335 case FOURCC('d', 'i', 's', 'k'): 2336 { 2337 if ((size == 14 || size == 16) && flags == 0) { 2338 char tmp[16]; 2339 uint16_t* pDisc = (uint16_t*)&buffer[10]; 2340 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 2341 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 2342 2343 mFileMetaData->setCString(kKeyDiscNumber, tmp); 2344 } 2345 break; 2346 } 2347 case FOURCC('-', '-', '-', '-'): 2348 { 2349 buffer[size] = '\0'; 2350 switch (mPath[5]) { 2351 case FOURCC('m', 'e', 'a', 'n'): 2352 mLastCommentMean.setTo((const char *)buffer + 4); 2353 break; 2354 case FOURCC('n', 'a', 'm', 'e'): 2355 mLastCommentName.setTo((const char *)buffer + 4); 2356 break; 2357 case FOURCC('d', 'a', 't', 'a'): 2358 mLastCommentData.setTo((const char *)buffer + 8); 2359 break; 2360 } 2361 2362 // Once we have a set of mean/name/data info, go ahead and process 2363 // it to see if its something we are interested in. Whether or not 2364 // were are interested in the specific tag, make sure to clear out 2365 // the set so we can be ready to process another tuple should one 2366 // show up later in the file. 
2367 if ((mLastCommentMean.length() != 0) && 2368 (mLastCommentName.length() != 0) && 2369 (mLastCommentData.length() != 0)) { 2370 2371 if (mLastCommentMean == "com.apple.iTunes" 2372 && mLastCommentName == "iTunSMPB") { 2373 int32_t delay, padding; 2374 if (sscanf(mLastCommentData, 2375 " %*x %x %x %*x", &delay, &padding) == 2) { 2376 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 2377 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); 2378 } 2379 } 2380 2381 mLastCommentMean.clear(); 2382 mLastCommentName.clear(); 2383 mLastCommentData.clear(); 2384 } 2385 break; 2386 } 2387 2388 default: 2389 break; 2390 } 2391 2392 if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) { 2393 if (metadataKey == kKeyAlbumArt) { 2394 mFileMetaData->setData( 2395 kKeyAlbumArt, MetaData::TYPE_NONE, 2396 buffer + 8, size - 8); 2397 } else if (metadataKey == kKeyGenre) { 2398 if (flags == 0) { 2399 // uint8_t genre code, iTunes genre codes are 2400 // the standard id3 codes, except they start 2401 // at 1 instead of 0 (e.g. Pop is 14, not 13) 2402 // We use standard id3 numbering, so subtract 1. 
2403 int genrecode = (int)buffer[size - 1]; 2404 genrecode--; 2405 if (genrecode < 0) { 2406 genrecode = 255; // reserved for 'unknown genre' 2407 } 2408 char genre[10]; 2409 sprintf(genre, "%d", genrecode); 2410 2411 mFileMetaData->setCString(metadataKey, genre); 2412 } else if (flags == 1) { 2413 // custom genre string 2414 buffer[size] = '\0'; 2415 2416 mFileMetaData->setCString( 2417 metadataKey, (const char *)buffer + 8); 2418 } 2419 } else { 2420 buffer[size] = '\0'; 2421 2422 mFileMetaData->setCString( 2423 metadataKey, (const char *)buffer + 8); 2424 } 2425 } 2426 2427 delete[] buffer; 2428 buffer = NULL; 2429 2430 return OK; 2431} 2432 2433status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) { 2434 if (size < 4 || size == SIZE_MAX) { 2435 return ERROR_MALFORMED; 2436 } 2437 2438 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2439 if (buffer == NULL) { 2440 return ERROR_MALFORMED; 2441 } 2442 if (mDataSource->readAt( 2443 offset, buffer, size) != (ssize_t)size) { 2444 delete[] buffer; 2445 buffer = NULL; 2446 2447 return ERROR_IO; 2448 } 2449 2450 uint32_t metadataKey = 0; 2451 switch (mPath[depth]) { 2452 case FOURCC('t', 'i', 't', 'l'): 2453 { 2454 metadataKey = kKeyTitle; 2455 break; 2456 } 2457 case FOURCC('p', 'e', 'r', 'f'): 2458 { 2459 metadataKey = kKeyArtist; 2460 break; 2461 } 2462 case FOURCC('a', 'u', 't', 'h'): 2463 { 2464 metadataKey = kKeyWriter; 2465 break; 2466 } 2467 case FOURCC('g', 'n', 'r', 'e'): 2468 { 2469 metadataKey = kKeyGenre; 2470 break; 2471 } 2472 case FOURCC('a', 'l', 'b', 'm'): 2473 { 2474 if (buffer[size - 1] != '\0') { 2475 char tmp[4]; 2476 sprintf(tmp, "%u", buffer[size - 1]); 2477 2478 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2479 } 2480 2481 metadataKey = kKeyAlbum; 2482 break; 2483 } 2484 case FOURCC('y', 'r', 'r', 'c'): 2485 { 2486 char tmp[5]; 2487 uint16_t year = U16_AT(&buffer[4]); 2488 2489 if (year < 10000) { 2490 sprintf(tmp, "%u", year); 2491 2492 
mFileMetaData->setCString(kKeyYear, tmp); 2493 } 2494 break; 2495 } 2496 2497 default: 2498 break; 2499 } 2500 2501 if (metadataKey > 0) { 2502 bool isUTF8 = true; // Common case 2503 char16_t *framedata = NULL; 2504 int len16 = 0; // Number of UTF-16 characters 2505 2506 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00 2507 if (size < 6) { 2508 return ERROR_MALFORMED; 2509 } 2510 2511 if (size - 6 >= 4) { 2512 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator 2513 framedata = (char16_t *)(buffer + 6); 2514 if (0xfffe == *framedata) { 2515 // endianness marker (BOM) doesn't match host endianness 2516 for (int i = 0; i < len16; i++) { 2517 framedata[i] = bswap_16(framedata[i]); 2518 } 2519 // BOM is now swapped to 0xfeff, we will execute next block too 2520 } 2521 2522 if (0xfeff == *framedata) { 2523 // Remove the BOM 2524 framedata++; 2525 len16--; 2526 isUTF8 = false; 2527 } 2528 // else normal non-zero-length UTF-8 string 2529 // we can't handle UTF-16 without BOM as there is no other 2530 // indication of encoding. 2531 } 2532 2533 if (isUTF8) { 2534 buffer[size] = 0; 2535 mFileMetaData->setCString(metadataKey, (const char *)buffer + 6); 2536 } else { 2537 // Convert from UTF-16 string to UTF-8 string. 
2538 String8 tmpUTF8str(framedata, len16); 2539 mFileMetaData->setCString(metadataKey, tmpUTF8str.string()); 2540 } 2541 } 2542 2543 delete[] buffer; 2544 buffer = NULL; 2545 2546 return OK; 2547} 2548 2549void MPEG4Extractor::parseID3v2MetaData(off64_t offset) { 2550 ID3 id3(mDataSource, true /* ignorev1 */, offset); 2551 2552 if (id3.isValid()) { 2553 struct Map { 2554 int key; 2555 const char *tag1; 2556 const char *tag2; 2557 }; 2558 static const Map kMap[] = { 2559 { kKeyAlbum, "TALB", "TAL" }, 2560 { kKeyArtist, "TPE1", "TP1" }, 2561 { kKeyAlbumArtist, "TPE2", "TP2" }, 2562 { kKeyComposer, "TCOM", "TCM" }, 2563 { kKeyGenre, "TCON", "TCO" }, 2564 { kKeyTitle, "TIT2", "TT2" }, 2565 { kKeyYear, "TYE", "TYER" }, 2566 { kKeyAuthor, "TXT", "TEXT" }, 2567 { kKeyCDTrackNumber, "TRK", "TRCK" }, 2568 { kKeyDiscNumber, "TPA", "TPOS" }, 2569 { kKeyCompilation, "TCP", "TCMP" }, 2570 }; 2571 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 2572 2573 for (size_t i = 0; i < kNumMapEntries; ++i) { 2574 if (!mFileMetaData->hasData(kMap[i].key)) { 2575 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 2576 if (it->done()) { 2577 delete it; 2578 it = new ID3::Iterator(id3, kMap[i].tag2); 2579 } 2580 2581 if (it->done()) { 2582 delete it; 2583 continue; 2584 } 2585 2586 String8 s; 2587 it->getString(&s); 2588 delete it; 2589 2590 mFileMetaData->setCString(kMap[i].key, s); 2591 } 2592 } 2593 2594 size_t dataSize; 2595 String8 mime; 2596 const void *data = id3.getAlbumArt(&dataSize, &mime); 2597 2598 if (data) { 2599 mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 2600 mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string()); 2601 } 2602 } 2603} 2604 2605sp<MediaSource> MPEG4Extractor::getTrack(size_t index) { 2606 status_t err; 2607 if ((err = readMetaData()) != OK) { 2608 return NULL; 2609 } 2610 2611 Track *track = mFirstTrack; 2612 while (index > 0) { 2613 if (track == NULL) { 2614 return NULL; 2615 } 2616 2617 track = 
track->next; 2618 --index; 2619 } 2620 2621 if (track == NULL) { 2622 return NULL; 2623 } 2624 2625 2626 Trex *trex = NULL; 2627 int32_t trackId; 2628 if (track->meta->findInt32(kKeyTrackID, &trackId)) { 2629 for (size_t i = 0; i < mTrex.size(); i++) { 2630 Trex *t = &mTrex.editItemAt(index); 2631 if (t->track_ID == (uint32_t) trackId) { 2632 trex = t; 2633 break; 2634 } 2635 } 2636 } 2637 2638 ALOGV("getTrack called, pssh: %zu", mPssh.size()); 2639 2640 return new MPEG4Source(this, 2641 track->meta, mDataSource, track->timescale, track->sampleTable, 2642 mSidxEntries, trex, mMoofOffset); 2643} 2644 2645// static 2646status_t MPEG4Extractor::verifyTrack(Track *track) { 2647 const char *mime; 2648 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 2649 2650 uint32_t type; 2651 const void *data; 2652 size_t size; 2653 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 2654 if (!track->meta->findData(kKeyAVCC, &type, &data, &size) 2655 || type != kTypeAVCC) { 2656 return ERROR_MALFORMED; 2657 } 2658 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 2659 if (!track->meta->findData(kKeyHVCC, &type, &data, &size) 2660 || type != kTypeHVCC) { 2661 return ERROR_MALFORMED; 2662 } 2663 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 2664 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 2665 if (!track->meta->findData(kKeyESDS, &type, &data, &size) 2666 || type != kTypeESDS) { 2667 return ERROR_MALFORMED; 2668 } 2669 } 2670 2671 if (track->sampleTable == NULL || !track->sampleTable->isValid()) { 2672 // Make sure we have all the metadata we need. 
2673 ALOGE("stbl atom missing/invalid."); 2674 return ERROR_MALFORMED; 2675 } 2676 2677 if (track->timescale == 0) { 2678 ALOGE("timescale invalid."); 2679 return ERROR_MALFORMED; 2680 } 2681 2682 return OK; 2683} 2684 2685typedef enum { 2686 //AOT_NONE = -1, 2687 //AOT_NULL_OBJECT = 0, 2688 //AOT_AAC_MAIN = 1, /**< Main profile */ 2689 AOT_AAC_LC = 2, /**< Low Complexity object */ 2690 //AOT_AAC_SSR = 3, 2691 //AOT_AAC_LTP = 4, 2692 AOT_SBR = 5, 2693 //AOT_AAC_SCAL = 6, 2694 //AOT_TWIN_VQ = 7, 2695 //AOT_CELP = 8, 2696 //AOT_HVXC = 9, 2697 //AOT_RSVD_10 = 10, /**< (reserved) */ 2698 //AOT_RSVD_11 = 11, /**< (reserved) */ 2699 //AOT_TTSI = 12, /**< TTSI Object */ 2700 //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */ 2701 //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */ 2702 //AOT_GEN_MIDI = 15, /**< General MIDI object */ 2703 //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */ 2704 AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */ 2705 //AOT_RSVD_18 = 18, /**< (reserved) */ 2706 //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */ 2707 AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */ 2708 //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */ 2709 AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */ 2710 AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */ 2711 //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */ 2712 //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */ 2713 //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */ 2714 //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */ 2715 //AOT_RSVD_28 = 28, /**< might become SSC */ 2716 AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */ 2717 //AOT_MPEGS = 30, /**< MPEG Surround */ 2718 2719 AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */ 2720 2721 //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */ 2722 //AOT_MP3ONMP4_L2 = 33, /**< 
MPEG-Layer2 in mp4 */ 2723 //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */ 2724 //AOT_RSVD_35 = 35, /**< might become DST */ 2725 //AOT_RSVD_36 = 36, /**< might become ALS */ 2726 //AOT_AAC_SLS = 37, /**< AAC + SLS */ 2727 //AOT_SLS = 38, /**< SLS */ 2728 //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */ 2729 2730 //AOT_USAC = 42, /**< USAC */ 2731 //AOT_SAOC = 43, /**< SAOC */ 2732 //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */ 2733 2734 //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */ 2735} AUDIO_OBJECT_TYPE; 2736 2737status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 2738 const void *esds_data, size_t esds_size) { 2739 ESDS esds(esds_data, esds_size); 2740 2741 uint8_t objectTypeIndication; 2742 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 2743 return ERROR_MALFORMED; 2744 } 2745 2746 if (objectTypeIndication == 0xe1) { 2747 // This isn't MPEG4 audio at all, it's QCELP 14k... 2748 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 2749 return OK; 2750 } 2751 2752 if (objectTypeIndication == 0x6b) { 2753 // The media subtype is MP3 audio 2754 // Our software MP3 audio decoder may not be able to handle 2755 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 2756 ALOGE("MP3 track in MP4/3GPP file is not supported"); 2757 return ERROR_UNSUPPORTED; 2758 } 2759 2760 const uint8_t *csd; 2761 size_t csd_size; 2762 if (esds.getCodecSpecificInfo( 2763 (const void **)&csd, &csd_size) != OK) { 2764 return ERROR_MALFORMED; 2765 } 2766 2767 if (kUseHexDump) { 2768 printf("ESD of size %d\n", csd_size); 2769 hexdump(csd, csd_size); 2770 } 2771 2772 if (csd_size == 0) { 2773 // There's no further information, i.e. no codec specific data 2774 // Let's assume that the information provided in the mpeg4 headers 2775 // is accurate and hope for the best. 
2776 2777 return OK; 2778 } 2779 2780 if (csd_size < 2) { 2781 return ERROR_MALFORMED; 2782 } 2783 2784 static uint32_t kSamplingRate[] = { 2785 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 2786 16000, 12000, 11025, 8000, 7350 2787 }; 2788 2789 ABitReader br(csd, csd_size); 2790 uint32_t objectType = br.getBits(5); 2791 2792 if (objectType == 31) { // AAC-ELD => additional 6 bits 2793 objectType = 32 + br.getBits(6); 2794 } 2795 2796 //keep AOT type 2797 mLastTrack->meta->setInt32(kKeyAACAOT, objectType); 2798 2799 uint32_t freqIndex = br.getBits(4); 2800 2801 int32_t sampleRate = 0; 2802 int32_t numChannels = 0; 2803 if (freqIndex == 15) { 2804 if (csd_size < 5) { 2805 return ERROR_MALFORMED; 2806 } 2807 sampleRate = br.getBits(24); 2808 numChannels = br.getBits(4); 2809 } else { 2810 numChannels = br.getBits(4); 2811 2812 if (freqIndex == 13 || freqIndex == 14) { 2813 return ERROR_MALFORMED; 2814 } 2815 2816 sampleRate = kSamplingRate[freqIndex]; 2817 } 2818 2819 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 2820 uint32_t extFreqIndex = br.getBits(4); 2821 int32_t extSampleRate __unused; 2822 if (extFreqIndex == 15) { 2823 if (csd_size < 8) { 2824 return ERROR_MALFORMED; 2825 } 2826 extSampleRate = br.getBits(24); 2827 } else { 2828 if (extFreqIndex == 13 || extFreqIndex == 14) { 2829 return ERROR_MALFORMED; 2830 } 2831 extSampleRate = kSamplingRate[extFreqIndex]; 2832 } 2833 //TODO: save the extension sampling rate value in meta data => 2834 // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate); 2835 } 2836 2837 switch (numChannels) { 2838 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration 2839 case 0: 2840 case 1:// FC 2841 case 2:// FL FR 2842 case 3:// FC, FL FR 2843 case 4:// FC, FL FR, RC 2844 case 5:// FC, FL FR, SL SR 2845 case 6:// FC, FL FR, SL SR, LFE 2846 //numChannels already contains the right value 2847 break; 2848 case 11:// FC, FL FR, SL SR, RC, 
LFE 2849 numChannels = 7; 2850 break; 2851 case 7: // FC, FCL FCR, FL FR, SL SR, LFE 2852 case 12:// FC, FL FR, SL SR, RL RR, LFE 2853 case 14:// FC, FL FR, SL SR, LFE, FHL FHR 2854 numChannels = 8; 2855 break; 2856 default: 2857 return ERROR_UNSUPPORTED; 2858 } 2859 2860 { 2861 if (objectType == AOT_SBR || objectType == AOT_PS) { 2862 objectType = br.getBits(5); 2863 2864 if (objectType == AOT_ESCAPE) { 2865 objectType = 32 + br.getBits(6); 2866 } 2867 } 2868 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || 2869 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || 2870 objectType == AOT_ER_BSAC) { 2871 const int32_t frameLengthFlag __unused = br.getBits(1); 2872 2873 const int32_t dependsOnCoreCoder = br.getBits(1); 2874 2875 if (dependsOnCoreCoder ) { 2876 const int32_t coreCoderDelay __unused = br.getBits(14); 2877 } 2878 2879 int32_t extensionFlag = -1; 2880 if (br.numBitsLeft() > 0) { 2881 extensionFlag = br.getBits(1); 2882 } else { 2883 switch (objectType) { 2884 // 14496-3 4.5.1.1 extensionFlag 2885 case AOT_AAC_LC: 2886 extensionFlag = 0; 2887 break; 2888 case AOT_ER_AAC_LC: 2889 case AOT_ER_AAC_SCAL: 2890 case AOT_ER_BSAC: 2891 case AOT_ER_AAC_LD: 2892 extensionFlag = 1; 2893 break; 2894 default: 2895 TRESPASS(); 2896 break; 2897 } 2898 ALOGW("csd missing extension flag; assuming %d for object type %u.", 2899 extensionFlag, objectType); 2900 } 2901 2902 if (numChannels == 0) { 2903 int32_t channelsEffectiveNum = 0; 2904 int32_t channelsNum = 0; 2905 const int32_t ElementInstanceTag __unused = br.getBits(4); 2906 const int32_t Profile __unused = br.getBits(2); 2907 const int32_t SamplingFrequencyIndex __unused = br.getBits(4); 2908 const int32_t NumFrontChannelElements = br.getBits(4); 2909 const int32_t NumSideChannelElements = br.getBits(4); 2910 const int32_t NumBackChannelElements = br.getBits(4); 2911 const int32_t NumLfeChannelElements = br.getBits(2); 2912 const int32_t NumAssocDataElements __unused = br.getBits(3); 2913 
const int32_t NumValidCcElements __unused = br.getBits(4); 2914 2915 const int32_t MonoMixdownPresent = br.getBits(1); 2916 if (MonoMixdownPresent != 0) { 2917 const int32_t MonoMixdownElementNumber __unused = br.getBits(4); 2918 } 2919 2920 const int32_t StereoMixdownPresent = br.getBits(1); 2921 if (StereoMixdownPresent != 0) { 2922 const int32_t StereoMixdownElementNumber __unused = br.getBits(4); 2923 } 2924 2925 const int32_t MatrixMixdownIndexPresent = br.getBits(1); 2926 if (MatrixMixdownIndexPresent != 0) { 2927 const int32_t MatrixMixdownIndex __unused = br.getBits(2); 2928 const int32_t PseudoSurroundEnable __unused = br.getBits(1); 2929 } 2930 2931 int i; 2932 for (i=0; i < NumFrontChannelElements; i++) { 2933 const int32_t FrontElementIsCpe = br.getBits(1); 2934 const int32_t FrontElementTagSelect __unused = br.getBits(4); 2935 channelsNum += FrontElementIsCpe ? 2 : 1; 2936 } 2937 2938 for (i=0; i < NumSideChannelElements; i++) { 2939 const int32_t SideElementIsCpe = br.getBits(1); 2940 const int32_t SideElementTagSelect __unused = br.getBits(4); 2941 channelsNum += SideElementIsCpe ? 2 : 1; 2942 } 2943 2944 for (i=0; i < NumBackChannelElements; i++) { 2945 const int32_t BackElementIsCpe = br.getBits(1); 2946 const int32_t BackElementTagSelect __unused = br.getBits(4); 2947 channelsNum += BackElementIsCpe ? 
2 : 1; 2948 } 2949 channelsEffectiveNum = channelsNum; 2950 2951 for (i=0; i < NumLfeChannelElements; i++) { 2952 const int32_t LfeElementTagSelect __unused = br.getBits(4); 2953 channelsNum += 1; 2954 } 2955 ALOGV("mpeg4 audio channelsNum = %d", channelsNum); 2956 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); 2957 numChannels = channelsNum; 2958 } 2959 } 2960 } 2961 2962 if (numChannels == 0) { 2963 return ERROR_UNSUPPORTED; 2964 } 2965 2966 int32_t prevSampleRate; 2967 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); 2968 2969 if (prevSampleRate != sampleRate) { 2970 ALOGV("mpeg4 audio sample rate different from previous setting. " 2971 "was: %d, now: %d", prevSampleRate, sampleRate); 2972 } 2973 2974 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 2975 2976 int32_t prevChannelCount; 2977 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); 2978 2979 if (prevChannelCount != numChannels) { 2980 ALOGV("mpeg4 audio channel count different from previous setting. 
" 2981 "was: %d, now: %d", prevChannelCount, numChannels); 2982 } 2983 2984 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); 2985 2986 return OK; 2987} 2988 2989//////////////////////////////////////////////////////////////////////////////// 2990 2991MPEG4Source::MPEG4Source( 2992 const sp<MPEG4Extractor> &owner, 2993 const sp<MetaData> &format, 2994 const sp<DataSource> &dataSource, 2995 int32_t timeScale, 2996 const sp<SampleTable> &sampleTable, 2997 Vector<SidxEntry> &sidx, 2998 const Trex *trex, 2999 off64_t firstMoofOffset) 3000 : mOwner(owner), 3001 mFormat(format), 3002 mDataSource(dataSource), 3003 mTimescale(timeScale), 3004 mSampleTable(sampleTable), 3005 mCurrentSampleIndex(0), 3006 mCurrentFragmentIndex(0), 3007 mSegments(sidx), 3008 mTrex(trex), 3009 mFirstMoofOffset(firstMoofOffset), 3010 mCurrentMoofOffset(firstMoofOffset), 3011 mCurrentTime(0), 3012 mCurrentSampleInfoAllocSize(0), 3013 mCurrentSampleInfoSizes(NULL), 3014 mCurrentSampleInfoOffsetsAllocSize(0), 3015 mCurrentSampleInfoOffsets(NULL), 3016 mIsAVC(false), 3017 mIsHEVC(false), 3018 mNALLengthSize(0), 3019 mStarted(false), 3020 mGroup(NULL), 3021 mBuffer(NULL), 3022 mWantsNALFragments(false), 3023 mSrcBuffer(NULL) { 3024 3025 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo)); 3026 3027 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode); 3028 mDefaultIVSize = 0; 3029 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 3030 uint32_t keytype; 3031 const void *key; 3032 size_t keysize; 3033 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 3034 CHECK(keysize <= 16); 3035 memset(mCryptoKey, 0, 16); 3036 memcpy(mCryptoKey, key, keysize); 3037 } 3038 3039 const char *mime; 3040 bool success = mFormat->findCString(kKeyMIMEType, &mime); 3041 CHECK(success); 3042 3043 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 3044 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC); 3045 3046 if (mIsAVC) { 3047 uint32_t type; 3048 const void *data; 
3049 size_t size; 3050 CHECK(format->findData(kKeyAVCC, &type, &data, &size)); 3051 3052 const uint8_t *ptr = (const uint8_t *)data; 3053 3054 CHECK(size >= 7); 3055 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3056 3057 // The number of bytes used to encode the length of a NAL unit. 3058 mNALLengthSize = 1 + (ptr[4] & 3); 3059 } else if (mIsHEVC) { 3060 uint32_t type; 3061 const void *data; 3062 size_t size; 3063 CHECK(format->findData(kKeyHVCC, &type, &data, &size)); 3064 3065 const uint8_t *ptr = (const uint8_t *)data; 3066 3067 CHECK(size >= 7); 3068 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3069 3070 mNALLengthSize = 1 + (ptr[14 + 7] & 3); 3071 } 3072 3073 CHECK(format->findInt32(kKeyTrackID, &mTrackId)); 3074 3075 if (mFirstMoofOffset != 0) { 3076 off64_t offset = mFirstMoofOffset; 3077 parseChunk(&offset); 3078 } 3079} 3080 3081MPEG4Source::~MPEG4Source() { 3082 if (mStarted) { 3083 stop(); 3084 } 3085 free(mCurrentSampleInfoSizes); 3086 free(mCurrentSampleInfoOffsets); 3087} 3088 3089status_t MPEG4Source::start(MetaData *params) { 3090 Mutex::Autolock autoLock(mLock); 3091 3092 CHECK(!mStarted); 3093 3094 int32_t val; 3095 if (params && params->findInt32(kKeyWantsNALFragments, &val) 3096 && val != 0) { 3097 mWantsNALFragments = true; 3098 } else { 3099 mWantsNALFragments = false; 3100 } 3101 3102 mGroup = new MediaBufferGroup; 3103 3104 int32_t max_size; 3105 CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size)); 3106 3107 mGroup->add_buffer(new MediaBuffer(max_size)); 3108 3109 mSrcBuffer = new (std::nothrow) uint8_t[max_size]; 3110 if (mSrcBuffer == NULL) { 3111 // file probably specified a bad max size 3112 return ERROR_MALFORMED; 3113 } 3114 3115 mStarted = true; 3116 3117 return OK; 3118} 3119 3120status_t MPEG4Source::stop() { 3121 Mutex::Autolock autoLock(mLock); 3122 3123 CHECK(mStarted); 3124 3125 if (mBuffer != NULL) { 3126 mBuffer->release(); 3127 mBuffer = NULL; 3128 } 3129 3130 delete[] mSrcBuffer; 3131 
mSrcBuffer = NULL; 3132 3133 delete mGroup; 3134 mGroup = NULL; 3135 3136 mStarted = false; 3137 mCurrentSampleIndex = 0; 3138 3139 return OK; 3140} 3141 3142status_t MPEG4Source::parseChunk(off64_t *offset) { 3143 uint32_t hdr[2]; 3144 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3145 return ERROR_IO; 3146 } 3147 uint64_t chunk_size = ntohl(hdr[0]); 3148 uint32_t chunk_type = ntohl(hdr[1]); 3149 off64_t data_offset = *offset + 8; 3150 3151 if (chunk_size == 1) { 3152 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 3153 return ERROR_IO; 3154 } 3155 chunk_size = ntoh64(chunk_size); 3156 data_offset += 8; 3157 3158 if (chunk_size < 16) { 3159 // The smallest valid chunk is 16 bytes long in this case. 3160 return ERROR_MALFORMED; 3161 } 3162 } else if (chunk_size < 8) { 3163 // The smallest valid chunk is 8 bytes long. 3164 return ERROR_MALFORMED; 3165 } 3166 3167 char chunk[5]; 3168 MakeFourCCString(chunk_type, chunk); 3169 ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset); 3170 3171 off64_t chunk_data_size = *offset + chunk_size - data_offset; 3172 3173 switch(chunk_type) { 3174 3175 case FOURCC('t', 'r', 'a', 'f'): 3176 case FOURCC('m', 'o', 'o', 'f'): { 3177 off64_t stop_offset = *offset + chunk_size; 3178 *offset = data_offset; 3179 while (*offset < stop_offset) { 3180 status_t err = parseChunk(offset); 3181 if (err != OK) { 3182 return err; 3183 } 3184 } 3185 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3186 // *offset points to the box following this moof. Find the next moof from there. 
3187 3188 while (true) { 3189 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3190 return ERROR_END_OF_STREAM; 3191 } 3192 chunk_size = ntohl(hdr[0]); 3193 chunk_type = ntohl(hdr[1]); 3194 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3195 mNextMoofOffset = *offset; 3196 break; 3197 } 3198 *offset += chunk_size; 3199 } 3200 } 3201 break; 3202 } 3203 3204 case FOURCC('t', 'f', 'h', 'd'): { 3205 status_t err; 3206 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 3207 return err; 3208 } 3209 *offset += chunk_size; 3210 break; 3211 } 3212 3213 case FOURCC('t', 'r', 'u', 'n'): { 3214 status_t err; 3215 if (mLastParsedTrackId == mTrackId) { 3216 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 3217 return err; 3218 } 3219 } 3220 3221 *offset += chunk_size; 3222 break; 3223 } 3224 3225 case FOURCC('s', 'a', 'i', 'z'): { 3226 status_t err; 3227 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 3228 return err; 3229 } 3230 *offset += chunk_size; 3231 break; 3232 } 3233 case FOURCC('s', 'a', 'i', 'o'): { 3234 status_t err; 3235 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 3236 return err; 3237 } 3238 *offset += chunk_size; 3239 break; 3240 } 3241 3242 case FOURCC('m', 'd', 'a', 't'): { 3243 // parse DRM info if present 3244 ALOGV("MPEG4Source::parseChunk mdat"); 3245 // if saiz/saoi was previously observed, do something with the sampleinfos 3246 *offset += chunk_size; 3247 break; 3248 } 3249 3250 default: { 3251 *offset += chunk_size; 3252 break; 3253 } 3254 } 3255 return OK; 3256} 3257 3258status_t MPEG4Source::parseSampleAuxiliaryInformationSizes( 3259 off64_t offset, off64_t /* size */) { 3260 ALOGV("parseSampleAuxiliaryInformationSizes"); 3261 // 14496-12 8.7.12 3262 uint8_t version; 3263 if (mDataSource->readAt( 3264 offset, &version, sizeof(version)) 3265 < (ssize_t)sizeof(version)) { 3266 return ERROR_IO; 3267 } 3268 3269 if (version 
!= 0) { 3270 return ERROR_UNSUPPORTED; 3271 } 3272 offset++; 3273 3274 uint32_t flags; 3275 if (!mDataSource->getUInt24(offset, &flags)) { 3276 return ERROR_IO; 3277 } 3278 offset += 3; 3279 3280 if (flags & 1) { 3281 uint32_t tmp; 3282 if (!mDataSource->getUInt32(offset, &tmp)) { 3283 return ERROR_MALFORMED; 3284 } 3285 mCurrentAuxInfoType = tmp; 3286 offset += 4; 3287 if (!mDataSource->getUInt32(offset, &tmp)) { 3288 return ERROR_MALFORMED; 3289 } 3290 mCurrentAuxInfoTypeParameter = tmp; 3291 offset += 4; 3292 } 3293 3294 uint8_t defsize; 3295 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 3296 return ERROR_MALFORMED; 3297 } 3298 mCurrentDefaultSampleInfoSize = defsize; 3299 offset++; 3300 3301 uint32_t smplcnt; 3302 if (!mDataSource->getUInt32(offset, &smplcnt)) { 3303 return ERROR_MALFORMED; 3304 } 3305 mCurrentSampleInfoCount = smplcnt; 3306 offset += 4; 3307 3308 if (mCurrentDefaultSampleInfoSize != 0) { 3309 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 3310 return OK; 3311 } 3312 if (smplcnt > mCurrentSampleInfoAllocSize) { 3313 mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 3314 mCurrentSampleInfoAllocSize = smplcnt; 3315 } 3316 3317 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 3318 return OK; 3319} 3320 3321status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets( 3322 off64_t offset, off64_t /* size */) { 3323 ALOGV("parseSampleAuxiliaryInformationOffsets"); 3324 // 14496-12 8.7.13 3325 uint8_t version; 3326 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 3327 return ERROR_IO; 3328 } 3329 offset++; 3330 3331 uint32_t flags; 3332 if (!mDataSource->getUInt24(offset, &flags)) { 3333 return ERROR_IO; 3334 } 3335 offset += 3; 3336 3337 uint32_t entrycount; 3338 if (!mDataSource->getUInt32(offset, &entrycount)) { 3339 return ERROR_IO; 3340 } 3341 offset += 4; 3342 if (entrycount == 0) { 3343 return OK; 3344 } 3345 if (entrycount > UINT32_MAX / 
8) { 3346 return ERROR_MALFORMED; 3347 } 3348 3349 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 3350 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8); 3351 if (newPtr == NULL) { 3352 return NO_MEMORY; 3353 } 3354 mCurrentSampleInfoOffsets = newPtr; 3355 mCurrentSampleInfoOffsetsAllocSize = entrycount; 3356 } 3357 mCurrentSampleInfoOffsetCount = entrycount; 3358 3359 if (mCurrentSampleInfoOffsets == NULL) { 3360 return OK; 3361 } 3362 3363 for (size_t i = 0; i < entrycount; i++) { 3364 if (version == 0) { 3365 uint32_t tmp; 3366 if (!mDataSource->getUInt32(offset, &tmp)) { 3367 return ERROR_IO; 3368 } 3369 mCurrentSampleInfoOffsets[i] = tmp; 3370 offset += 4; 3371 } else { 3372 uint64_t tmp; 3373 if (!mDataSource->getUInt64(offset, &tmp)) { 3374 return ERROR_IO; 3375 } 3376 mCurrentSampleInfoOffsets[i] = tmp; 3377 offset += 8; 3378 } 3379 } 3380 3381 // parse clear/encrypted data 3382 3383 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 3384 3385 drmoffset += mCurrentMoofOffset; 3386 int ivlength; 3387 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 3388 3389 // only 0, 8 and 16 byte initialization vectors are supported 3390 if (ivlength != 0 && ivlength != 8 && ivlength != 16) { 3391 ALOGW("unsupported IV length: %d", ivlength); 3392 return ERROR_MALFORMED; 3393 } 3394 // read CencSampleAuxiliaryDataFormats 3395 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) { 3396 if (i >= mCurrentSamples.size()) { 3397 ALOGW("too few samples"); 3398 break; 3399 } 3400 Sample *smpl = &mCurrentSamples.editItemAt(i); 3401 3402 memset(smpl->iv, 0, 16); 3403 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) { 3404 return ERROR_IO; 3405 } 3406 3407 drmoffset += ivlength; 3408 3409 int32_t smplinfosize = mCurrentDefaultSampleInfoSize; 3410 if (smplinfosize == 0) { 3411 smplinfosize = mCurrentSampleInfoSizes[i]; 3412 } 3413 if (smplinfosize > ivlength) { 3414 uint16_t numsubsamples; 3415 if 
(!mDataSource->getUInt16(drmoffset, &numsubsamples)) { 3416 return ERROR_IO; 3417 } 3418 drmoffset += 2; 3419 for (size_t j = 0; j < numsubsamples; j++) { 3420 uint16_t numclear; 3421 uint32_t numencrypted; 3422 if (!mDataSource->getUInt16(drmoffset, &numclear)) { 3423 return ERROR_IO; 3424 } 3425 drmoffset += 2; 3426 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) { 3427 return ERROR_IO; 3428 } 3429 drmoffset += 4; 3430 smpl->clearsizes.add(numclear); 3431 smpl->encryptedsizes.add(numencrypted); 3432 } 3433 } else { 3434 smpl->clearsizes.add(0); 3435 smpl->encryptedsizes.add(smpl->size); 3436 } 3437 } 3438 3439 3440 return OK; 3441} 3442 3443status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 3444 3445 if (size < 8) { 3446 return -EINVAL; 3447 } 3448 3449 uint32_t flags; 3450 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 3451 return ERROR_MALFORMED; 3452 } 3453 3454 if (flags & 0xff000000) { 3455 return -EINVAL; 3456 } 3457 3458 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 3459 return ERROR_MALFORMED; 3460 } 3461 3462 if (mLastParsedTrackId != mTrackId) { 3463 // this is not the right track, skip it 3464 return OK; 3465 } 3466 3467 mTrackFragmentHeaderInfo.mFlags = flags; 3468 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 3469 offset += 8; 3470 size -= 8; 3471 3472 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 3473 3474 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 3475 if (size < 8) { 3476 return -EINVAL; 3477 } 3478 3479 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) { 3480 return ERROR_MALFORMED; 3481 } 3482 offset += 8; 3483 size -= 8; 3484 } 3485 3486 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 3487 if (size < 4) { 3488 return -EINVAL; 3489 } 3490 3491 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 3492 
return ERROR_MALFORMED; 3493 } 3494 offset += 4; 3495 size -= 4; 3496 } 3497 3498 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 3499 if (size < 4) { 3500 return -EINVAL; 3501 } 3502 3503 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 3504 return ERROR_MALFORMED; 3505 } 3506 offset += 4; 3507 size -= 4; 3508 } 3509 3510 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 3511 if (size < 4) { 3512 return -EINVAL; 3513 } 3514 3515 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 3516 return ERROR_MALFORMED; 3517 } 3518 offset += 4; 3519 size -= 4; 3520 } 3521 3522 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 3523 if (size < 4) { 3524 return -EINVAL; 3525 } 3526 3527 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 3528 return ERROR_MALFORMED; 3529 } 3530 offset += 4; 3531 size -= 4; 3532 } 3533 3534 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 3535 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 3536 } 3537 3538 mTrackFragmentHeaderInfo.mDataOffset = 0; 3539 return OK; 3540} 3541 3542status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 3543 3544 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 3545 if (size < 8) { 3546 return -EINVAL; 3547 } 3548 3549 enum { 3550 kDataOffsetPresent = 0x01, 3551 kFirstSampleFlagsPresent = 0x04, 3552 kSampleDurationPresent = 0x100, 3553 kSampleSizePresent = 0x200, 3554 kSampleFlagsPresent = 0x400, 3555 kSampleCompositionTimeOffsetPresent = 0x800, 3556 }; 3557 3558 uint32_t flags; 3559 if (!mDataSource->getUInt32(offset, &flags)) { 3560 return ERROR_MALFORMED; 3561 } 3562 ALOGV("fragment run flags: %08x", flags); 3563 3564 if (flags & 0xff000000) { 3565 return -EINVAL; 3566 } 3567 3568 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 3569 // These two shall not be used together. 
3570 return -EINVAL; 3571 } 3572 3573 uint32_t sampleCount; 3574 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 3575 return ERROR_MALFORMED; 3576 } 3577 offset += 8; 3578 size -= 8; 3579 3580 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 3581 3582 uint32_t firstSampleFlags = 0; 3583 3584 if (flags & kDataOffsetPresent) { 3585 if (size < 4) { 3586 return -EINVAL; 3587 } 3588 3589 int32_t dataOffsetDelta; 3590 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 3591 return ERROR_MALFORMED; 3592 } 3593 3594 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 3595 3596 offset += 4; 3597 size -= 4; 3598 } 3599 3600 if (flags & kFirstSampleFlagsPresent) { 3601 if (size < 4) { 3602 return -EINVAL; 3603 } 3604 3605 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 3606 return ERROR_MALFORMED; 3607 } 3608 offset += 4; 3609 size -= 4; 3610 } 3611 3612 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 3613 sampleCtsOffset = 0; 3614 3615 size_t bytesPerSample = 0; 3616 if (flags & kSampleDurationPresent) { 3617 bytesPerSample += 4; 3618 } else if (mTrackFragmentHeaderInfo.mFlags 3619 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 3620 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 3621 } else if (mTrex) { 3622 sampleDuration = mTrex->default_sample_duration; 3623 } 3624 3625 if (flags & kSampleSizePresent) { 3626 bytesPerSample += 4; 3627 } else if (mTrackFragmentHeaderInfo.mFlags 3628 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 3629 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 3630 } else { 3631 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 3632 } 3633 3634 if (flags & kSampleFlagsPresent) { 3635 bytesPerSample += 4; 3636 } else if (mTrackFragmentHeaderInfo.mFlags 3637 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 3638 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 3639 } else { 3640 sampleFlags = 
mTrackFragmentHeaderInfo.mDefaultSampleFlags; 3641 } 3642 3643 if (flags & kSampleCompositionTimeOffsetPresent) { 3644 bytesPerSample += 4; 3645 } else { 3646 sampleCtsOffset = 0; 3647 } 3648 3649 if (size < (off64_t)sampleCount * bytesPerSample) { 3650 return -EINVAL; 3651 } 3652 3653 Sample tmp; 3654 for (uint32_t i = 0; i < sampleCount; ++i) { 3655 if (flags & kSampleDurationPresent) { 3656 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 3657 return ERROR_MALFORMED; 3658 } 3659 offset += 4; 3660 } 3661 3662 if (flags & kSampleSizePresent) { 3663 if (!mDataSource->getUInt32(offset, &sampleSize)) { 3664 return ERROR_MALFORMED; 3665 } 3666 offset += 4; 3667 } 3668 3669 if (flags & kSampleFlagsPresent) { 3670 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 3671 return ERROR_MALFORMED; 3672 } 3673 offset += 4; 3674 } 3675 3676 if (flags & kSampleCompositionTimeOffsetPresent) { 3677 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 3678 return ERROR_MALFORMED; 3679 } 3680 offset += 4; 3681 } 3682 3683 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, " 3684 " flags 0x%08x", i + 1, 3685 dataOffset, sampleSize, sampleDuration, 3686 (flags & kFirstSampleFlagsPresent) && i == 0 3687 ? 
firstSampleFlags : sampleFlags); 3688 tmp.offset = dataOffset; 3689 tmp.size = sampleSize; 3690 tmp.duration = sampleDuration; 3691 tmp.compositionOffset = sampleCtsOffset; 3692 mCurrentSamples.add(tmp); 3693 3694 dataOffset += sampleSize; 3695 } 3696 3697 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 3698 3699 return OK; 3700} 3701 3702sp<MetaData> MPEG4Source::getFormat() { 3703 Mutex::Autolock autoLock(mLock); 3704 3705 return mFormat; 3706} 3707 3708size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 3709 switch (mNALLengthSize) { 3710 case 1: 3711 return *data; 3712 case 2: 3713 return U16_AT(data); 3714 case 3: 3715 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 3716 case 4: 3717 return U32_AT(data); 3718 } 3719 3720 // This cannot happen, mNALLengthSize springs to life by adding 1 to 3721 // a 2-bit integer. 3722 CHECK(!"Should not be here."); 3723 3724 return 0; 3725} 3726 3727status_t MPEG4Source::read( 3728 MediaBuffer **out, const ReadOptions *options) { 3729 Mutex::Autolock autoLock(mLock); 3730 3731 CHECK(mStarted); 3732 3733 if (mFirstMoofOffset > 0) { 3734 return fragmentedRead(out, options); 3735 } 3736 3737 *out = NULL; 3738 3739 int64_t targetSampleTimeUs = -1; 3740 3741 int64_t seekTimeUs; 3742 ReadOptions::SeekMode mode; 3743 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3744 uint32_t findFlags = 0; 3745 switch (mode) { 3746 case ReadOptions::SEEK_PREVIOUS_SYNC: 3747 findFlags = SampleTable::kFlagBefore; 3748 break; 3749 case ReadOptions::SEEK_NEXT_SYNC: 3750 findFlags = SampleTable::kFlagAfter; 3751 break; 3752 case ReadOptions::SEEK_CLOSEST_SYNC: 3753 case ReadOptions::SEEK_CLOSEST: 3754 findFlags = SampleTable::kFlagClosest; 3755 break; 3756 default: 3757 CHECK(!"Should not be here."); 3758 break; 3759 } 3760 3761 uint32_t sampleIndex; 3762 status_t err = mSampleTable->findSampleAtTime( 3763 seekTimeUs, 1000000, mTimescale, 3764 &sampleIndex, findFlags); 3765 3766 if (mode == ReadOptions::SEEK_CLOSEST) { 3767 // 
We found the closest sample already, now we want the sync 3768 // sample preceding it (or the sample itself of course), even 3769 // if the subsequent sync sample is closer. 3770 findFlags = SampleTable::kFlagBefore; 3771 } 3772 3773 uint32_t syncSampleIndex; 3774 if (err == OK) { 3775 err = mSampleTable->findSyncSampleNear( 3776 sampleIndex, &syncSampleIndex, findFlags); 3777 } 3778 3779 uint32_t sampleTime; 3780 if (err == OK) { 3781 err = mSampleTable->getMetaDataForSample( 3782 sampleIndex, NULL, NULL, &sampleTime); 3783 } 3784 3785 if (err != OK) { 3786 if (err == ERROR_OUT_OF_RANGE) { 3787 // An attempt to seek past the end of the stream would 3788 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 3789 // this all the way to the MediaPlayer would cause abnormal 3790 // termination. Legacy behaviour appears to be to behave as if 3791 // we had seeked to the end of stream, ending normally. 3792 err = ERROR_END_OF_STREAM; 3793 } 3794 ALOGV("end of stream"); 3795 return err; 3796 } 3797 3798 if (mode == ReadOptions::SEEK_CLOSEST) { 3799 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 3800 } 3801 3802#if 0 3803 uint32_t syncSampleTime; 3804 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 3805 syncSampleIndex, NULL, NULL, &syncSampleTime)); 3806 3807 ALOGI("seek to time %lld us => sample at time %lld us, " 3808 "sync sample at time %lld us", 3809 seekTimeUs, 3810 sampleTime * 1000000ll / mTimescale, 3811 syncSampleTime * 1000000ll / mTimescale); 3812#endif 3813 3814 mCurrentSampleIndex = syncSampleIndex; 3815 if (mBuffer != NULL) { 3816 mBuffer->release(); 3817 mBuffer = NULL; 3818 } 3819 3820 // fall through 3821 } 3822 3823 off64_t offset; 3824 size_t size; 3825 uint32_t cts, stts; 3826 bool isSyncSample; 3827 bool newBuffer = false; 3828 if (mBuffer == NULL) { 3829 newBuffer = true; 3830 3831 status_t err = 3832 mSampleTable->getMetaDataForSample( 3833 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts); 3834 3835 if (err != 
OK) { 3836 return err; 3837 } 3838 3839 err = mGroup->acquire_buffer(&mBuffer); 3840 3841 if (err != OK) { 3842 CHECK(mBuffer == NULL); 3843 return err; 3844 } 3845 if (size > mBuffer->size()) { 3846 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 3847 return ERROR_BUFFER_TOO_SMALL; 3848 } 3849 } 3850 3851 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) { 3852 if (newBuffer) { 3853 ssize_t num_bytes_read = 3854 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 3855 3856 if (num_bytes_read < (ssize_t)size) { 3857 mBuffer->release(); 3858 mBuffer = NULL; 3859 3860 return ERROR_IO; 3861 } 3862 3863 CHECK(mBuffer != NULL); 3864 mBuffer->set_range(0, size); 3865 mBuffer->meta_data()->clear(); 3866 mBuffer->meta_data()->setInt64( 3867 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3868 mBuffer->meta_data()->setInt64( 3869 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 3870 3871 if (targetSampleTimeUs >= 0) { 3872 mBuffer->meta_data()->setInt64( 3873 kKeyTargetTime, targetSampleTimeUs); 3874 } 3875 3876 if (isSyncSample) { 3877 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3878 } 3879 3880 ++mCurrentSampleIndex; 3881 } 3882 3883 if (!mIsAVC && !mIsHEVC) { 3884 *out = mBuffer; 3885 mBuffer = NULL; 3886 3887 return OK; 3888 } 3889 3890 // Each NAL unit is split up into its constituent fragments and 3891 // each one of them returned in its own buffer. 
3892 3893 CHECK(mBuffer->range_length() >= mNALLengthSize); 3894 3895 const uint8_t *src = 3896 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 3897 3898 size_t nal_size = parseNALSize(src); 3899 if (mBuffer->range_length() < mNALLengthSize + nal_size) { 3900 ALOGE("incomplete NAL unit."); 3901 3902 mBuffer->release(); 3903 mBuffer = NULL; 3904 3905 return ERROR_MALFORMED; 3906 } 3907 3908 MediaBuffer *clone = mBuffer->clone(); 3909 CHECK(clone != NULL); 3910 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 3911 3912 CHECK(mBuffer != NULL); 3913 mBuffer->set_range( 3914 mBuffer->range_offset() + mNALLengthSize + nal_size, 3915 mBuffer->range_length() - mNALLengthSize - nal_size); 3916 3917 if (mBuffer->range_length() == 0) { 3918 mBuffer->release(); 3919 mBuffer = NULL; 3920 } 3921 3922 *out = clone; 3923 3924 return OK; 3925 } else { 3926 // Whole NAL units are returned but each fragment is prefixed by 3927 // the start code (0x00 00 00 01). 3928 ssize_t num_bytes_read = 0; 3929 int32_t drm = 0; 3930 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 3931 if (usesDRM) { 3932 num_bytes_read = 3933 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 3934 } else { 3935 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 3936 } 3937 3938 if (num_bytes_read < (ssize_t)size) { 3939 mBuffer->release(); 3940 mBuffer = NULL; 3941 3942 return ERROR_IO; 3943 } 3944 3945 if (usesDRM) { 3946 CHECK(mBuffer != NULL); 3947 mBuffer->set_range(0, size); 3948 3949 } else { 3950 uint8_t *dstData = (uint8_t *)mBuffer->data(); 3951 size_t srcOffset = 0; 3952 size_t dstOffset = 0; 3953 3954 while (srcOffset < size) { 3955 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 3956 size_t nalLength = 0; 3957 if (!isMalFormed) { 3958 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 3959 srcOffset += mNALLengthSize; 3960 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength); 3961 } 3962 
3963 if (isMalFormed) { 3964 ALOGE("Video is malformed"); 3965 mBuffer->release(); 3966 mBuffer = NULL; 3967 return ERROR_MALFORMED; 3968 } 3969 3970 if (nalLength == 0) { 3971 continue; 3972 } 3973 3974 CHECK(dstOffset + 4 <= mBuffer->size()); 3975 3976 dstData[dstOffset++] = 0; 3977 dstData[dstOffset++] = 0; 3978 dstData[dstOffset++] = 0; 3979 dstData[dstOffset++] = 1; 3980 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 3981 srcOffset += nalLength; 3982 dstOffset += nalLength; 3983 } 3984 CHECK_EQ(srcOffset, size); 3985 CHECK(mBuffer != NULL); 3986 mBuffer->set_range(0, dstOffset); 3987 } 3988 3989 mBuffer->meta_data()->clear(); 3990 mBuffer->meta_data()->setInt64( 3991 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3992 mBuffer->meta_data()->setInt64( 3993 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 3994 3995 if (targetSampleTimeUs >= 0) { 3996 mBuffer->meta_data()->setInt64( 3997 kKeyTargetTime, targetSampleTimeUs); 3998 } 3999 4000 if (isSyncSample) { 4001 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4002 } 4003 4004 ++mCurrentSampleIndex; 4005 4006 *out = mBuffer; 4007 mBuffer = NULL; 4008 4009 return OK; 4010 } 4011} 4012 4013status_t MPEG4Source::fragmentedRead( 4014 MediaBuffer **out, const ReadOptions *options) { 4015 4016 ALOGV("MPEG4Source::fragmentedRead"); 4017 4018 CHECK(mStarted); 4019 4020 *out = NULL; 4021 4022 int64_t targetSampleTimeUs = -1; 4023 4024 int64_t seekTimeUs; 4025 ReadOptions::SeekMode mode; 4026 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 4027 4028 int numSidxEntries = mSegments.size(); 4029 if (numSidxEntries != 0) { 4030 int64_t totalTime = 0; 4031 off64_t totalOffset = mFirstMoofOffset; 4032 for (int i = 0; i < numSidxEntries; i++) { 4033 const SidxEntry *se = &mSegments[i]; 4034 if (totalTime + se->mDurationUs > seekTimeUs) { 4035 // The requested time is somewhere in this segment 4036 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) || 4037 (mode == 
ReadOptions::SEEK_CLOSEST_SYNC && 4038 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 4039 // requested next sync, or closest sync and it was closer to the end of 4040 // this segment 4041 totalTime += se->mDurationUs; 4042 totalOffset += se->mSize; 4043 } 4044 break; 4045 } 4046 totalTime += se->mDurationUs; 4047 totalOffset += se->mSize; 4048 } 4049 mCurrentMoofOffset = totalOffset; 4050 mCurrentSamples.clear(); 4051 mCurrentSampleIndex = 0; 4052 parseChunk(&totalOffset); 4053 mCurrentTime = totalTime * mTimescale / 1000000ll; 4054 } else { 4055 // without sidx boxes, we can only seek to 0 4056 mCurrentMoofOffset = mFirstMoofOffset; 4057 mCurrentSamples.clear(); 4058 mCurrentSampleIndex = 0; 4059 off64_t tmp = mCurrentMoofOffset; 4060 parseChunk(&tmp); 4061 mCurrentTime = 0; 4062 } 4063 4064 if (mBuffer != NULL) { 4065 mBuffer->release(); 4066 mBuffer = NULL; 4067 } 4068 4069 // fall through 4070 } 4071 4072 off64_t offset = 0; 4073 size_t size = 0; 4074 uint32_t cts = 0; 4075 bool isSyncSample = false; 4076 bool newBuffer = false; 4077 if (mBuffer == NULL) { 4078 newBuffer = true; 4079 4080 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4081 // move to next fragment if there is one 4082 if (mNextMoofOffset <= mCurrentMoofOffset) { 4083 return ERROR_END_OF_STREAM; 4084 } 4085 off64_t nextMoof = mNextMoofOffset; 4086 mCurrentMoofOffset = nextMoof; 4087 mCurrentSamples.clear(); 4088 mCurrentSampleIndex = 0; 4089 parseChunk(&nextMoof); 4090 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4091 return ERROR_END_OF_STREAM; 4092 } 4093 } 4094 4095 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4096 offset = smpl->offset; 4097 size = smpl->size; 4098 cts = mCurrentTime + smpl->compositionOffset; 4099 mCurrentTime += smpl->duration; 4100 isSyncSample = (mCurrentSampleIndex == 0); // XXX 4101 4102 status_t err = mGroup->acquire_buffer(&mBuffer); 4103 4104 if (err != OK) { 4105 CHECK(mBuffer == NULL); 4106 
ALOGV("acquire_buffer returned %d", err); 4107 return err; 4108 } 4109 if (size > mBuffer->size()) { 4110 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 4111 return ERROR_BUFFER_TOO_SMALL; 4112 } 4113 } 4114 4115 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4116 const sp<MetaData> bufmeta = mBuffer->meta_data(); 4117 bufmeta->clear(); 4118 if (smpl->encryptedsizes.size()) { 4119 // store clear/encrypted lengths in metadata 4120 bufmeta->setData(kKeyPlainSizes, 0, 4121 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 4122 bufmeta->setData(kKeyEncryptedSizes, 0, 4123 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 4124 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size? 4125 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 4126 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode); 4127 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16); 4128 } 4129 4130 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) { 4131 if (newBuffer) { 4132 if (!isInRange((size_t)0u, mBuffer->size(), size)) { 4133 mBuffer->release(); 4134 mBuffer = NULL; 4135 4136 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size); 4137 return ERROR_MALFORMED; 4138 } 4139 4140 ssize_t num_bytes_read = 4141 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 4142 4143 if (num_bytes_read < (ssize_t)size) { 4144 mBuffer->release(); 4145 mBuffer = NULL; 4146 4147 ALOGE("i/o error"); 4148 return ERROR_IO; 4149 } 4150 4151 CHECK(mBuffer != NULL); 4152 mBuffer->set_range(0, size); 4153 mBuffer->meta_data()->setInt64( 4154 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4155 mBuffer->meta_data()->setInt64( 4156 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4157 4158 if (targetSampleTimeUs >= 0) { 4159 mBuffer->meta_data()->setInt64( 4160 kKeyTargetTime, targetSampleTimeUs); 4161 } 4162 4163 if (isSyncSample) { 4164 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4165 } 4166 4167 
++mCurrentSampleIndex; 4168 } 4169 4170 if (!mIsAVC && !mIsHEVC) { 4171 *out = mBuffer; 4172 mBuffer = NULL; 4173 4174 return OK; 4175 } 4176 4177 // Each NAL unit is split up into its constituent fragments and 4178 // each one of them returned in its own buffer. 4179 4180 CHECK(mBuffer->range_length() >= mNALLengthSize); 4181 4182 const uint8_t *src = 4183 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 4184 4185 size_t nal_size = parseNALSize(src); 4186 if (mBuffer->range_length() < mNALLengthSize + nal_size) { 4187 ALOGE("incomplete NAL unit."); 4188 4189 mBuffer->release(); 4190 mBuffer = NULL; 4191 4192 return ERROR_MALFORMED; 4193 } 4194 4195 MediaBuffer *clone = mBuffer->clone(); 4196 CHECK(clone != NULL); 4197 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 4198 4199 CHECK(mBuffer != NULL); 4200 mBuffer->set_range( 4201 mBuffer->range_offset() + mNALLengthSize + nal_size, 4202 mBuffer->range_length() - mNALLengthSize - nal_size); 4203 4204 if (mBuffer->range_length() == 0) { 4205 mBuffer->release(); 4206 mBuffer = NULL; 4207 } 4208 4209 *out = clone; 4210 4211 return OK; 4212 } else { 4213 ALOGV("whole NAL"); 4214 // Whole NAL units are returned but each fragment is prefixed by 4215 // the start code (0x00 00 00 01). 
4216 ssize_t num_bytes_read = 0; 4217 int32_t drm = 0; 4218 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 4219 void *data = NULL; 4220 bool isMalFormed = false; 4221 if (usesDRM) { 4222 if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) { 4223 isMalFormed = true; 4224 } else { 4225 data = mBuffer->data(); 4226 } 4227 } else { 4228 int32_t max_size; 4229 if (mFormat == NULL 4230 || !mFormat->findInt32(kKeyMaxInputSize, &max_size) 4231 || !isInRange((size_t)0u, (size_t)max_size, size)) { 4232 isMalFormed = true; 4233 } else { 4234 data = mSrcBuffer; 4235 } 4236 } 4237 4238 if (isMalFormed || data == NULL) { 4239 ALOGE("isMalFormed size %zu", size); 4240 if (mBuffer != NULL) { 4241 mBuffer->release(); 4242 mBuffer = NULL; 4243 } 4244 return ERROR_MALFORMED; 4245 } 4246 num_bytes_read = mDataSource->readAt(offset, data, size); 4247 4248 if (num_bytes_read < (ssize_t)size) { 4249 mBuffer->release(); 4250 mBuffer = NULL; 4251 4252 ALOGE("i/o error"); 4253 return ERROR_IO; 4254 } 4255 4256 if (usesDRM) { 4257 CHECK(mBuffer != NULL); 4258 mBuffer->set_range(0, size); 4259 4260 } else { 4261 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4262 size_t srcOffset = 0; 4263 size_t dstOffset = 0; 4264 4265 while (srcOffset < size) { 4266 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 4267 size_t nalLength = 0; 4268 if (!isMalFormed) { 4269 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4270 srcOffset += mNALLengthSize; 4271 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength) 4272 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u) 4273 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength); 4274 } 4275 4276 if (isMalFormed) { 4277 ALOGE("Video is malformed; nalLength %zu", nalLength); 4278 mBuffer->release(); 4279 mBuffer = NULL; 4280 return ERROR_MALFORMED; 4281 } 4282 4283 if (nalLength == 0) { 4284 continue; 4285 } 4286 4287 CHECK(dstOffset + 4 <= mBuffer->size()); 4288 
4289 dstData[dstOffset++] = 0; 4290 dstData[dstOffset++] = 0; 4291 dstData[dstOffset++] = 0; 4292 dstData[dstOffset++] = 1; 4293 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4294 srcOffset += nalLength; 4295 dstOffset += nalLength; 4296 } 4297 CHECK_EQ(srcOffset, size); 4298 CHECK(mBuffer != NULL); 4299 mBuffer->set_range(0, dstOffset); 4300 } 4301 4302 mBuffer->meta_data()->setInt64( 4303 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4304 mBuffer->meta_data()->setInt64( 4305 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4306 4307 if (targetSampleTimeUs >= 0) { 4308 mBuffer->meta_data()->setInt64( 4309 kKeyTargetTime, targetSampleTimeUs); 4310 } 4311 4312 if (isSyncSample) { 4313 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4314 } 4315 4316 ++mCurrentSampleIndex; 4317 4318 *out = mBuffer; 4319 mBuffer = NULL; 4320 4321 return OK; 4322 } 4323} 4324 4325MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 4326 const char *mimePrefix) { 4327 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 4328 const char *mime; 4329 if (track->meta != NULL 4330 && track->meta->findCString(kKeyMIMEType, &mime) 4331 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 4332 return track; 4333 } 4334 } 4335 4336 return NULL; 4337} 4338 4339static bool LegacySniffMPEG4( 4340 const sp<DataSource> &source, String8 *mimeType, float *confidence) { 4341 uint8_t header[8]; 4342 4343 ssize_t n = source->readAt(4, header, sizeof(header)); 4344 if (n < (ssize_t)sizeof(header)) { 4345 return false; 4346 } 4347 4348 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 4349 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) 4350 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 4351 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 4352 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 4353 || !memcmp(header, "ftypkddi", 8) || 
!memcmp(header, "ftypM4VP", 8)) { 4354 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 4355 *confidence = 0.4; 4356 4357 return true; 4358 } 4359 4360 return false; 4361} 4362 4363static bool isCompatibleBrand(uint32_t fourcc) { 4364 static const uint32_t kCompatibleBrands[] = { 4365 FOURCC('i', 's', 'o', 'm'), 4366 FOURCC('i', 's', 'o', '2'), 4367 FOURCC('a', 'v', 'c', '1'), 4368 FOURCC('h', 'v', 'c', '1'), 4369 FOURCC('h', 'e', 'v', '1'), 4370 FOURCC('3', 'g', 'p', '4'), 4371 FOURCC('m', 'p', '4', '1'), 4372 FOURCC('m', 'p', '4', '2'), 4373 4374 // Won't promise that the following file types can be played. 4375 // Just give these file types a chance. 4376 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 4377 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 4378 4379 FOURCC('3', 'g', '2', 'a'), // 3GPP2 4380 FOURCC('3', 'g', '2', 'b'), 4381 }; 4382 4383 for (size_t i = 0; 4384 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 4385 ++i) { 4386 if (kCompatibleBrands[i] == fourcc) { 4387 return true; 4388 } 4389 } 4390 4391 return false; 4392} 4393 4394// Attempt to actually parse the 'ftyp' atom and determine if a suitable 4395// compatible brand is present. 4396// Also try to identify where this file's metadata ends 4397// (end of the 'moov' atom) and report it to the caller as part of 4398// the metadata. 4399static bool BetterSniffMPEG4( 4400 const sp<DataSource> &source, String8 *mimeType, float *confidence, 4401 sp<AMessage> *meta) { 4402 // We scan up to 128 bytes to identify this file as an MP4. 
4403 static const off64_t kMaxScanOffset = 128ll; 4404 4405 off64_t offset = 0ll; 4406 bool foundGoodFileType = false; 4407 off64_t moovAtomEndOffset = -1ll; 4408 bool done = false; 4409 4410 while (!done && offset < kMaxScanOffset) { 4411 uint32_t hdr[2]; 4412 if (source->readAt(offset, hdr, 8) < 8) { 4413 return false; 4414 } 4415 4416 uint64_t chunkSize = ntohl(hdr[0]); 4417 uint32_t chunkType = ntohl(hdr[1]); 4418 off64_t chunkDataOffset = offset + 8; 4419 4420 if (chunkSize == 1) { 4421 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 4422 return false; 4423 } 4424 4425 chunkSize = ntoh64(chunkSize); 4426 chunkDataOffset += 8; 4427 4428 if (chunkSize < 16) { 4429 // The smallest valid chunk is 16 bytes long in this case. 4430 return false; 4431 } 4432 } else if (chunkSize < 8) { 4433 // The smallest valid chunk is 8 bytes long. 4434 return false; 4435 } 4436 4437 off64_t chunkDataSize = offset + chunkSize - chunkDataOffset; 4438 4439 char chunkstring[5]; 4440 MakeFourCCString(chunkType, chunkstring); 4441 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, offset); 4442 switch (chunkType) { 4443 case FOURCC('f', 't', 'y', 'p'): 4444 { 4445 if (chunkDataSize < 8) { 4446 return false; 4447 } 4448 4449 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 4450 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 4451 if (i == 1) { 4452 // Skip this index, it refers to the minorVersion, 4453 // not a brand. 
4454 continue; 4455 } 4456 4457 uint32_t brand; 4458 if (source->readAt( 4459 chunkDataOffset + 4 * i, &brand, 4) < 4) { 4460 return false; 4461 } 4462 4463 brand = ntohl(brand); 4464 4465 if (isCompatibleBrand(brand)) { 4466 foundGoodFileType = true; 4467 break; 4468 } 4469 } 4470 4471 if (!foundGoodFileType) { 4472 return false; 4473 } 4474 4475 break; 4476 } 4477 4478 case FOURCC('m', 'o', 'o', 'v'): 4479 { 4480 moovAtomEndOffset = offset + chunkSize; 4481 4482 done = true; 4483 break; 4484 } 4485 4486 default: 4487 break; 4488 } 4489 4490 offset += chunkSize; 4491 } 4492 4493 if (!foundGoodFileType) { 4494 return false; 4495 } 4496 4497 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 4498 *confidence = 0.4f; 4499 4500 if (moovAtomEndOffset >= 0) { 4501 *meta = new AMessage; 4502 (*meta)->setInt64("meta-data-size", moovAtomEndOffset); 4503 4504 ALOGV("found metadata size: %lld", moovAtomEndOffset); 4505 } 4506 4507 return true; 4508} 4509 4510bool SniffMPEG4( 4511 const sp<DataSource> &source, String8 *mimeType, float *confidence, 4512 sp<AMessage> *meta) { 4513 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) { 4514 return true; 4515 } 4516 4517 if (LegacySniffMPEG4(source, mimeType, confidence)) { 4518 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 4519 return true; 4520 } 4521 4522 return false; 4523} 4524 4525} // namespace android 4526