MPEG4Extractor.cpp revision 7d3be41436e91a1d79b3a38c651bcd0d2c221f6d
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//#define LOG_NDEBUG 0
#define LOG_TAG "MPEG4Extractor"

#include <ctype.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <utils/Log.h>

#include "include/MPEG4Extractor.h"
#include "include/SampleTable.h"
#include "include/ESDS.h"

#include <media/stagefright/foundation/ABitReader.h>
#include <media/stagefright/foundation/ABuffer.h>
#include <media/stagefright/foundation/ADebug.h>
#include <media/stagefright/foundation/AMessage.h>
#include <media/stagefright/foundation/AUtils.h>
#include <media/stagefright/MediaBuffer.h>
#include <media/stagefright/MediaBufferGroup.h>
#include <media/stagefright/MediaDefs.h>
#include <media/stagefright/MediaSource.h>
#include <media/stagefright/MetaData.h>
#include <utils/String8.h>

#include <byteswap.h>
#include "include/ID3.h"

namespace android {

// MediaSource declared for one track of an MPEG4Extractor. It holds a
// reference to the extractor ("mOwner") plus the per-track sample table,
// data source, fragment (moof/sidx/trex) state and sample-encryption state
// declared below. Only the declaration is given here; the member functions
// are defined elsewhere in this file.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(const sp<MPEG4Extractor> &owner,
                const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset);

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    // keep the MPEG4Extractor around, since we're referencing its data
    sp<MPEG4Extractor> mOwner;
    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Held by reference: the vector passed to the constructor as "sidx" must
    // outlive this object (presumably it belongs to mOwner -- confirm).
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    // State for the sample auxiliary information of the current fragment; see
    // parseSampleAuxiliaryInformationSizes() / ...Offsets() declared below.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // Values collected by parseTrackFragmentHeader().
    // NOTE(review): the Flags masks look like the 'tfhd' tf_flags bits of
    // ISO/IEC 14496-12 -- confirm against the parser before relying on this.
    struct TrackFragmentHeaderInfo {
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // One entry of mCurrentSamples: where a sample lives, its timing, and its
    // per-sample encryption data (iv plus clear/encrypted byte counts).
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Declared but not defined here: copying is disallowed.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};

// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache while forwarding
// all remaining requests to the wrapped datasource.
// This is used to cache the full sampletable metadata for a single track,
// possibly wrapping multiple times to cover all tracks, i.e.
// Each MPEG4DataSource caches the sampletable metadata for a single track.
169 170struct MPEG4DataSource : public DataSource { 171 MPEG4DataSource(const sp<DataSource> &source); 172 173 virtual status_t initCheck() const; 174 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 175 virtual status_t getSize(off64_t *size); 176 virtual uint32_t flags(); 177 178 status_t setCachedRange(off64_t offset, size_t size); 179 180protected: 181 virtual ~MPEG4DataSource(); 182 183private: 184 Mutex mLock; 185 186 sp<DataSource> mSource; 187 off64_t mCachedOffset; 188 size_t mCachedSize; 189 uint8_t *mCache; 190 191 void clearCache(); 192 193 MPEG4DataSource(const MPEG4DataSource &); 194 MPEG4DataSource &operator=(const MPEG4DataSource &); 195}; 196 197MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source) 198 : mSource(source), 199 mCachedOffset(0), 200 mCachedSize(0), 201 mCache(NULL) { 202} 203 204MPEG4DataSource::~MPEG4DataSource() { 205 clearCache(); 206} 207 208void MPEG4DataSource::clearCache() { 209 if (mCache) { 210 free(mCache); 211 mCache = NULL; 212 } 213 214 mCachedOffset = 0; 215 mCachedSize = 0; 216} 217 218status_t MPEG4DataSource::initCheck() const { 219 return mSource->initCheck(); 220} 221 222ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { 223 Mutex::Autolock autoLock(mLock); 224 225 if (isInRange(mCachedOffset, mCachedSize, offset, size)) { 226 memcpy(data, &mCache[offset - mCachedOffset], size); 227 return size; 228 } 229 230 return mSource->readAt(offset, data, size); 231} 232 233status_t MPEG4DataSource::getSize(off64_t *size) { 234 return mSource->getSize(size); 235} 236 237uint32_t MPEG4DataSource::flags() { 238 return mSource->flags(); 239} 240 241status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { 242 Mutex::Autolock autoLock(mLock); 243 244 clearCache(); 245 246 mCache = (uint8_t *)malloc(size); 247 248 if (mCache == NULL) { 249 return -ENOMEM; 250 } 251 252 mCachedOffset = offset; 253 mCachedSize = size; 254 255 ssize_t err = mSource->readAt(mCachedOffset, 
mCache, mCachedSize); 256 257 if (err < (ssize_t)size) { 258 clearCache(); 259 260 return ERROR_IO; 261 } 262 263 return OK; 264} 265 266//////////////////////////////////////////////////////////////////////////////// 267 268static const bool kUseHexDump = false; 269 270static void hexdump(const void *_data, size_t size) { 271 const uint8_t *data = (const uint8_t *)_data; 272 size_t offset = 0; 273 while (offset < size) { 274 printf("0x%04zx ", offset); 275 276 size_t n = size - offset; 277 if (n > 16) { 278 n = 16; 279 } 280 281 for (size_t i = 0; i < 16; ++i) { 282 if (i == 8) { 283 printf(" "); 284 } 285 286 if (offset + i < size) { 287 printf("%02x ", data[offset + i]); 288 } else { 289 printf(" "); 290 } 291 } 292 293 printf(" "); 294 295 for (size_t i = 0; i < n; ++i) { 296 if (isprint(data[offset + i])) { 297 printf("%c", data[offset + i]); 298 } else { 299 printf("."); 300 } 301 } 302 303 printf("\n"); 304 305 offset += 16; 306 } 307} 308 309static const char *FourCC2MIME(uint32_t fourcc) { 310 switch (fourcc) { 311 case FOURCC('m', 'p', '4', 'a'): 312 return MEDIA_MIMETYPE_AUDIO_AAC; 313 314 case FOURCC('s', 'a', 'm', 'r'): 315 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 316 317 case FOURCC('s', 'a', 'w', 'b'): 318 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 319 320 case FOURCC('m', 'p', '4', 'v'): 321 return MEDIA_MIMETYPE_VIDEO_MPEG4; 322 323 case FOURCC('s', '2', '6', '3'): 324 case FOURCC('h', '2', '6', '3'): 325 case FOURCC('H', '2', '6', '3'): 326 return MEDIA_MIMETYPE_VIDEO_H263; 327 328 case FOURCC('a', 'v', 'c', '1'): 329 return MEDIA_MIMETYPE_VIDEO_AVC; 330 331 case FOURCC('h', 'v', 'c', '1'): 332 case FOURCC('h', 'e', 'v', '1'): 333 return MEDIA_MIMETYPE_VIDEO_HEVC; 334 default: 335 CHECK(!"should not be here."); 336 return NULL; 337 } 338} 339 340static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 341 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 342 // AMR NB audio is always mono, 8kHz 343 *channels 
= 1; 344 *rate = 8000; 345 return true; 346 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 347 // AMR WB audio is always mono, 16kHz 348 *channels = 1; 349 *rate = 16000; 350 return true; 351 } 352 return false; 353} 354 355MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source) 356 : mMoofOffset(0), 357 mDataSource(source), 358 mInitCheck(NO_INIT), 359 mHasVideo(false), 360 mHeaderTimescale(0), 361 mFirstTrack(NULL), 362 mLastTrack(NULL), 363 mFileMetaData(new MetaData), 364 mFirstSINF(NULL), 365 mIsDrm(false) { 366} 367 368MPEG4Extractor::~MPEG4Extractor() { 369 Track *track = mFirstTrack; 370 while (track) { 371 Track *next = track->next; 372 373 delete track; 374 track = next; 375 } 376 mFirstTrack = mLastTrack = NULL; 377 378 SINF *sinf = mFirstSINF; 379 while (sinf) { 380 SINF *next = sinf->next; 381 delete[] sinf->IPMPData; 382 delete sinf; 383 sinf = next; 384 } 385 mFirstSINF = NULL; 386 387 for (size_t i = 0; i < mPssh.size(); i++) { 388 delete [] mPssh[i].data; 389 } 390} 391 392uint32_t MPEG4Extractor::flags() const { 393 return CAN_PAUSE | 394 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 
395 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 396} 397 398sp<MetaData> MPEG4Extractor::getMetaData() { 399 status_t err; 400 if ((err = readMetaData()) != OK) { 401 return new MetaData; 402 } 403 404 return mFileMetaData; 405} 406 407size_t MPEG4Extractor::countTracks() { 408 status_t err; 409 if ((err = readMetaData()) != OK) { 410 ALOGV("MPEG4Extractor::countTracks: no tracks"); 411 return 0; 412 } 413 414 size_t n = 0; 415 Track *track = mFirstTrack; 416 while (track) { 417 ++n; 418 track = track->next; 419 } 420 421 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n); 422 return n; 423} 424 425sp<MetaData> MPEG4Extractor::getTrackMetaData( 426 size_t index, uint32_t flags) { 427 status_t err; 428 if ((err = readMetaData()) != OK) { 429 return NULL; 430 } 431 432 Track *track = mFirstTrack; 433 while (index > 0) { 434 if (track == NULL) { 435 return NULL; 436 } 437 438 track = track->next; 439 --index; 440 } 441 442 if (track == NULL) { 443 return NULL; 444 } 445 446 if ((flags & kIncludeExtensiveMetaData) 447 && !track->includes_expensive_metadata) { 448 track->includes_expensive_metadata = true; 449 450 const char *mime; 451 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 452 if (!strncasecmp("video/", mime, 6)) { 453 if (mMoofOffset > 0) { 454 int64_t duration; 455 if (track->meta->findInt64(kKeyDuration, &duration)) { 456 // nothing fancy, just pick a frame near 1/4th of the duration 457 track->meta->setInt64( 458 kKeyThumbnailTime, duration / 4); 459 } 460 } else { 461 uint32_t sampleIndex; 462 uint32_t sampleTime; 463 if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK 464 && track->sampleTable->getMetaDataForSample( 465 sampleIndex, NULL /* offset */, NULL /* size */, 466 &sampleTime) == OK) { 467 track->meta->setInt64( 468 kKeyThumbnailTime, 469 ((int64_t)sampleTime * 1000000) / track->timescale); 470 } 471 } 472 } 473 } 474 475 return track->meta; 476} 477 478static void MakeFourCCString(uint32_t x, char *s) { 479 s[0] = 
x >> 24; 480 s[1] = (x >> 16) & 0xff; 481 s[2] = (x >> 8) & 0xff; 482 s[3] = x & 0xff; 483 s[4] = '\0'; 484} 485 486status_t MPEG4Extractor::readMetaData() { 487 if (mInitCheck != NO_INIT) { 488 return mInitCheck; 489 } 490 491 off64_t offset = 0; 492 status_t err; 493 while (true) { 494 off64_t orig_offset = offset; 495 err = parseChunk(&offset, 0); 496 497 if (err != OK && err != UNKNOWN_ERROR) { 498 break; 499 } else if (offset <= orig_offset) { 500 // only continue parsing if the offset was advanced, 501 // otherwise we might end up in an infinite loop 502 ALOGE("did not advance: 0x%lld->0x%lld", orig_offset, offset); 503 err = ERROR_MALFORMED; 504 break; 505 } else if (err == OK) { 506 continue; 507 } 508 509 uint32_t hdr[2]; 510 if (mDataSource->readAt(offset, hdr, 8) < 8) { 511 break; 512 } 513 uint32_t chunk_type = ntohl(hdr[1]); 514 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 515 // store the offset of the first segment 516 mMoofOffset = offset; 517 } else if (chunk_type != FOURCC('m', 'd', 'a', 't')) { 518 // keep parsing until we get to the data 519 continue; 520 } 521 break; 522 } 523 524 if (mInitCheck == OK) { 525 if (mHasVideo) { 526 mFileMetaData->setCString( 527 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 528 } else { 529 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); 530 } 531 } else { 532 mInitCheck = err; 533 } 534 535 CHECK_NE(err, (status_t)NO_INIT); 536 537 // copy pssh data into file metadata 538 int psshsize = 0; 539 for (size_t i = 0; i < mPssh.size(); i++) { 540 psshsize += 20 + mPssh[i].datalen; 541 } 542 if (psshsize) { 543 char *buf = (char*)malloc(psshsize); 544 char *ptr = buf; 545 for (size_t i = 0; i < mPssh.size(); i++) { 546 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 547 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 548 ptr += (20 + mPssh[i].datalen); 549 } 550 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize); 551 free(buf); 552 } 553 return mInitCheck; 554} 555 556char* 
MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { 557 if (mFirstSINF == NULL) { 558 return NULL; 559 } 560 561 SINF *sinf = mFirstSINF; 562 while (sinf && (trackID != sinf->trackID)) { 563 sinf = sinf->next; 564 } 565 566 if (sinf == NULL) { 567 return NULL; 568 } 569 570 *len = sinf->len; 571 return sinf->IPMPData; 572} 573 574// Reads an encoded integer 7 bits at a time until it encounters the high bit clear. 575static int32_t readSize(off64_t offset, 576 const sp<DataSource> DataSource, uint8_t *numOfBytes) { 577 uint32_t size = 0; 578 uint8_t data; 579 bool moreData = true; 580 *numOfBytes = 0; 581 582 while (moreData) { 583 if (DataSource->readAt(offset, &data, 1) < 1) { 584 return -1; 585 } 586 offset ++; 587 moreData = (data >= 128) ? true : false; 588 size = (size << 7) | (data & 0x7f); // Take last 7 bits 589 (*numOfBytes) ++; 590 } 591 592 return size; 593} 594 595status_t MPEG4Extractor::parseDrmSINF( 596 off64_t * /* offset */, off64_t data_offset) { 597 uint8_t updateIdTag; 598 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 599 return ERROR_IO; 600 } 601 data_offset ++; 602 603 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 604 return ERROR_MALFORMED; 605 } 606 607 uint8_t numOfBytes; 608 int32_t size = readSize(data_offset, mDataSource, &numOfBytes); 609 if (size < 0) { 610 return ERROR_IO; 611 } 612 data_offset += numOfBytes; 613 614 while(size >= 11 ) { 615 uint8_t descriptorTag; 616 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { 617 return ERROR_IO; 618 } 619 data_offset ++; 620 621 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { 622 return ERROR_MALFORMED; 623 } 624 625 uint8_t buffer[8]; 626 //ObjectDescriptorID and ObjectDescriptor url flag 627 if (mDataSource->readAt(data_offset, buffer, 2) < 2) { 628 return ERROR_IO; 629 } 630 data_offset += 2; 631 632 if ((buffer[1] >> 5) & 0x0001) { //url flag is set 633 return ERROR_MALFORMED; 634 } 635 636 if (mDataSource->readAt(data_offset, 
buffer, 8) < 8) { 637 return ERROR_IO; 638 } 639 data_offset += 8; 640 641 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) 642 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { 643 return ERROR_MALFORMED; 644 } 645 646 SINF *sinf = new SINF; 647 sinf->trackID = U16_AT(&buffer[3]); 648 sinf->IPMPDescriptorID = buffer[7]; 649 sinf->next = mFirstSINF; 650 mFirstSINF = sinf; 651 652 size -= (8 + 2 + 1); 653 } 654 655 if (size != 0) { 656 return ERROR_MALFORMED; 657 } 658 659 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 660 return ERROR_IO; 661 } 662 data_offset ++; 663 664 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 665 return ERROR_MALFORMED; 666 } 667 668 size = readSize(data_offset, mDataSource, &numOfBytes); 669 if (size < 0) { 670 return ERROR_IO; 671 } 672 data_offset += numOfBytes; 673 674 while (size > 0) { 675 uint8_t tag; 676 int32_t dataLen; 677 if (mDataSource->readAt(data_offset, &tag, 1) < 1) { 678 return ERROR_IO; 679 } 680 data_offset ++; 681 682 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { 683 uint8_t id; 684 dataLen = readSize(data_offset, mDataSource, &numOfBytes); 685 if (dataLen < 0) { 686 return ERROR_IO; 687 } else if (dataLen < 4) { 688 return ERROR_MALFORMED; 689 } 690 data_offset += numOfBytes; 691 692 if (mDataSource->readAt(data_offset, &id, 1) < 1) { 693 return ERROR_IO; 694 } 695 data_offset ++; 696 697 SINF *sinf = mFirstSINF; 698 while (sinf && (sinf->IPMPDescriptorID != id)) { 699 sinf = sinf->next; 700 } 701 if (sinf == NULL) { 702 return ERROR_MALFORMED; 703 } 704 sinf->len = dataLen - 3; 705 sinf->IPMPData = new (std::nothrow) char[sinf->len]; 706 if (sinf->IPMPData == NULL) { 707 return ERROR_MALFORMED; 708 } 709 data_offset += 2; 710 711 if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) { 712 return ERROR_IO; 713 } 714 data_offset += sinf->len; 715 716 size -= (dataLen + numOfBytes + 1); 717 } 718 } 719 720 if (size != 0) { 721 return ERROR_MALFORMED; 722 } 723 724 
return UNKNOWN_ERROR; // Return a dummy error. 725} 726 727struct PathAdder { 728 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 729 : mPath(path) { 730 mPath->push(chunkType); 731 } 732 733 ~PathAdder() { 734 mPath->pop(); 735 } 736 737private: 738 Vector<uint32_t> *mPath; 739 740 PathAdder(const PathAdder &); 741 PathAdder &operator=(const PathAdder &); 742}; 743 744static bool underMetaDataPath(const Vector<uint32_t> &path) { 745 return path.size() >= 5 746 && path[0] == FOURCC('m', 'o', 'o', 'v') 747 && path[1] == FOURCC('u', 'd', 't', 'a') 748 && path[2] == FOURCC('m', 'e', 't', 'a') 749 && path[3] == FOURCC('i', 'l', 's', 't'); 750} 751 752// Given a time in seconds since Jan 1 1904, produce a human-readable string. 753static void convertTimeToDate(int64_t time_1904, String8 *s) { 754 time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600); 755 756 char tmp[32]; 757 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970)); 758 759 s->setTo(tmp); 760} 761 762status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 763 ALOGV("entering parseChunk %lld/%d", *offset, depth); 764 uint32_t hdr[2]; 765 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 766 return ERROR_IO; 767 } 768 uint64_t chunk_size = ntohl(hdr[0]); 769 int32_t chunk_type = ntohl(hdr[1]); 770 off64_t data_offset = *offset + 8; 771 772 if (chunk_size == 1) { 773 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 774 return ERROR_IO; 775 } 776 chunk_size = ntoh64(chunk_size); 777 data_offset += 8; 778 779 if (chunk_size < 16) { 780 // The smallest valid chunk is 16 bytes long in this case. 781 return ERROR_MALFORMED; 782 } 783 } else if (chunk_size == 0) { 784 if (depth == 0) { 785 // atom extends to end of file 786 off64_t sourceSize; 787 if (mDataSource->getSize(&sourceSize) == OK) { 788 chunk_size = (sourceSize - *offset); 789 } else { 790 // XXX could we just pick a "sufficiently large" value here? 
791 ALOGE("atom size is 0, and data source has no size"); 792 return ERROR_MALFORMED; 793 } 794 } else { 795 // not allowed for non-toplevel atoms, skip it 796 *offset += 4; 797 return OK; 798 } 799 } else if (chunk_size < 8) { 800 // The smallest valid chunk is 8 bytes long. 801 ALOGE("invalid chunk size: %" PRIu64, chunk_size); 802 return ERROR_MALFORMED; 803 } 804 805 char chunk[5]; 806 MakeFourCCString(chunk_type, chunk); 807 ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth); 808 809 if (kUseHexDump) { 810 static const char kWhitespace[] = " "; 811 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 812 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size); 813 814 char buffer[256]; 815 size_t n = chunk_size; 816 if (n > sizeof(buffer)) { 817 n = sizeof(buffer); 818 } 819 if (mDataSource->readAt(*offset, buffer, n) 820 < (ssize_t)n) { 821 return ERROR_IO; 822 } 823 824 hexdump(buffer, n); 825 } 826 827 PathAdder autoAdder(&mPath, chunk_type); 828 829 off64_t chunk_data_size = *offset + chunk_size - data_offset; 830 831 if (chunk_type != FOURCC('c', 'p', 'r', 't') 832 && chunk_type != FOURCC('c', 'o', 'v', 'r') 833 && mPath.size() == 5 && underMetaDataPath(mPath)) { 834 off64_t stop_offset = *offset + chunk_size; 835 *offset = data_offset; 836 while (*offset < stop_offset) { 837 status_t err = parseChunk(offset, depth + 1); 838 if (err != OK) { 839 return err; 840 } 841 } 842 843 if (*offset != stop_offset) { 844 return ERROR_MALFORMED; 845 } 846 847 return OK; 848 } 849 850 switch(chunk_type) { 851 case FOURCC('m', 'o', 'o', 'v'): 852 case FOURCC('t', 'r', 'a', 'k'): 853 case FOURCC('m', 'd', 'i', 'a'): 854 case FOURCC('m', 'i', 'n', 'f'): 855 case FOURCC('d', 'i', 'n', 'f'): 856 case FOURCC('s', 't', 'b', 'l'): 857 case FOURCC('m', 'v', 'e', 'x'): 858 case FOURCC('m', 'o', 'o', 'f'): 859 case FOURCC('t', 'r', 'a', 'f'): 860 case FOURCC('m', 'f', 'r', 'a'): 861 case FOURCC('u', 'd', 't', 'a'): 862 case FOURCC('i', 
'l', 's', 't'): 863 case FOURCC('s', 'i', 'n', 'f'): 864 case FOURCC('s', 'c', 'h', 'i'): 865 case FOURCC('e', 'd', 't', 's'): 866 { 867 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 868 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size); 869 870 if (mDataSource->flags() 871 & (DataSource::kWantsPrefetching 872 | DataSource::kIsCachingDataSource)) { 873 sp<MPEG4DataSource> cachedSource = 874 new MPEG4DataSource(mDataSource); 875 876 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { 877 mDataSource = cachedSource; 878 } 879 } 880 881 mLastTrack->sampleTable = new SampleTable(mDataSource); 882 } 883 884 bool isTrack = false; 885 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 886 isTrack = true; 887 888 Track *track = new Track; 889 track->next = NULL; 890 if (mLastTrack) { 891 mLastTrack->next = track; 892 } else { 893 mFirstTrack = track; 894 } 895 mLastTrack = track; 896 897 track->meta = new MetaData; 898 track->includes_expensive_metadata = false; 899 track->skipTrack = false; 900 track->timescale = 0; 901 track->meta->setCString(kKeyMIMEType, "application/octet-stream"); 902 } 903 904 off64_t stop_offset = *offset + chunk_size; 905 *offset = data_offset; 906 while (*offset < stop_offset) { 907 status_t err = parseChunk(offset, depth + 1); 908 if (err != OK) { 909 return err; 910 } 911 } 912 913 if (*offset != stop_offset) { 914 return ERROR_MALFORMED; 915 } 916 917 if (isTrack) { 918 if (mLastTrack->skipTrack) { 919 Track *cur = mFirstTrack; 920 921 if (cur == mLastTrack) { 922 delete cur; 923 mFirstTrack = mLastTrack = NULL; 924 } else { 925 while (cur && cur->next != mLastTrack) { 926 cur = cur->next; 927 } 928 cur->next = NULL; 929 delete mLastTrack; 930 mLastTrack = cur; 931 } 932 933 return OK; 934 } 935 936 status_t err = verifyTrack(mLastTrack); 937 938 if (err != OK) { 939 return err; 940 } 941 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 942 mInitCheck = OK; 943 944 if (!mIsDrm) { 945 return UNKNOWN_ERROR; // 
Return a dummy error. 946 } else { 947 return OK; 948 } 949 } 950 break; 951 } 952 953 case FOURCC('e', 'l', 's', 't'): 954 { 955 *offset += chunk_size; 956 957 // See 14496-12 8.6.6 958 uint8_t version; 959 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 960 return ERROR_IO; 961 } 962 963 uint32_t entry_count; 964 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 965 return ERROR_IO; 966 } 967 968 if (entry_count != 1) { 969 // we only support a single entry at the moment, for gapless playback 970 ALOGW("ignoring edit list with %d entries", entry_count); 971 } else if (mHeaderTimescale == 0) { 972 ALOGW("ignoring edit list because timescale is 0"); 973 } else { 974 off64_t entriesoffset = data_offset + 8; 975 uint64_t segment_duration; 976 int64_t media_time; 977 978 if (version == 1) { 979 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 980 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 981 return ERROR_IO; 982 } 983 } else if (version == 0) { 984 uint32_t sd; 985 int32_t mt; 986 if (!mDataSource->getUInt32(entriesoffset, &sd) || 987 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 988 return ERROR_IO; 989 } 990 segment_duration = sd; 991 media_time = mt; 992 } else { 993 return ERROR_IO; 994 } 995 996 uint64_t halfscale = mHeaderTimescale / 2; 997 segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale; 998 media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale; 999 1000 int64_t duration; 1001 int32_t samplerate; 1002 if (!mLastTrack) { 1003 return ERROR_MALFORMED; 1004 } 1005 if (mLastTrack->meta->findInt64(kKeyDuration, &duration) && 1006 mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) { 1007 1008 int64_t delay = (media_time * samplerate + 500000) / 1000000; 1009 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 1010 1011 int64_t paddingus = duration - (segment_duration + media_time); 1012 if (paddingus < 0) { 1013 // track duration from 
media header (which is what kKeyDuration is) might 1014 // be slightly shorter than the segment duration, which would make the 1015 // padding negative. Clamp to zero. 1016 paddingus = 0; 1017 } 1018 int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000; 1019 mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples); 1020 } 1021 } 1022 break; 1023 } 1024 1025 case FOURCC('f', 'r', 'm', 'a'): 1026 { 1027 *offset += chunk_size; 1028 1029 uint32_t original_fourcc; 1030 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 1031 return ERROR_IO; 1032 } 1033 original_fourcc = ntohl(original_fourcc); 1034 ALOGV("read original format: %d", original_fourcc); 1035 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 1036 uint32_t num_channels = 0; 1037 uint32_t sample_rate = 0; 1038 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 1039 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1040 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1041 } 1042 break; 1043 } 1044 1045 case FOURCC('t', 'e', 'n', 'c'): 1046 { 1047 *offset += chunk_size; 1048 1049 if (chunk_size < 32) { 1050 return ERROR_MALFORMED; 1051 } 1052 1053 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1054 // default IV size, 16 bytes default KeyID 1055 // (ISO 23001-7) 1056 char buf[4]; 1057 memset(buf, 0, 4); 1058 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1059 return ERROR_IO; 1060 } 1061 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1062 if (defaultAlgorithmId > 1) { 1063 // only 0 (clear) and 1 (AES-128) are valid 1064 return ERROR_MALFORMED; 1065 } 1066 1067 memset(buf, 0, 4); 1068 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1069 return ERROR_IO; 1070 } 1071 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 1072 1073 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 1074 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 1075 
// only unencrypted data must have 0 IV size 1076 return ERROR_MALFORMED; 1077 } else if (defaultIVSize != 0 && 1078 defaultIVSize != 8 && 1079 defaultIVSize != 16) { 1080 // only supported sizes are 0, 8 and 16 1081 return ERROR_MALFORMED; 1082 } 1083 1084 uint8_t defaultKeyId[16]; 1085 1086 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1087 return ERROR_IO; 1088 } 1089 1090 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId); 1091 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1092 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1093 break; 1094 } 1095 1096 case FOURCC('t', 'k', 'h', 'd'): 1097 { 1098 *offset += chunk_size; 1099 1100 status_t err; 1101 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1102 return err; 1103 } 1104 1105 break; 1106 } 1107 1108 case FOURCC('p', 's', 's', 'h'): 1109 { 1110 *offset += chunk_size; 1111 1112 PsshInfo pssh; 1113 1114 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1115 return ERROR_IO; 1116 } 1117 1118 uint32_t psshdatalen = 0; 1119 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1120 return ERROR_IO; 1121 } 1122 pssh.datalen = ntohl(psshdatalen); 1123 ALOGV("pssh data size: %d", pssh.datalen); 1124 if (pssh.datalen + 20 > chunk_size) { 1125 // pssh data length exceeds size of containing box 1126 return ERROR_MALFORMED; 1127 } 1128 1129 pssh.data = new (std::nothrow) uint8_t[pssh.datalen]; 1130 if (pssh.data == NULL) { 1131 return ERROR_MALFORMED; 1132 } 1133 ALOGV("allocated pssh @ %p", pssh.data); 1134 ssize_t requested = (ssize_t) pssh.datalen; 1135 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1136 return ERROR_IO; 1137 } 1138 mPssh.push_back(pssh); 1139 1140 break; 1141 } 1142 1143 case FOURCC('m', 'd', 'h', 'd'): 1144 { 1145 *offset += chunk_size; 1146 1147 if (chunk_data_size < 4 || mLastTrack == NULL) { 1148 return ERROR_MALFORMED; 1149 } 1150 1151 uint8_t 
version; 1152 if (mDataSource->readAt( 1153 data_offset, &version, sizeof(version)) 1154 < (ssize_t)sizeof(version)) { 1155 return ERROR_IO; 1156 } 1157 1158 off64_t timescale_offset; 1159 1160 if (version == 1) { 1161 timescale_offset = data_offset + 4 + 16; 1162 } else if (version == 0) { 1163 timescale_offset = data_offset + 4 + 8; 1164 } else { 1165 return ERROR_IO; 1166 } 1167 1168 uint32_t timescale; 1169 if (mDataSource->readAt( 1170 timescale_offset, ×cale, sizeof(timescale)) 1171 < (ssize_t)sizeof(timescale)) { 1172 return ERROR_IO; 1173 } 1174 1175 if (!timescale) { 1176 ALOGE("timescale should not be ZERO."); 1177 return ERROR_MALFORMED; 1178 } 1179 1180 mLastTrack->timescale = ntohl(timescale); 1181 1182 // 14496-12 says all ones means indeterminate, but some files seem to use 1183 // 0 instead. We treat both the same. 1184 int64_t duration = 0; 1185 if (version == 1) { 1186 if (mDataSource->readAt( 1187 timescale_offset + 4, &duration, sizeof(duration)) 1188 < (ssize_t)sizeof(duration)) { 1189 return ERROR_IO; 1190 } 1191 if (duration != -1) { 1192 duration = ntoh64(duration); 1193 } 1194 } else { 1195 uint32_t duration32; 1196 if (mDataSource->readAt( 1197 timescale_offset + 4, &duration32, sizeof(duration32)) 1198 < (ssize_t)sizeof(duration32)) { 1199 return ERROR_IO; 1200 } 1201 if (duration32 != 0xffffffff) { 1202 duration = ntohl(duration32); 1203 } 1204 } 1205 if (duration != 0) { 1206 mLastTrack->meta->setInt64( 1207 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1208 } 1209 1210 uint8_t lang[2]; 1211 off64_t lang_offset; 1212 if (version == 1) { 1213 lang_offset = timescale_offset + 4 + 8; 1214 } else if (version == 0) { 1215 lang_offset = timescale_offset + 4 + 4; 1216 } else { 1217 return ERROR_IO; 1218 } 1219 1220 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1221 < (ssize_t)sizeof(lang)) { 1222 return ERROR_IO; 1223 } 1224 1225 // To get the ISO-639-2/T three character language code 1226 // 1 bit pad followed by 3 
5-bits characters. Each character 1227 // is packed as the difference between its ASCII value and 0x60. 1228 char lang_code[4]; 1229 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1230 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1231 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1232 lang_code[3] = '\0'; 1233 1234 mLastTrack->meta->setCString( 1235 kKeyMediaLanguage, lang_code); 1236 1237 break; 1238 } 1239 1240 case FOURCC('s', 't', 's', 'd'): 1241 { 1242 if (chunk_data_size < 8) { 1243 return ERROR_MALFORMED; 1244 } 1245 1246 uint8_t buffer[8]; 1247 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1248 return ERROR_MALFORMED; 1249 } 1250 1251 if (mDataSource->readAt( 1252 data_offset, buffer, 8) < 8) { 1253 return ERROR_IO; 1254 } 1255 1256 if (U32_AT(buffer) != 0) { 1257 // Should be version 0, flags 0. 1258 return ERROR_MALFORMED; 1259 } 1260 1261 uint32_t entry_count = U32_AT(&buffer[4]); 1262 1263 if (entry_count > 1) { 1264 // For 3GPP timed text, there could be multiple tx3g boxes contain 1265 // multiple text display formats. These formats will be used to 1266 // display the timed text. 1267 // For encrypted files, there may also be more than one entry. 1268 const char *mime; 1269 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1270 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1271 strcasecmp(mime, "application/octet-stream")) { 1272 // For now we only support a single type of media per track. 
1273 mLastTrack->skipTrack = true; 1274 *offset += chunk_size; 1275 break; 1276 } 1277 } 1278 off64_t stop_offset = *offset + chunk_size; 1279 *offset = data_offset + 8; 1280 for (uint32_t i = 0; i < entry_count; ++i) { 1281 status_t err = parseChunk(offset, depth + 1); 1282 if (err != OK) { 1283 return err; 1284 } 1285 } 1286 1287 if (*offset != stop_offset) { 1288 return ERROR_MALFORMED; 1289 } 1290 break; 1291 } 1292 1293 case FOURCC('m', 'p', '4', 'a'): 1294 case FOURCC('e', 'n', 'c', 'a'): 1295 case FOURCC('s', 'a', 'm', 'r'): 1296 case FOURCC('s', 'a', 'w', 'b'): 1297 { 1298 uint8_t buffer[8 + 20]; 1299 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1300 // Basic AudioSampleEntry size. 1301 return ERROR_MALFORMED; 1302 } 1303 1304 if (mDataSource->readAt( 1305 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1306 return ERROR_IO; 1307 } 1308 1309 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1310 uint32_t num_channels = U16_AT(&buffer[16]); 1311 1312 uint16_t sample_size = U16_AT(&buffer[18]); 1313 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1314 1315 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1316 // if the chunk type is enca, we'll get the type from the sinf/frma box later 1317 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1318 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1319 } 1320 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1321 chunk, num_channels, sample_size, sample_rate); 1322 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1323 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1324 1325 off64_t stop_offset = *offset + chunk_size; 1326 *offset = data_offset + sizeof(buffer); 1327 while (*offset < stop_offset) { 1328 status_t err = parseChunk(offset, depth + 1); 1329 if (err != OK) { 1330 return err; 1331 } 1332 } 1333 1334 if (*offset != stop_offset) { 1335 return ERROR_MALFORMED; 1336 } 1337 break; 1338 } 1339 1340 case FOURCC('m', 'p', 
'4', 'v'): 1341 case FOURCC('e', 'n', 'c', 'v'): 1342 case FOURCC('s', '2', '6', '3'): 1343 case FOURCC('H', '2', '6', '3'): 1344 case FOURCC('h', '2', '6', '3'): 1345 case FOURCC('a', 'v', 'c', '1'): 1346 case FOURCC('h', 'v', 'c', '1'): 1347 case FOURCC('h', 'e', 'v', '1'): 1348 { 1349 mHasVideo = true; 1350 1351 uint8_t buffer[78]; 1352 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1353 // Basic VideoSampleEntry size. 1354 return ERROR_MALFORMED; 1355 } 1356 1357 if (mDataSource->readAt( 1358 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1359 return ERROR_IO; 1360 } 1361 1362 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1363 uint16_t width = U16_AT(&buffer[6 + 18]); 1364 uint16_t height = U16_AT(&buffer[6 + 20]); 1365 1366 // The video sample is not standard-compliant if it has invalid dimension. 1367 // Use some default width and height value, and 1368 // let the decoder figure out the actual width and height (and thus 1369 // be prepared for INFO_FOMRAT_CHANGED event). 
1370 if (width == 0) width = 352; 1371 if (height == 0) height = 288; 1372 1373 // printf("*** coding='%s' width=%d height=%d\n", 1374 // chunk, width, height); 1375 1376 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1377 // if the chunk type is encv, we'll get the type from the sinf/frma box later 1378 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1379 } 1380 mLastTrack->meta->setInt32(kKeyWidth, width); 1381 mLastTrack->meta->setInt32(kKeyHeight, height); 1382 1383 off64_t stop_offset = *offset + chunk_size; 1384 *offset = data_offset + sizeof(buffer); 1385 while (*offset < stop_offset) { 1386 status_t err = parseChunk(offset, depth + 1); 1387 if (err != OK) { 1388 return err; 1389 } 1390 } 1391 1392 if (*offset != stop_offset) { 1393 return ERROR_MALFORMED; 1394 } 1395 break; 1396 } 1397 1398 case FOURCC('s', 't', 'c', 'o'): 1399 case FOURCC('c', 'o', '6', '4'): 1400 { 1401 status_t err = 1402 mLastTrack->sampleTable->setChunkOffsetParams( 1403 chunk_type, data_offset, chunk_data_size); 1404 1405 *offset += chunk_size; 1406 1407 if (err != OK) { 1408 return err; 1409 } 1410 1411 break; 1412 } 1413 1414 case FOURCC('s', 't', 's', 'c'): 1415 { 1416 status_t err = 1417 mLastTrack->sampleTable->setSampleToChunkParams( 1418 data_offset, chunk_data_size); 1419 1420 *offset += chunk_size; 1421 1422 if (err != OK) { 1423 return err; 1424 } 1425 1426 break; 1427 } 1428 1429 case FOURCC('s', 't', 's', 'z'): 1430 case FOURCC('s', 't', 'z', '2'): 1431 { 1432 status_t err = 1433 mLastTrack->sampleTable->setSampleSizeParams( 1434 chunk_type, data_offset, chunk_data_size); 1435 1436 *offset += chunk_size; 1437 1438 if (err != OK) { 1439 return err; 1440 } 1441 1442 size_t max_size; 1443 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1444 1445 if (err != OK) { 1446 return err; 1447 } 1448 1449 if (max_size != 0) { 1450 // Assume that a given buffer only contains at most 10 chunks, 1451 // each chunk originally prefixed with a 2 byte length 
will 1452 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1453 // and thus will grow by 2 bytes per chunk. 1454 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1455 } else { 1456 // No size was specified. Pick a conservatively large size. 1457 int32_t width, height; 1458 if (!mLastTrack->meta->findInt32(kKeyWidth, &width) || 1459 !mLastTrack->meta->findInt32(kKeyHeight, &height)) { 1460 ALOGE("No width or height, assuming worst case 1080p"); 1461 width = 1920; 1462 height = 1080; 1463 } 1464 1465 const char *mime; 1466 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1467 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 1468 // AVC requires compression ratio of at least 2, and uses 1469 // macroblocks 1470 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1471 } else { 1472 // For all other formats there is no minimum compression 1473 // ratio. Use compression ratio of 1. 1474 max_size = width * height * 3 / 2; 1475 } 1476 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size); 1477 } 1478 1479 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1480 // mimetype) previously obtained, so don't cache them. 1481 const char *mime; 1482 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1483 // Calculate average frame rate. 
1484 if (!strncasecmp("video/", mime, 6)) { 1485 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1486 int64_t durationUs; 1487 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { 1488 if (durationUs > 0) { 1489 int32_t frameRate = (nSamples * 1000000LL + 1490 (durationUs >> 1)) / durationUs; 1491 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1492 } 1493 } 1494 } 1495 1496 break; 1497 } 1498 1499 case FOURCC('s', 't', 't', 's'): 1500 { 1501 *offset += chunk_size; 1502 1503 status_t err = 1504 mLastTrack->sampleTable->setTimeToSampleParams( 1505 data_offset, chunk_data_size); 1506 1507 if (err != OK) { 1508 return err; 1509 } 1510 1511 break; 1512 } 1513 1514 case FOURCC('c', 't', 't', 's'): 1515 { 1516 *offset += chunk_size; 1517 1518 status_t err = 1519 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1520 data_offset, chunk_data_size); 1521 1522 if (err != OK) { 1523 return err; 1524 } 1525 1526 break; 1527 } 1528 1529 case FOURCC('s', 't', 's', 's'): 1530 { 1531 *offset += chunk_size; 1532 1533 status_t err = 1534 mLastTrack->sampleTable->setSyncSampleParams( 1535 data_offset, chunk_data_size); 1536 1537 if (err != OK) { 1538 return err; 1539 } 1540 1541 break; 1542 } 1543 1544 // �xyz 1545 case FOURCC(0xA9, 'x', 'y', 'z'): 1546 { 1547 *offset += chunk_size; 1548 1549 // Best case the total data length inside "�xyz" box 1550 // would be 8, for instance "�xyz" + "\x00\x04\x15\xc7" + "0+0/", 1551 // where "\x00\x04" is the text string length with value = 4, 1552 // "\0x15\xc7" is the language code = en, and "0+0" is a 1553 // location (string) value with longitude = 0 and latitude = 0. 1554 if (chunk_data_size < 8) { 1555 return ERROR_MALFORMED; 1556 } 1557 1558 // Worst case the location string length would be 18, 1559 // for instance +90.0000-180.0000, without the trailing "/" and 1560 // the string length + language code. 
1561 char buffer[18]; 1562 1563 // Substracting 5 from the data size is because the text string length + 1564 // language code takes 4 bytes, and the trailing slash "/" takes 1 byte. 1565 off64_t location_length = chunk_data_size - 5; 1566 if (location_length >= (off64_t) sizeof(buffer)) { 1567 return ERROR_MALFORMED; 1568 } 1569 1570 if (mDataSource->readAt( 1571 data_offset + 4, buffer, location_length) < location_length) { 1572 return ERROR_IO; 1573 } 1574 1575 buffer[location_length] = '\0'; 1576 mFileMetaData->setCString(kKeyLocation, buffer); 1577 break; 1578 } 1579 1580 case FOURCC('e', 's', 'd', 's'): 1581 { 1582 *offset += chunk_size; 1583 1584 if (chunk_data_size < 4) { 1585 return ERROR_MALFORMED; 1586 } 1587 1588 uint8_t buffer[256]; 1589 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1590 return ERROR_BUFFER_TOO_SMALL; 1591 } 1592 1593 if (mDataSource->readAt( 1594 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1595 return ERROR_IO; 1596 } 1597 1598 if (U32_AT(buffer) != 0) { 1599 // Should be version 0, flags 0. 1600 return ERROR_MALFORMED; 1601 } 1602 1603 mLastTrack->meta->setData( 1604 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1605 1606 if (mPath.size() >= 2 1607 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1608 // Information from the ESDS must be relied on for proper 1609 // setup of sample rate and channel count for MPEG4 Audio. 1610 // The generic header appears to only contain generic 1611 // information... 
1612 1613 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1614 &buffer[4], chunk_data_size - 4); 1615 1616 if (err != OK) { 1617 return err; 1618 } 1619 } 1620 1621 break; 1622 } 1623 1624 case FOURCC('a', 'v', 'c', 'C'): 1625 { 1626 *offset += chunk_size; 1627 1628 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1629 1630 if (mDataSource->readAt( 1631 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1632 return ERROR_IO; 1633 } 1634 1635 mLastTrack->meta->setData( 1636 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size); 1637 1638 break; 1639 } 1640 case FOURCC('h', 'v', 'c', 'C'): 1641 { 1642 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1643 1644 if (mDataSource->readAt( 1645 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1646 return ERROR_IO; 1647 } 1648 1649 mLastTrack->meta->setData( 1650 kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size); 1651 1652 *offset += chunk_size; 1653 break; 1654 } 1655 1656 case FOURCC('d', '2', '6', '3'): 1657 { 1658 *offset += chunk_size; 1659 /* 1660 * d263 contains a fixed 7 bytes part: 1661 * vendor - 4 bytes 1662 * version - 1 byte 1663 * level - 1 byte 1664 * profile - 1 byte 1665 * optionally, "d263" box itself may contain a 16-byte 1666 * bit rate box (bitr) 1667 * average bit rate - 4 bytes 1668 * max bit rate - 4 bytes 1669 */ 1670 char buffer[23]; 1671 if (chunk_data_size != 7 && 1672 chunk_data_size != 23) { 1673 ALOGE("Incorrect D263 box size %lld", chunk_data_size); 1674 return ERROR_MALFORMED; 1675 } 1676 1677 if (mDataSource->readAt( 1678 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1679 return ERROR_IO; 1680 } 1681 1682 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 1683 1684 break; 1685 } 1686 1687 case FOURCC('m', 'e', 't', 'a'): 1688 { 1689 uint8_t buffer[4]; 1690 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1691 *offset += chunk_size; 1692 return ERROR_MALFORMED; 1693 } 1694 1695 if (mDataSource->readAt( 1696 
data_offset, buffer, 4) < 4) { 1697 *offset += chunk_size; 1698 return ERROR_IO; 1699 } 1700 1701 if (U32_AT(buffer) != 0) { 1702 // Should be version 0, flags 0. 1703 1704 // If it's not, let's assume this is one of those 1705 // apparently malformed chunks that don't have flags 1706 // and completely different semantics than what's 1707 // in the MPEG4 specs and skip it. 1708 *offset += chunk_size; 1709 return OK; 1710 } 1711 1712 off64_t stop_offset = *offset + chunk_size; 1713 *offset = data_offset + sizeof(buffer); 1714 while (*offset < stop_offset) { 1715 status_t err = parseChunk(offset, depth + 1); 1716 if (err != OK) { 1717 return err; 1718 } 1719 } 1720 1721 if (*offset != stop_offset) { 1722 return ERROR_MALFORMED; 1723 } 1724 break; 1725 } 1726 1727 case FOURCC('m', 'e', 'a', 'n'): 1728 case FOURCC('n', 'a', 'm', 'e'): 1729 case FOURCC('d', 'a', 't', 'a'): 1730 { 1731 *offset += chunk_size; 1732 1733 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 1734 status_t err = parseITunesMetaData(data_offset, chunk_data_size); 1735 1736 if (err != OK) { 1737 return err; 1738 } 1739 } 1740 1741 break; 1742 } 1743 1744 case FOURCC('m', 'v', 'h', 'd'): 1745 { 1746 *offset += chunk_size; 1747 1748 if (chunk_data_size < 32) { 1749 return ERROR_MALFORMED; 1750 } 1751 1752 uint8_t header[32]; 1753 if (mDataSource->readAt( 1754 data_offset, header, sizeof(header)) 1755 < (ssize_t)sizeof(header)) { 1756 return ERROR_IO; 1757 } 1758 1759 uint64_t creationTime; 1760 uint64_t duration = 0; 1761 if (header[0] == 1) { 1762 creationTime = U64_AT(&header[4]); 1763 mHeaderTimescale = U32_AT(&header[20]); 1764 duration = U64_AT(&header[24]); 1765 if (duration == 0xffffffffffffffff) { 1766 duration = 0; 1767 } 1768 } else if (header[0] != 0) { 1769 return ERROR_MALFORMED; 1770 } else { 1771 creationTime = U32_AT(&header[4]); 1772 mHeaderTimescale = U32_AT(&header[12]); 1773 uint32_t d32 = U32_AT(&header[16]); 1774 if (d32 == 0xffffffff) { 1775 d32 = 0; 1776 } 1777 duration = 
d32; 1778 } 1779 if (duration != 0) { 1780 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 1781 } 1782 1783 String8 s; 1784 convertTimeToDate(creationTime, &s); 1785 1786 mFileMetaData->setCString(kKeyDate, s.string()); 1787 1788 break; 1789 } 1790 1791 case FOURCC('m', 'e', 'h', 'd'): 1792 { 1793 *offset += chunk_size; 1794 1795 if (chunk_data_size < 8) { 1796 return ERROR_MALFORMED; 1797 } 1798 1799 uint8_t flags[4]; 1800 if (mDataSource->readAt( 1801 data_offset, flags, sizeof(flags)) 1802 < (ssize_t)sizeof(flags)) { 1803 return ERROR_IO; 1804 } 1805 1806 uint64_t duration = 0; 1807 if (flags[0] == 1) { 1808 // 64 bit 1809 if (chunk_data_size < 12) { 1810 return ERROR_MALFORMED; 1811 } 1812 mDataSource->getUInt64(data_offset + 4, &duration); 1813 if (duration == 0xffffffffffffffff) { 1814 duration = 0; 1815 } 1816 } else if (flags[0] == 0) { 1817 // 32 bit 1818 uint32_t d32; 1819 mDataSource->getUInt32(data_offset + 4, &d32); 1820 if (d32 == 0xffffffff) { 1821 d32 = 0; 1822 } 1823 duration = d32; 1824 } else { 1825 return ERROR_MALFORMED; 1826 } 1827 1828 if (duration != 0) { 1829 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 1830 } 1831 1832 break; 1833 } 1834 1835 case FOURCC('m', 'd', 'a', 't'): 1836 { 1837 ALOGV("mdat chunk, drm: %d", mIsDrm); 1838 if (!mIsDrm) { 1839 *offset += chunk_size; 1840 break; 1841 } 1842 1843 if (chunk_size < 8) { 1844 return ERROR_MALFORMED; 1845 } 1846 1847 return parseDrmSINF(offset, data_offset); 1848 } 1849 1850 case FOURCC('h', 'd', 'l', 'r'): 1851 { 1852 *offset += chunk_size; 1853 1854 uint32_t buffer; 1855 if (mDataSource->readAt( 1856 data_offset + 8, &buffer, 4) < 4) { 1857 return ERROR_IO; 1858 } 1859 1860 uint32_t type = ntohl(buffer); 1861 // For the 3GPP file format, the handler-type within the 'hdlr' box 1862 // shall be 'text'. We also want to support 'sbtl' handler type 1863 // for a practical reason as various MPEG4 containers use it. 
1864 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 1865 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 1866 } 1867 1868 break; 1869 } 1870 1871 case FOURCC('t', 'r', 'e', 'x'): 1872 { 1873 *offset += chunk_size; 1874 1875 if (chunk_data_size < 24) { 1876 return ERROR_IO; 1877 } 1878 Trex trex; 1879 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) || 1880 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) || 1881 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) || 1882 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) || 1883 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) { 1884 return ERROR_IO; 1885 } 1886 mTrex.add(trex); 1887 break; 1888 } 1889 1890 case FOURCC('t', 'x', '3', 'g'): 1891 { 1892 uint32_t type; 1893 const void *data; 1894 size_t size = 0; 1895 if (!mLastTrack->meta->findData( 1896 kKeyTextFormatData, &type, &data, &size)) { 1897 size = 0; 1898 } 1899 1900 if (SIZE_MAX - chunk_size <= size) { 1901 return ERROR_MALFORMED; 1902 } 1903 1904 uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size]; 1905 if (buffer == NULL) { 1906 return ERROR_MALFORMED; 1907 } 1908 1909 if (size > 0) { 1910 memcpy(buffer, data, size); 1911 } 1912 1913 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 1914 < chunk_size) { 1915 delete[] buffer; 1916 buffer = NULL; 1917 1918 // advance read pointer so we don't end up reading this again 1919 *offset += chunk_size; 1920 return ERROR_IO; 1921 } 1922 1923 mLastTrack->meta->setData( 1924 kKeyTextFormatData, 0, buffer, size + chunk_size); 1925 1926 delete[] buffer; 1927 1928 *offset += chunk_size; 1929 break; 1930 } 1931 1932 case FOURCC('c', 'o', 'v', 'r'): 1933 { 1934 *offset += chunk_size; 1935 1936 if (mFileMetaData != NULL) { 1937 ALOGV("chunk_data_size = %lld and data_offset = %lld", 1938 chunk_data_size, data_offset); 1939 
sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1); 1940 if (mDataSource->readAt( 1941 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) { 1942 return ERROR_IO; 1943 } 1944 const int kSkipBytesOfDataBox = 16; 1945 if (chunk_data_size <= kSkipBytesOfDataBox) { 1946 return ERROR_MALFORMED; 1947 } 1948 1949 mFileMetaData->setData( 1950 kKeyAlbumArt, MetaData::TYPE_NONE, 1951 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 1952 } 1953 1954 break; 1955 } 1956 1957 case FOURCC('t', 'i', 't', 'l'): 1958 case FOURCC('p', 'e', 'r', 'f'): 1959 case FOURCC('a', 'u', 't', 'h'): 1960 case FOURCC('g', 'n', 'r', 'e'): 1961 case FOURCC('a', 'l', 'b', 'm'): 1962 case FOURCC('y', 'r', 'r', 'c'): 1963 { 1964 *offset += chunk_size; 1965 1966 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth); 1967 1968 if (err != OK) { 1969 return err; 1970 } 1971 1972 break; 1973 } 1974 1975 case FOURCC('I', 'D', '3', '2'): 1976 { 1977 *offset += chunk_size; 1978 1979 if (chunk_data_size < 6) { 1980 return ERROR_MALFORMED; 1981 } 1982 1983 parseID3v2MetaData(data_offset + 6); 1984 1985 break; 1986 } 1987 1988 case FOURCC('-', '-', '-', '-'): 1989 { 1990 mLastCommentMean.clear(); 1991 mLastCommentName.clear(); 1992 mLastCommentData.clear(); 1993 *offset += chunk_size; 1994 break; 1995 } 1996 1997 case FOURCC('s', 'i', 'd', 'x'): 1998 { 1999 parseSegmentIndex(data_offset, chunk_data_size); 2000 *offset += chunk_size; 2001 return UNKNOWN_ERROR; // stop parsing after sidx 2002 } 2003 2004 default: 2005 { 2006 *offset += chunk_size; 2007 break; 2008 } 2009 } 2010 2011 return OK; 2012} 2013 2014status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 2015 ALOGV("MPEG4Extractor::parseSegmentIndex"); 2016 2017 if (size < 12) { 2018 return -EINVAL; 2019 } 2020 2021 uint32_t flags; 2022 if (!mDataSource->getUInt32(offset, &flags)) { 2023 return ERROR_MALFORMED; 2024 } 2025 2026 uint32_t version = flags >> 24; 2027 
flags &= 0xffffff; 2028 2029 ALOGV("sidx version %d", version); 2030 2031 uint32_t referenceId; 2032 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 2033 return ERROR_MALFORMED; 2034 } 2035 2036 uint32_t timeScale; 2037 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 2038 return ERROR_MALFORMED; 2039 } 2040 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 2041 2042 uint64_t earliestPresentationTime; 2043 uint64_t firstOffset; 2044 2045 offset += 12; 2046 size -= 12; 2047 2048 if (version == 0) { 2049 if (size < 8) { 2050 return -EINVAL; 2051 } 2052 uint32_t tmp; 2053 if (!mDataSource->getUInt32(offset, &tmp)) { 2054 return ERROR_MALFORMED; 2055 } 2056 earliestPresentationTime = tmp; 2057 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 2058 return ERROR_MALFORMED; 2059 } 2060 firstOffset = tmp; 2061 offset += 8; 2062 size -= 8; 2063 } else { 2064 if (size < 16) { 2065 return -EINVAL; 2066 } 2067 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 2068 return ERROR_MALFORMED; 2069 } 2070 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 2071 return ERROR_MALFORMED; 2072 } 2073 offset += 16; 2074 size -= 16; 2075 } 2076 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset); 2077 2078 if (size < 4) { 2079 return -EINVAL; 2080 } 2081 2082 uint16_t referenceCount; 2083 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 2084 return ERROR_MALFORMED; 2085 } 2086 offset += 4; 2087 size -= 4; 2088 ALOGV("refcount: %d", referenceCount); 2089 2090 if (size < referenceCount * 12) { 2091 return -EINVAL; 2092 } 2093 2094 uint64_t total_duration = 0; 2095 for (unsigned int i = 0; i < referenceCount; i++) { 2096 uint32_t d1, d2, d3; 2097 2098 if (!mDataSource->getUInt32(offset, &d1) || // size 2099 !mDataSource->getUInt32(offset + 4, &d2) || // duration 2100 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 2101 return ERROR_MALFORMED; 2102 } 2103 2104 if (d1 & 0x80000000) { 2105 
ALOGW("sub-sidx boxes not supported yet"); 2106 } 2107 bool sap = d3 & 0x80000000; 2108 uint32_t saptype = (d3 >> 28) & 7; 2109 if (!sap || (saptype != 1 && saptype != 2)) { 2110 // type 1 and 2 are sync samples 2111 ALOGW("not a stream access point, or unsupported type: %08x", d3); 2112 } 2113 total_duration += d2; 2114 offset += 12; 2115 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 2116 SidxEntry se; 2117 se.mSize = d1 & 0x7fffffff; 2118 se.mDurationUs = 1000000LL * d2 / timeScale; 2119 mSidxEntries.add(se); 2120 } 2121 2122 uint64_t sidxDuration = total_duration * 1000000 / timeScale; 2123 2124 int64_t metaDuration; 2125 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 2126 mLastTrack->meta->setInt64(kKeyDuration, sidxDuration); 2127 } 2128 return OK; 2129} 2130 2131 2132 2133status_t MPEG4Extractor::parseTrackHeader( 2134 off64_t data_offset, off64_t data_size) { 2135 if (data_size < 4) { 2136 return ERROR_MALFORMED; 2137 } 2138 2139 uint8_t version; 2140 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 2141 return ERROR_IO; 2142 } 2143 2144 size_t dynSize = (version == 1) ? 
36 : 24; 2145 2146 uint8_t buffer[36 + 60]; 2147 2148 if (data_size != (off64_t)dynSize + 60) { 2149 return ERROR_MALFORMED; 2150 } 2151 2152 if (mDataSource->readAt( 2153 data_offset, buffer, data_size) < (ssize_t)data_size) { 2154 return ERROR_IO; 2155 } 2156 2157 uint64_t ctime __unused, mtime __unused, duration __unused; 2158 int32_t id; 2159 2160 if (version == 1) { 2161 ctime = U64_AT(&buffer[4]); 2162 mtime = U64_AT(&buffer[12]); 2163 id = U32_AT(&buffer[20]); 2164 duration = U64_AT(&buffer[28]); 2165 } else if (version == 0) { 2166 ctime = U32_AT(&buffer[4]); 2167 mtime = U32_AT(&buffer[8]); 2168 id = U32_AT(&buffer[12]); 2169 duration = U32_AT(&buffer[20]); 2170 } else { 2171 return ERROR_UNSUPPORTED; 2172 } 2173 2174 mLastTrack->meta->setInt32(kKeyTrackID, id); 2175 2176 size_t matrixOffset = dynSize + 16; 2177 int32_t a00 = U32_AT(&buffer[matrixOffset]); 2178 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 2179 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 2180 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 2181 2182#if 0 2183 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 2184 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 2185 2186 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 2187 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 2188 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 2189 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 2190#endif 2191 2192 uint32_t rotationDegrees; 2193 2194 static const int32_t kFixedOne = 0x10000; 2195 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 2196 // Identity, no rotation 2197 rotationDegrees = 0; 2198 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 2199 rotationDegrees = 90; 2200 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 2201 rotationDegrees = 270; 2202 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { 2203 rotationDegrees = 180; 2204 } else { 2205 ALOGW("We only support 0,90,180,270 degree rotation 
matrices"); 2206 rotationDegrees = 0; 2207 } 2208 2209 if (rotationDegrees != 0) { 2210 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); 2211 } 2212 2213 // Handle presentation display size, which could be different 2214 // from the image size indicated by kKeyWidth and kKeyHeight. 2215 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2216 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2217 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); 2218 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); 2219 2220 return OK; 2221} 2222 2223status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) { 2224 if (size < 4) { 2225 return ERROR_MALFORMED; 2226 } 2227 2228 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2229 if (buffer == NULL) { 2230 return ERROR_MALFORMED; 2231 } 2232 if (mDataSource->readAt( 2233 offset, buffer, size) != (ssize_t)size) { 2234 delete[] buffer; 2235 buffer = NULL; 2236 2237 return ERROR_IO; 2238 } 2239 2240 uint32_t flags = U32_AT(buffer); 2241 2242 uint32_t metadataKey = 0; 2243 char chunk[5]; 2244 MakeFourCCString(mPath[4], chunk); 2245 ALOGV("meta: %s @ %lld", chunk, offset); 2246 switch ((int32_t)mPath[4]) { 2247 case FOURCC(0xa9, 'a', 'l', 'b'): 2248 { 2249 metadataKey = kKeyAlbum; 2250 break; 2251 } 2252 case FOURCC(0xa9, 'A', 'R', 'T'): 2253 { 2254 metadataKey = kKeyArtist; 2255 break; 2256 } 2257 case FOURCC('a', 'A', 'R', 'T'): 2258 { 2259 metadataKey = kKeyAlbumArtist; 2260 break; 2261 } 2262 case FOURCC(0xa9, 'd', 'a', 'y'): 2263 { 2264 metadataKey = kKeyYear; 2265 break; 2266 } 2267 case FOURCC(0xa9, 'n', 'a', 'm'): 2268 { 2269 metadataKey = kKeyTitle; 2270 break; 2271 } 2272 case FOURCC(0xa9, 'w', 'r', 't'): 2273 { 2274 metadataKey = kKeyWriter; 2275 break; 2276 } 2277 case FOURCC('c', 'o', 'v', 'r'): 2278 { 2279 metadataKey = kKeyAlbumArt; 2280 break; 2281 } 2282 case FOURCC('g', 'n', 'r', 'e'): 2283 { 2284 metadataKey = kKeyGenre; 2285 break; 2286 } 2287 case FOURCC(0xa9, 'g', 'e', 
'n'): 2288 { 2289 metadataKey = kKeyGenre; 2290 break; 2291 } 2292 case FOURCC('c', 'p', 'i', 'l'): 2293 { 2294 if (size == 9 && flags == 21) { 2295 char tmp[16]; 2296 sprintf(tmp, "%d", 2297 (int)buffer[size - 1]); 2298 2299 mFileMetaData->setCString(kKeyCompilation, tmp); 2300 } 2301 break; 2302 } 2303 case FOURCC('t', 'r', 'k', 'n'): 2304 { 2305 if (size == 16 && flags == 0) { 2306 char tmp[16]; 2307 uint16_t* pTrack = (uint16_t*)&buffer[10]; 2308 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 2309 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 2310 2311 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2312 } 2313 break; 2314 } 2315 case FOURCC('d', 'i', 's', 'k'): 2316 { 2317 if ((size == 14 || size == 16) && flags == 0) { 2318 char tmp[16]; 2319 uint16_t* pDisc = (uint16_t*)&buffer[10]; 2320 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 2321 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 2322 2323 mFileMetaData->setCString(kKeyDiscNumber, tmp); 2324 } 2325 break; 2326 } 2327 case FOURCC('-', '-', '-', '-'): 2328 { 2329 buffer[size] = '\0'; 2330 switch (mPath[5]) { 2331 case FOURCC('m', 'e', 'a', 'n'): 2332 mLastCommentMean.setTo((const char *)buffer + 4); 2333 break; 2334 case FOURCC('n', 'a', 'm', 'e'): 2335 mLastCommentName.setTo((const char *)buffer + 4); 2336 break; 2337 case FOURCC('d', 'a', 't', 'a'): 2338 mLastCommentData.setTo((const char *)buffer + 8); 2339 break; 2340 } 2341 2342 // Once we have a set of mean/name/data info, go ahead and process 2343 // it to see if its something we are interested in. Whether or not 2344 // were are interested in the specific tag, make sure to clear out 2345 // the set so we can be ready to process another tuple should one 2346 // show up later in the file. 
2347 if ((mLastCommentMean.length() != 0) && 2348 (mLastCommentName.length() != 0) && 2349 (mLastCommentData.length() != 0)) { 2350 2351 if (mLastCommentMean == "com.apple.iTunes" 2352 && mLastCommentName == "iTunSMPB") { 2353 int32_t delay, padding; 2354 if (sscanf(mLastCommentData, 2355 " %*x %x %x %*x", &delay, &padding) == 2) { 2356 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 2357 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); 2358 } 2359 } 2360 2361 mLastCommentMean.clear(); 2362 mLastCommentName.clear(); 2363 mLastCommentData.clear(); 2364 } 2365 break; 2366 } 2367 2368 default: 2369 break; 2370 } 2371 2372 if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) { 2373 if (metadataKey == kKeyAlbumArt) { 2374 mFileMetaData->setData( 2375 kKeyAlbumArt, MetaData::TYPE_NONE, 2376 buffer + 8, size - 8); 2377 } else if (metadataKey == kKeyGenre) { 2378 if (flags == 0) { 2379 // uint8_t genre code, iTunes genre codes are 2380 // the standard id3 codes, except they start 2381 // at 1 instead of 0 (e.g. Pop is 14, not 13) 2382 // We use standard id3 numbering, so subtract 1. 
2383 int genrecode = (int)buffer[size - 1]; 2384 genrecode--; 2385 if (genrecode < 0) { 2386 genrecode = 255; // reserved for 'unknown genre' 2387 } 2388 char genre[10]; 2389 sprintf(genre, "%d", genrecode); 2390 2391 mFileMetaData->setCString(metadataKey, genre); 2392 } else if (flags == 1) { 2393 // custom genre string 2394 buffer[size] = '\0'; 2395 2396 mFileMetaData->setCString( 2397 metadataKey, (const char *)buffer + 8); 2398 } 2399 } else { 2400 buffer[size] = '\0'; 2401 2402 mFileMetaData->setCString( 2403 metadataKey, (const char *)buffer + 8); 2404 } 2405 } 2406 2407 delete[] buffer; 2408 buffer = NULL; 2409 2410 return OK; 2411} 2412 2413status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) { 2414 if (size < 4 || size == SIZE_MAX) { 2415 return ERROR_MALFORMED; 2416 } 2417 2418 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2419 if (buffer == NULL) { 2420 return ERROR_MALFORMED; 2421 } 2422 if (mDataSource->readAt( 2423 offset, buffer, size) != (ssize_t)size) { 2424 delete[] buffer; 2425 buffer = NULL; 2426 2427 return ERROR_IO; 2428 } 2429 2430 uint32_t metadataKey = 0; 2431 switch (mPath[depth]) { 2432 case FOURCC('t', 'i', 't', 'l'): 2433 { 2434 metadataKey = kKeyTitle; 2435 break; 2436 } 2437 case FOURCC('p', 'e', 'r', 'f'): 2438 { 2439 metadataKey = kKeyArtist; 2440 break; 2441 } 2442 case FOURCC('a', 'u', 't', 'h'): 2443 { 2444 metadataKey = kKeyWriter; 2445 break; 2446 } 2447 case FOURCC('g', 'n', 'r', 'e'): 2448 { 2449 metadataKey = kKeyGenre; 2450 break; 2451 } 2452 case FOURCC('a', 'l', 'b', 'm'): 2453 { 2454 if (buffer[size - 1] != '\0') { 2455 char tmp[4]; 2456 sprintf(tmp, "%u", buffer[size - 1]); 2457 2458 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2459 } 2460 2461 metadataKey = kKeyAlbum; 2462 break; 2463 } 2464 case FOURCC('y', 'r', 'r', 'c'): 2465 { 2466 char tmp[5]; 2467 uint16_t year = U16_AT(&buffer[4]); 2468 2469 if (year < 10000) { 2470 sprintf(tmp, "%u", year); 2471 2472 
mFileMetaData->setCString(kKeyYear, tmp); 2473 } 2474 break; 2475 } 2476 2477 default: 2478 break; 2479 } 2480 2481 if (metadataKey > 0) { 2482 bool isUTF8 = true; // Common case 2483 char16_t *framedata = NULL; 2484 int len16 = 0; // Number of UTF-16 characters 2485 2486 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00 2487 if (size < 6) { 2488 return ERROR_MALFORMED; 2489 } 2490 2491 if (size - 6 >= 4) { 2492 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator 2493 framedata = (char16_t *)(buffer + 6); 2494 if (0xfffe == *framedata) { 2495 // endianness marker (BOM) doesn't match host endianness 2496 for (int i = 0; i < len16; i++) { 2497 framedata[i] = bswap_16(framedata[i]); 2498 } 2499 // BOM is now swapped to 0xfeff, we will execute next block too 2500 } 2501 2502 if (0xfeff == *framedata) { 2503 // Remove the BOM 2504 framedata++; 2505 len16--; 2506 isUTF8 = false; 2507 } 2508 // else normal non-zero-length UTF-8 string 2509 // we can't handle UTF-16 without BOM as there is no other 2510 // indication of encoding. 2511 } 2512 2513 if (isUTF8) { 2514 buffer[size] = 0; 2515 mFileMetaData->setCString(metadataKey, (const char *)buffer + 6); 2516 } else { 2517 // Convert from UTF-16 string to UTF-8 string. 
2518 String8 tmpUTF8str(framedata, len16); 2519 mFileMetaData->setCString(metadataKey, tmpUTF8str.string()); 2520 } 2521 } 2522 2523 delete[] buffer; 2524 buffer = NULL; 2525 2526 return OK; 2527} 2528 2529void MPEG4Extractor::parseID3v2MetaData(off64_t offset) { 2530 ID3 id3(mDataSource, true /* ignorev1 */, offset); 2531 2532 if (id3.isValid()) { 2533 struct Map { 2534 int key; 2535 const char *tag1; 2536 const char *tag2; 2537 }; 2538 static const Map kMap[] = { 2539 { kKeyAlbum, "TALB", "TAL" }, 2540 { kKeyArtist, "TPE1", "TP1" }, 2541 { kKeyAlbumArtist, "TPE2", "TP2" }, 2542 { kKeyComposer, "TCOM", "TCM" }, 2543 { kKeyGenre, "TCON", "TCO" }, 2544 { kKeyTitle, "TIT2", "TT2" }, 2545 { kKeyYear, "TYE", "TYER" }, 2546 { kKeyAuthor, "TXT", "TEXT" }, 2547 { kKeyCDTrackNumber, "TRK", "TRCK" }, 2548 { kKeyDiscNumber, "TPA", "TPOS" }, 2549 { kKeyCompilation, "TCP", "TCMP" }, 2550 }; 2551 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 2552 2553 for (size_t i = 0; i < kNumMapEntries; ++i) { 2554 if (!mFileMetaData->hasData(kMap[i].key)) { 2555 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 2556 if (it->done()) { 2557 delete it; 2558 it = new ID3::Iterator(id3, kMap[i].tag2); 2559 } 2560 2561 if (it->done()) { 2562 delete it; 2563 continue; 2564 } 2565 2566 String8 s; 2567 it->getString(&s); 2568 delete it; 2569 2570 mFileMetaData->setCString(kMap[i].key, s); 2571 } 2572 } 2573 2574 size_t dataSize; 2575 String8 mime; 2576 const void *data = id3.getAlbumArt(&dataSize, &mime); 2577 2578 if (data) { 2579 mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 2580 mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string()); 2581 } 2582 } 2583} 2584 2585sp<MediaSource> MPEG4Extractor::getTrack(size_t index) { 2586 status_t err; 2587 if ((err = readMetaData()) != OK) { 2588 return NULL; 2589 } 2590 2591 Track *track = mFirstTrack; 2592 while (index > 0) { 2593 if (track == NULL) { 2594 return NULL; 2595 } 2596 2597 track = 
track->next; 2598 --index; 2599 } 2600 2601 if (track == NULL) { 2602 return NULL; 2603 } 2604 2605 2606 Trex *trex = NULL; 2607 int32_t trackId; 2608 if (track->meta->findInt32(kKeyTrackID, &trackId)) { 2609 for (size_t i = 0; i < mTrex.size(); i++) { 2610 Trex *t = &mTrex.editItemAt(index); 2611 if (t->track_ID == (uint32_t) trackId) { 2612 trex = t; 2613 break; 2614 } 2615 } 2616 } 2617 2618 ALOGV("getTrack called, pssh: %zu", mPssh.size()); 2619 2620 return new MPEG4Source(this, 2621 track->meta, mDataSource, track->timescale, track->sampleTable, 2622 mSidxEntries, trex, mMoofOffset); 2623} 2624 2625// static 2626status_t MPEG4Extractor::verifyTrack(Track *track) { 2627 const char *mime; 2628 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 2629 2630 uint32_t type; 2631 const void *data; 2632 size_t size; 2633 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 2634 if (!track->meta->findData(kKeyAVCC, &type, &data, &size) 2635 || type != kTypeAVCC) { 2636 return ERROR_MALFORMED; 2637 } 2638 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 2639 if (!track->meta->findData(kKeyHVCC, &type, &data, &size) 2640 || type != kTypeHVCC) { 2641 return ERROR_MALFORMED; 2642 } 2643 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 2644 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 2645 if (!track->meta->findData(kKeyESDS, &type, &data, &size) 2646 || type != kTypeESDS) { 2647 return ERROR_MALFORMED; 2648 } 2649 } 2650 2651 if (track->sampleTable == NULL || !track->sampleTable->isValid()) { 2652 // Make sure we have all the metadata we need. 
2653 ALOGE("stbl atom missing/invalid."); 2654 return ERROR_MALFORMED; 2655 } 2656 2657 if (track->timescale == 0) { 2658 ALOGE("timescale invalid."); 2659 return ERROR_MALFORMED; 2660 } 2661 2662 return OK; 2663} 2664 2665typedef enum { 2666 //AOT_NONE = -1, 2667 //AOT_NULL_OBJECT = 0, 2668 //AOT_AAC_MAIN = 1, /**< Main profile */ 2669 AOT_AAC_LC = 2, /**< Low Complexity object */ 2670 //AOT_AAC_SSR = 3, 2671 //AOT_AAC_LTP = 4, 2672 AOT_SBR = 5, 2673 //AOT_AAC_SCAL = 6, 2674 //AOT_TWIN_VQ = 7, 2675 //AOT_CELP = 8, 2676 //AOT_HVXC = 9, 2677 //AOT_RSVD_10 = 10, /**< (reserved) */ 2678 //AOT_RSVD_11 = 11, /**< (reserved) */ 2679 //AOT_TTSI = 12, /**< TTSI Object */ 2680 //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */ 2681 //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */ 2682 //AOT_GEN_MIDI = 15, /**< General MIDI object */ 2683 //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */ 2684 AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */ 2685 //AOT_RSVD_18 = 18, /**< (reserved) */ 2686 //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */ 2687 AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */ 2688 //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */ 2689 AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */ 2690 AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */ 2691 //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */ 2692 //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */ 2693 //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */ 2694 //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */ 2695 //AOT_RSVD_28 = 28, /**< might become SSC */ 2696 AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */ 2697 //AOT_MPEGS = 30, /**< MPEG Surround */ 2698 2699 AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */ 2700 2701 //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */ 2702 //AOT_MP3ONMP4_L2 = 33, /**< 
MPEG-Layer2 in mp4 */ 2703 //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */ 2704 //AOT_RSVD_35 = 35, /**< might become DST */ 2705 //AOT_RSVD_36 = 36, /**< might become ALS */ 2706 //AOT_AAC_SLS = 37, /**< AAC + SLS */ 2707 //AOT_SLS = 38, /**< SLS */ 2708 //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */ 2709 2710 //AOT_USAC = 42, /**< USAC */ 2711 //AOT_SAOC = 43, /**< SAOC */ 2712 //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */ 2713 2714 //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */ 2715} AUDIO_OBJECT_TYPE; 2716 2717status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 2718 const void *esds_data, size_t esds_size) { 2719 ESDS esds(esds_data, esds_size); 2720 2721 uint8_t objectTypeIndication; 2722 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 2723 return ERROR_MALFORMED; 2724 } 2725 2726 if (objectTypeIndication == 0xe1) { 2727 // This isn't MPEG4 audio at all, it's QCELP 14k... 2728 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 2729 return OK; 2730 } 2731 2732 if (objectTypeIndication == 0x6b) { 2733 // The media subtype is MP3 audio 2734 // Our software MP3 audio decoder may not be able to handle 2735 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 2736 ALOGE("MP3 track in MP4/3GPP file is not supported"); 2737 return ERROR_UNSUPPORTED; 2738 } 2739 2740 const uint8_t *csd; 2741 size_t csd_size; 2742 if (esds.getCodecSpecificInfo( 2743 (const void **)&csd, &csd_size) != OK) { 2744 return ERROR_MALFORMED; 2745 } 2746 2747 if (kUseHexDump) { 2748 printf("ESD of size %d\n", csd_size); 2749 hexdump(csd, csd_size); 2750 } 2751 2752 if (csd_size == 0) { 2753 // There's no further information, i.e. no codec specific data 2754 // Let's assume that the information provided in the mpeg4 headers 2755 // is accurate and hope for the best. 
2756 2757 return OK; 2758 } 2759 2760 if (csd_size < 2) { 2761 return ERROR_MALFORMED; 2762 } 2763 2764 static uint32_t kSamplingRate[] = { 2765 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 2766 16000, 12000, 11025, 8000, 7350 2767 }; 2768 2769 ABitReader br(csd, csd_size); 2770 uint32_t objectType = br.getBits(5); 2771 2772 if (objectType == 31) { // AAC-ELD => additional 6 bits 2773 objectType = 32 + br.getBits(6); 2774 } 2775 2776 //keep AOT type 2777 mLastTrack->meta->setInt32(kKeyAACAOT, objectType); 2778 2779 uint32_t freqIndex = br.getBits(4); 2780 2781 int32_t sampleRate = 0; 2782 int32_t numChannels = 0; 2783 if (freqIndex == 15) { 2784 if (csd_size < 5) { 2785 return ERROR_MALFORMED; 2786 } 2787 sampleRate = br.getBits(24); 2788 numChannels = br.getBits(4); 2789 } else { 2790 numChannels = br.getBits(4); 2791 2792 if (freqIndex == 13 || freqIndex == 14) { 2793 return ERROR_MALFORMED; 2794 } 2795 2796 sampleRate = kSamplingRate[freqIndex]; 2797 } 2798 2799 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 2800 uint32_t extFreqIndex = br.getBits(4); 2801 int32_t extSampleRate __unused; 2802 if (extFreqIndex == 15) { 2803 if (csd_size < 8) { 2804 return ERROR_MALFORMED; 2805 } 2806 extSampleRate = br.getBits(24); 2807 } else { 2808 if (extFreqIndex == 13 || extFreqIndex == 14) { 2809 return ERROR_MALFORMED; 2810 } 2811 extSampleRate = kSamplingRate[extFreqIndex]; 2812 } 2813 //TODO: save the extension sampling rate value in meta data => 2814 // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate); 2815 } 2816 2817 switch (numChannels) { 2818 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration 2819 case 0: 2820 case 1:// FC 2821 case 2:// FL FR 2822 case 3:// FC, FL FR 2823 case 4:// FC, FL FR, RC 2824 case 5:// FC, FL FR, SL SR 2825 case 6:// FC, FL FR, SL SR, LFE 2826 //numChannels already contains the right value 2827 break; 2828 case 11:// FC, FL FR, SL SR, RC, 
LFE 2829 numChannels = 7; 2830 break; 2831 case 7: // FC, FCL FCR, FL FR, SL SR, LFE 2832 case 12:// FC, FL FR, SL SR, RL RR, LFE 2833 case 14:// FC, FL FR, SL SR, LFE, FHL FHR 2834 numChannels = 8; 2835 break; 2836 default: 2837 return ERROR_UNSUPPORTED; 2838 } 2839 2840 { 2841 if (objectType == AOT_SBR || objectType == AOT_PS) { 2842 objectType = br.getBits(5); 2843 2844 if (objectType == AOT_ESCAPE) { 2845 objectType = 32 + br.getBits(6); 2846 } 2847 } 2848 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || 2849 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || 2850 objectType == AOT_ER_BSAC) { 2851 const int32_t frameLengthFlag __unused = br.getBits(1); 2852 2853 const int32_t dependsOnCoreCoder = br.getBits(1); 2854 2855 if (dependsOnCoreCoder ) { 2856 const int32_t coreCoderDelay __unused = br.getBits(14); 2857 } 2858 2859 int32_t extensionFlag = -1; 2860 if (br.numBitsLeft() > 0) { 2861 extensionFlag = br.getBits(1); 2862 } else { 2863 switch (objectType) { 2864 // 14496-3 4.5.1.1 extensionFlag 2865 case AOT_AAC_LC: 2866 extensionFlag = 0; 2867 break; 2868 case AOT_ER_AAC_LC: 2869 case AOT_ER_AAC_SCAL: 2870 case AOT_ER_BSAC: 2871 case AOT_ER_AAC_LD: 2872 extensionFlag = 1; 2873 break; 2874 default: 2875 TRESPASS(); 2876 break; 2877 } 2878 ALOGW("csd missing extension flag; assuming %d for object type %u.", 2879 extensionFlag, objectType); 2880 } 2881 2882 if (numChannels == 0) { 2883 int32_t channelsEffectiveNum = 0; 2884 int32_t channelsNum = 0; 2885 const int32_t ElementInstanceTag __unused = br.getBits(4); 2886 const int32_t Profile __unused = br.getBits(2); 2887 const int32_t SamplingFrequencyIndex __unused = br.getBits(4); 2888 const int32_t NumFrontChannelElements = br.getBits(4); 2889 const int32_t NumSideChannelElements = br.getBits(4); 2890 const int32_t NumBackChannelElements = br.getBits(4); 2891 const int32_t NumLfeChannelElements = br.getBits(2); 2892 const int32_t NumAssocDataElements __unused = br.getBits(3); 2893 
const int32_t NumValidCcElements __unused = br.getBits(4); 2894 2895 const int32_t MonoMixdownPresent = br.getBits(1); 2896 if (MonoMixdownPresent != 0) { 2897 const int32_t MonoMixdownElementNumber __unused = br.getBits(4); 2898 } 2899 2900 const int32_t StereoMixdownPresent = br.getBits(1); 2901 if (StereoMixdownPresent != 0) { 2902 const int32_t StereoMixdownElementNumber __unused = br.getBits(4); 2903 } 2904 2905 const int32_t MatrixMixdownIndexPresent = br.getBits(1); 2906 if (MatrixMixdownIndexPresent != 0) { 2907 const int32_t MatrixMixdownIndex __unused = br.getBits(2); 2908 const int32_t PseudoSurroundEnable __unused = br.getBits(1); 2909 } 2910 2911 int i; 2912 for (i=0; i < NumFrontChannelElements; i++) { 2913 const int32_t FrontElementIsCpe = br.getBits(1); 2914 const int32_t FrontElementTagSelect __unused = br.getBits(4); 2915 channelsNum += FrontElementIsCpe ? 2 : 1; 2916 } 2917 2918 for (i=0; i < NumSideChannelElements; i++) { 2919 const int32_t SideElementIsCpe = br.getBits(1); 2920 const int32_t SideElementTagSelect __unused = br.getBits(4); 2921 channelsNum += SideElementIsCpe ? 2 : 1; 2922 } 2923 2924 for (i=0; i < NumBackChannelElements; i++) { 2925 const int32_t BackElementIsCpe = br.getBits(1); 2926 const int32_t BackElementTagSelect __unused = br.getBits(4); 2927 channelsNum += BackElementIsCpe ? 
2 : 1; 2928 } 2929 channelsEffectiveNum = channelsNum; 2930 2931 for (i=0; i < NumLfeChannelElements; i++) { 2932 const int32_t LfeElementTagSelect __unused = br.getBits(4); 2933 channelsNum += 1; 2934 } 2935 ALOGV("mpeg4 audio channelsNum = %d", channelsNum); 2936 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); 2937 numChannels = channelsNum; 2938 } 2939 } 2940 } 2941 2942 if (numChannels == 0) { 2943 return ERROR_UNSUPPORTED; 2944 } 2945 2946 int32_t prevSampleRate; 2947 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); 2948 2949 if (prevSampleRate != sampleRate) { 2950 ALOGV("mpeg4 audio sample rate different from previous setting. " 2951 "was: %d, now: %d", prevSampleRate, sampleRate); 2952 } 2953 2954 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 2955 2956 int32_t prevChannelCount; 2957 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); 2958 2959 if (prevChannelCount != numChannels) { 2960 ALOGV("mpeg4 audio channel count different from previous setting. 
" 2961 "was: %d, now: %d", prevChannelCount, numChannels); 2962 } 2963 2964 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); 2965 2966 return OK; 2967} 2968 2969//////////////////////////////////////////////////////////////////////////////// 2970 2971MPEG4Source::MPEG4Source( 2972 const sp<MPEG4Extractor> &owner, 2973 const sp<MetaData> &format, 2974 const sp<DataSource> &dataSource, 2975 int32_t timeScale, 2976 const sp<SampleTable> &sampleTable, 2977 Vector<SidxEntry> &sidx, 2978 const Trex *trex, 2979 off64_t firstMoofOffset) 2980 : mOwner(owner), 2981 mFormat(format), 2982 mDataSource(dataSource), 2983 mTimescale(timeScale), 2984 mSampleTable(sampleTable), 2985 mCurrentSampleIndex(0), 2986 mCurrentFragmentIndex(0), 2987 mSegments(sidx), 2988 mTrex(trex), 2989 mFirstMoofOffset(firstMoofOffset), 2990 mCurrentMoofOffset(firstMoofOffset), 2991 mCurrentTime(0), 2992 mCurrentSampleInfoAllocSize(0), 2993 mCurrentSampleInfoSizes(NULL), 2994 mCurrentSampleInfoOffsetsAllocSize(0), 2995 mCurrentSampleInfoOffsets(NULL), 2996 mIsAVC(false), 2997 mIsHEVC(false), 2998 mNALLengthSize(0), 2999 mStarted(false), 3000 mGroup(NULL), 3001 mBuffer(NULL), 3002 mWantsNALFragments(false), 3003 mSrcBuffer(NULL) { 3004 3005 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo)); 3006 3007 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode); 3008 mDefaultIVSize = 0; 3009 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 3010 uint32_t keytype; 3011 const void *key; 3012 size_t keysize; 3013 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 3014 CHECK(keysize <= 16); 3015 memset(mCryptoKey, 0, 16); 3016 memcpy(mCryptoKey, key, keysize); 3017 } 3018 3019 const char *mime; 3020 bool success = mFormat->findCString(kKeyMIMEType, &mime); 3021 CHECK(success); 3022 3023 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 3024 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC); 3025 3026 if (mIsAVC) { 3027 uint32_t type; 3028 const void *data; 
3029 size_t size; 3030 CHECK(format->findData(kKeyAVCC, &type, &data, &size)); 3031 3032 const uint8_t *ptr = (const uint8_t *)data; 3033 3034 CHECK(size >= 7); 3035 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3036 3037 // The number of bytes used to encode the length of a NAL unit. 3038 mNALLengthSize = 1 + (ptr[4] & 3); 3039 } else if (mIsHEVC) { 3040 uint32_t type; 3041 const void *data; 3042 size_t size; 3043 CHECK(format->findData(kKeyHVCC, &type, &data, &size)); 3044 3045 const uint8_t *ptr = (const uint8_t *)data; 3046 3047 CHECK(size >= 7); 3048 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3049 3050 mNALLengthSize = 1 + (ptr[14 + 7] & 3); 3051 } 3052 3053 CHECK(format->findInt32(kKeyTrackID, &mTrackId)); 3054 3055 if (mFirstMoofOffset != 0) { 3056 off64_t offset = mFirstMoofOffset; 3057 parseChunk(&offset); 3058 } 3059} 3060 3061MPEG4Source::~MPEG4Source() { 3062 if (mStarted) { 3063 stop(); 3064 } 3065 free(mCurrentSampleInfoSizes); 3066 free(mCurrentSampleInfoOffsets); 3067} 3068 3069status_t MPEG4Source::start(MetaData *params) { 3070 Mutex::Autolock autoLock(mLock); 3071 3072 CHECK(!mStarted); 3073 3074 int32_t val; 3075 if (params && params->findInt32(kKeyWantsNALFragments, &val) 3076 && val != 0) { 3077 mWantsNALFragments = true; 3078 } else { 3079 mWantsNALFragments = false; 3080 } 3081 3082 mGroup = new MediaBufferGroup; 3083 3084 int32_t max_size; 3085 CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size)); 3086 3087 mGroup->add_buffer(new MediaBuffer(max_size)); 3088 3089 mSrcBuffer = new (std::nothrow) uint8_t[max_size]; 3090 if (mSrcBuffer == NULL) { 3091 // file probably specified a bad max size 3092 return ERROR_MALFORMED; 3093 } 3094 3095 mStarted = true; 3096 3097 return OK; 3098} 3099 3100status_t MPEG4Source::stop() { 3101 Mutex::Autolock autoLock(mLock); 3102 3103 CHECK(mStarted); 3104 3105 if (mBuffer != NULL) { 3106 mBuffer->release(); 3107 mBuffer = NULL; 3108 } 3109 3110 delete[] mSrcBuffer; 3111 
mSrcBuffer = NULL; 3112 3113 delete mGroup; 3114 mGroup = NULL; 3115 3116 mStarted = false; 3117 mCurrentSampleIndex = 0; 3118 3119 return OK; 3120} 3121 3122status_t MPEG4Source::parseChunk(off64_t *offset) { 3123 uint32_t hdr[2]; 3124 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3125 return ERROR_IO; 3126 } 3127 uint64_t chunk_size = ntohl(hdr[0]); 3128 uint32_t chunk_type = ntohl(hdr[1]); 3129 off64_t data_offset = *offset + 8; 3130 3131 if (chunk_size == 1) { 3132 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 3133 return ERROR_IO; 3134 } 3135 chunk_size = ntoh64(chunk_size); 3136 data_offset += 8; 3137 3138 if (chunk_size < 16) { 3139 // The smallest valid chunk is 16 bytes long in this case. 3140 return ERROR_MALFORMED; 3141 } 3142 } else if (chunk_size < 8) { 3143 // The smallest valid chunk is 8 bytes long. 3144 return ERROR_MALFORMED; 3145 } 3146 3147 char chunk[5]; 3148 MakeFourCCString(chunk_type, chunk); 3149 ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset); 3150 3151 off64_t chunk_data_size = *offset + chunk_size - data_offset; 3152 3153 switch(chunk_type) { 3154 3155 case FOURCC('t', 'r', 'a', 'f'): 3156 case FOURCC('m', 'o', 'o', 'f'): { 3157 off64_t stop_offset = *offset + chunk_size; 3158 *offset = data_offset; 3159 while (*offset < stop_offset) { 3160 status_t err = parseChunk(offset); 3161 if (err != OK) { 3162 return err; 3163 } 3164 } 3165 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3166 // *offset points to the box following this moof. Find the next moof from there. 
3167 3168 while (true) { 3169 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3170 return ERROR_END_OF_STREAM; 3171 } 3172 chunk_size = ntohl(hdr[0]); 3173 chunk_type = ntohl(hdr[1]); 3174 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3175 mNextMoofOffset = *offset; 3176 break; 3177 } 3178 *offset += chunk_size; 3179 } 3180 } 3181 break; 3182 } 3183 3184 case FOURCC('t', 'f', 'h', 'd'): { 3185 status_t err; 3186 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 3187 return err; 3188 } 3189 *offset += chunk_size; 3190 break; 3191 } 3192 3193 case FOURCC('t', 'r', 'u', 'n'): { 3194 status_t err; 3195 if (mLastParsedTrackId == mTrackId) { 3196 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 3197 return err; 3198 } 3199 } 3200 3201 *offset += chunk_size; 3202 break; 3203 } 3204 3205 case FOURCC('s', 'a', 'i', 'z'): { 3206 status_t err; 3207 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 3208 return err; 3209 } 3210 *offset += chunk_size; 3211 break; 3212 } 3213 case FOURCC('s', 'a', 'i', 'o'): { 3214 status_t err; 3215 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 3216 return err; 3217 } 3218 *offset += chunk_size; 3219 break; 3220 } 3221 3222 case FOURCC('m', 'd', 'a', 't'): { 3223 // parse DRM info if present 3224 ALOGV("MPEG4Source::parseChunk mdat"); 3225 // if saiz/saoi was previously observed, do something with the sampleinfos 3226 *offset += chunk_size; 3227 break; 3228 } 3229 3230 default: { 3231 *offset += chunk_size; 3232 break; 3233 } 3234 } 3235 return OK; 3236} 3237 3238status_t MPEG4Source::parseSampleAuxiliaryInformationSizes( 3239 off64_t offset, off64_t /* size */) { 3240 ALOGV("parseSampleAuxiliaryInformationSizes"); 3241 // 14496-12 8.7.12 3242 uint8_t version; 3243 if (mDataSource->readAt( 3244 offset, &version, sizeof(version)) 3245 < (ssize_t)sizeof(version)) { 3246 return ERROR_IO; 3247 } 3248 3249 if (version 
!= 0) { 3250 return ERROR_UNSUPPORTED; 3251 } 3252 offset++; 3253 3254 uint32_t flags; 3255 if (!mDataSource->getUInt24(offset, &flags)) { 3256 return ERROR_IO; 3257 } 3258 offset += 3; 3259 3260 if (flags & 1) { 3261 uint32_t tmp; 3262 if (!mDataSource->getUInt32(offset, &tmp)) { 3263 return ERROR_MALFORMED; 3264 } 3265 mCurrentAuxInfoType = tmp; 3266 offset += 4; 3267 if (!mDataSource->getUInt32(offset, &tmp)) { 3268 return ERROR_MALFORMED; 3269 } 3270 mCurrentAuxInfoTypeParameter = tmp; 3271 offset += 4; 3272 } 3273 3274 uint8_t defsize; 3275 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 3276 return ERROR_MALFORMED; 3277 } 3278 mCurrentDefaultSampleInfoSize = defsize; 3279 offset++; 3280 3281 uint32_t smplcnt; 3282 if (!mDataSource->getUInt32(offset, &smplcnt)) { 3283 return ERROR_MALFORMED; 3284 } 3285 mCurrentSampleInfoCount = smplcnt; 3286 offset += 4; 3287 3288 if (mCurrentDefaultSampleInfoSize != 0) { 3289 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 3290 return OK; 3291 } 3292 if (smplcnt > mCurrentSampleInfoAllocSize) { 3293 mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 3294 mCurrentSampleInfoAllocSize = smplcnt; 3295 } 3296 3297 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 3298 return OK; 3299} 3300 3301status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets( 3302 off64_t offset, off64_t /* size */) { 3303 ALOGV("parseSampleAuxiliaryInformationOffsets"); 3304 // 14496-12 8.7.13 3305 uint8_t version; 3306 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 3307 return ERROR_IO; 3308 } 3309 offset++; 3310 3311 uint32_t flags; 3312 if (!mDataSource->getUInt24(offset, &flags)) { 3313 return ERROR_IO; 3314 } 3315 offset += 3; 3316 3317 uint32_t entrycount; 3318 if (!mDataSource->getUInt32(offset, &entrycount)) { 3319 return ERROR_IO; 3320 } 3321 offset += 4; 3322 3323 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 3324 
mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8); 3325 mCurrentSampleInfoOffsetsAllocSize = entrycount; 3326 } 3327 mCurrentSampleInfoOffsetCount = entrycount; 3328 3329 for (size_t i = 0; i < entrycount; i++) { 3330 if (version == 0) { 3331 uint32_t tmp; 3332 if (!mDataSource->getUInt32(offset, &tmp)) { 3333 return ERROR_IO; 3334 } 3335 mCurrentSampleInfoOffsets[i] = tmp; 3336 offset += 4; 3337 } else { 3338 uint64_t tmp; 3339 if (!mDataSource->getUInt64(offset, &tmp)) { 3340 return ERROR_IO; 3341 } 3342 mCurrentSampleInfoOffsets[i] = tmp; 3343 offset += 8; 3344 } 3345 } 3346 3347 // parse clear/encrypted data 3348 3349 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 3350 3351 drmoffset += mCurrentMoofOffset; 3352 int ivlength; 3353 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 3354 3355 // read CencSampleAuxiliaryDataFormats 3356 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) { 3357 Sample *smpl = &mCurrentSamples.editItemAt(i); 3358 3359 memset(smpl->iv, 0, 16); 3360 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) { 3361 return ERROR_IO; 3362 } 3363 3364 drmoffset += ivlength; 3365 3366 int32_t smplinfosize = mCurrentDefaultSampleInfoSize; 3367 if (smplinfosize == 0) { 3368 smplinfosize = mCurrentSampleInfoSizes[i]; 3369 } 3370 if (smplinfosize > ivlength) { 3371 uint16_t numsubsamples; 3372 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) { 3373 return ERROR_IO; 3374 } 3375 drmoffset += 2; 3376 for (size_t j = 0; j < numsubsamples; j++) { 3377 uint16_t numclear; 3378 uint32_t numencrypted; 3379 if (!mDataSource->getUInt16(drmoffset, &numclear)) { 3380 return ERROR_IO; 3381 } 3382 drmoffset += 2; 3383 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) { 3384 return ERROR_IO; 3385 } 3386 drmoffset += 4; 3387 smpl->clearsizes.add(numclear); 3388 smpl->encryptedsizes.add(numencrypted); 3389 } 3390 } else { 3391 smpl->clearsizes.add(0); 3392 
smpl->encryptedsizes.add(smpl->size); 3393 } 3394 } 3395 3396 3397 return OK; 3398} 3399 3400status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 3401 3402 if (size < 8) { 3403 return -EINVAL; 3404 } 3405 3406 uint32_t flags; 3407 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 3408 return ERROR_MALFORMED; 3409 } 3410 3411 if (flags & 0xff000000) { 3412 return -EINVAL; 3413 } 3414 3415 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 3416 return ERROR_MALFORMED; 3417 } 3418 3419 if (mLastParsedTrackId != mTrackId) { 3420 // this is not the right track, skip it 3421 return OK; 3422 } 3423 3424 mTrackFragmentHeaderInfo.mFlags = flags; 3425 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 3426 offset += 8; 3427 size -= 8; 3428 3429 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 3430 3431 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 3432 if (size < 8) { 3433 return -EINVAL; 3434 } 3435 3436 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) { 3437 return ERROR_MALFORMED; 3438 } 3439 offset += 8; 3440 size -= 8; 3441 } 3442 3443 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 3444 if (size < 4) { 3445 return -EINVAL; 3446 } 3447 3448 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 3449 return ERROR_MALFORMED; 3450 } 3451 offset += 4; 3452 size -= 4; 3453 } 3454 3455 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 3456 if (size < 4) { 3457 return -EINVAL; 3458 } 3459 3460 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 3461 return ERROR_MALFORMED; 3462 } 3463 offset += 4; 3464 size -= 4; 3465 } 3466 3467 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 3468 if (size < 4) { 3469 return -EINVAL; 3470 } 3471 3472 if (!mDataSource->getUInt32(offset, 
&mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 3473 return ERROR_MALFORMED; 3474 } 3475 offset += 4; 3476 size -= 4; 3477 } 3478 3479 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 3480 if (size < 4) { 3481 return -EINVAL; 3482 } 3483 3484 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 3485 return ERROR_MALFORMED; 3486 } 3487 offset += 4; 3488 size -= 4; 3489 } 3490 3491 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 3492 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 3493 } 3494 3495 mTrackFragmentHeaderInfo.mDataOffset = 0; 3496 return OK; 3497} 3498 3499status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 3500 3501 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 3502 if (size < 8) { 3503 return -EINVAL; 3504 } 3505 3506 enum { 3507 kDataOffsetPresent = 0x01, 3508 kFirstSampleFlagsPresent = 0x04, 3509 kSampleDurationPresent = 0x100, 3510 kSampleSizePresent = 0x200, 3511 kSampleFlagsPresent = 0x400, 3512 kSampleCompositionTimeOffsetPresent = 0x800, 3513 }; 3514 3515 uint32_t flags; 3516 if (!mDataSource->getUInt32(offset, &flags)) { 3517 return ERROR_MALFORMED; 3518 } 3519 ALOGV("fragment run flags: %08x", flags); 3520 3521 if (flags & 0xff000000) { 3522 return -EINVAL; 3523 } 3524 3525 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 3526 // These two shall not be used together. 
3527 return -EINVAL; 3528 } 3529 3530 uint32_t sampleCount; 3531 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 3532 return ERROR_MALFORMED; 3533 } 3534 offset += 8; 3535 size -= 8; 3536 3537 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 3538 3539 uint32_t firstSampleFlags = 0; 3540 3541 if (flags & kDataOffsetPresent) { 3542 if (size < 4) { 3543 return -EINVAL; 3544 } 3545 3546 int32_t dataOffsetDelta; 3547 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 3548 return ERROR_MALFORMED; 3549 } 3550 3551 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 3552 3553 offset += 4; 3554 size -= 4; 3555 } 3556 3557 if (flags & kFirstSampleFlagsPresent) { 3558 if (size < 4) { 3559 return -EINVAL; 3560 } 3561 3562 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 3563 return ERROR_MALFORMED; 3564 } 3565 offset += 4; 3566 size -= 4; 3567 } 3568 3569 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 3570 sampleCtsOffset = 0; 3571 3572 size_t bytesPerSample = 0; 3573 if (flags & kSampleDurationPresent) { 3574 bytesPerSample += 4; 3575 } else if (mTrackFragmentHeaderInfo.mFlags 3576 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 3577 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 3578 } else if (mTrex) { 3579 sampleDuration = mTrex->default_sample_duration; 3580 } 3581 3582 if (flags & kSampleSizePresent) { 3583 bytesPerSample += 4; 3584 } else if (mTrackFragmentHeaderInfo.mFlags 3585 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 3586 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 3587 } else { 3588 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 3589 } 3590 3591 if (flags & kSampleFlagsPresent) { 3592 bytesPerSample += 4; 3593 } else if (mTrackFragmentHeaderInfo.mFlags 3594 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 3595 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 3596 } else { 3597 sampleFlags = 
mTrackFragmentHeaderInfo.mDefaultSampleFlags; 3598 } 3599 3600 if (flags & kSampleCompositionTimeOffsetPresent) { 3601 bytesPerSample += 4; 3602 } else { 3603 sampleCtsOffset = 0; 3604 } 3605 3606 if (size < (off64_t)sampleCount * bytesPerSample) { 3607 return -EINVAL; 3608 } 3609 3610 Sample tmp; 3611 for (uint32_t i = 0; i < sampleCount; ++i) { 3612 if (flags & kSampleDurationPresent) { 3613 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 3614 return ERROR_MALFORMED; 3615 } 3616 offset += 4; 3617 } 3618 3619 if (flags & kSampleSizePresent) { 3620 if (!mDataSource->getUInt32(offset, &sampleSize)) { 3621 return ERROR_MALFORMED; 3622 } 3623 offset += 4; 3624 } 3625 3626 if (flags & kSampleFlagsPresent) { 3627 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 3628 return ERROR_MALFORMED; 3629 } 3630 offset += 4; 3631 } 3632 3633 if (flags & kSampleCompositionTimeOffsetPresent) { 3634 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 3635 return ERROR_MALFORMED; 3636 } 3637 offset += 4; 3638 } 3639 3640 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, " 3641 " flags 0x%08x", i + 1, 3642 dataOffset, sampleSize, sampleDuration, 3643 (flags & kFirstSampleFlagsPresent) && i == 0 3644 ? 
firstSampleFlags : sampleFlags); 3645 tmp.offset = dataOffset; 3646 tmp.size = sampleSize; 3647 tmp.duration = sampleDuration; 3648 tmp.compositionOffset = sampleCtsOffset; 3649 mCurrentSamples.add(tmp); 3650 3651 dataOffset += sampleSize; 3652 } 3653 3654 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 3655 3656 return OK; 3657} 3658 3659sp<MetaData> MPEG4Source::getFormat() { 3660 Mutex::Autolock autoLock(mLock); 3661 3662 return mFormat; 3663} 3664 3665size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 3666 switch (mNALLengthSize) { 3667 case 1: 3668 return *data; 3669 case 2: 3670 return U16_AT(data); 3671 case 3: 3672 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 3673 case 4: 3674 return U32_AT(data); 3675 } 3676 3677 // This cannot happen, mNALLengthSize springs to life by adding 1 to 3678 // a 2-bit integer. 3679 CHECK(!"Should not be here."); 3680 3681 return 0; 3682} 3683 3684status_t MPEG4Source::read( 3685 MediaBuffer **out, const ReadOptions *options) { 3686 Mutex::Autolock autoLock(mLock); 3687 3688 CHECK(mStarted); 3689 3690 if (mFirstMoofOffset > 0) { 3691 return fragmentedRead(out, options); 3692 } 3693 3694 *out = NULL; 3695 3696 int64_t targetSampleTimeUs = -1; 3697 3698 int64_t seekTimeUs; 3699 ReadOptions::SeekMode mode; 3700 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3701 uint32_t findFlags = 0; 3702 switch (mode) { 3703 case ReadOptions::SEEK_PREVIOUS_SYNC: 3704 findFlags = SampleTable::kFlagBefore; 3705 break; 3706 case ReadOptions::SEEK_NEXT_SYNC: 3707 findFlags = SampleTable::kFlagAfter; 3708 break; 3709 case ReadOptions::SEEK_CLOSEST_SYNC: 3710 case ReadOptions::SEEK_CLOSEST: 3711 findFlags = SampleTable::kFlagClosest; 3712 break; 3713 default: 3714 CHECK(!"Should not be here."); 3715 break; 3716 } 3717 3718 uint32_t sampleIndex; 3719 status_t err = mSampleTable->findSampleAtTime( 3720 seekTimeUs, 1000000, mTimescale, 3721 &sampleIndex, findFlags); 3722 3723 if (mode == ReadOptions::SEEK_CLOSEST) { 3724 // 
We found the closest sample already, now we want the sync 3725 // sample preceding it (or the sample itself of course), even 3726 // if the subsequent sync sample is closer. 3727 findFlags = SampleTable::kFlagBefore; 3728 } 3729 3730 uint32_t syncSampleIndex; 3731 if (err == OK) { 3732 err = mSampleTable->findSyncSampleNear( 3733 sampleIndex, &syncSampleIndex, findFlags); 3734 } 3735 3736 uint32_t sampleTime; 3737 if (err == OK) { 3738 err = mSampleTable->getMetaDataForSample( 3739 sampleIndex, NULL, NULL, &sampleTime); 3740 } 3741 3742 if (err != OK) { 3743 if (err == ERROR_OUT_OF_RANGE) { 3744 // An attempt to seek past the end of the stream would 3745 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 3746 // this all the way to the MediaPlayer would cause abnormal 3747 // termination. Legacy behaviour appears to be to behave as if 3748 // we had seeked to the end of stream, ending normally. 3749 err = ERROR_END_OF_STREAM; 3750 } 3751 ALOGV("end of stream"); 3752 return err; 3753 } 3754 3755 if (mode == ReadOptions::SEEK_CLOSEST) { 3756 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 3757 } 3758 3759#if 0 3760 uint32_t syncSampleTime; 3761 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 3762 syncSampleIndex, NULL, NULL, &syncSampleTime)); 3763 3764 ALOGI("seek to time %lld us => sample at time %lld us, " 3765 "sync sample at time %lld us", 3766 seekTimeUs, 3767 sampleTime * 1000000ll / mTimescale, 3768 syncSampleTime * 1000000ll / mTimescale); 3769#endif 3770 3771 mCurrentSampleIndex = syncSampleIndex; 3772 if (mBuffer != NULL) { 3773 mBuffer->release(); 3774 mBuffer = NULL; 3775 } 3776 3777 // fall through 3778 } 3779 3780 off64_t offset; 3781 size_t size; 3782 uint32_t cts, stts; 3783 bool isSyncSample; 3784 bool newBuffer = false; 3785 if (mBuffer == NULL) { 3786 newBuffer = true; 3787 3788 status_t err = 3789 mSampleTable->getMetaDataForSample( 3790 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts); 3791 3792 if (err != 
OK) { 3793 return err; 3794 } 3795 3796 err = mGroup->acquire_buffer(&mBuffer); 3797 3798 if (err != OK) { 3799 CHECK(mBuffer == NULL); 3800 return err; 3801 } 3802 } 3803 3804 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) { 3805 if (newBuffer) { 3806 ssize_t num_bytes_read = 3807 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 3808 3809 if (num_bytes_read < (ssize_t)size) { 3810 mBuffer->release(); 3811 mBuffer = NULL; 3812 3813 return ERROR_IO; 3814 } 3815 3816 CHECK(mBuffer != NULL); 3817 mBuffer->set_range(0, size); 3818 mBuffer->meta_data()->clear(); 3819 mBuffer->meta_data()->setInt64( 3820 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3821 mBuffer->meta_data()->setInt64( 3822 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 3823 3824 if (targetSampleTimeUs >= 0) { 3825 mBuffer->meta_data()->setInt64( 3826 kKeyTargetTime, targetSampleTimeUs); 3827 } 3828 3829 if (isSyncSample) { 3830 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3831 } 3832 3833 ++mCurrentSampleIndex; 3834 } 3835 3836 if (!mIsAVC && !mIsHEVC) { 3837 *out = mBuffer; 3838 mBuffer = NULL; 3839 3840 return OK; 3841 } 3842 3843 // Each NAL unit is split up into its constituent fragments and 3844 // each one of them returned in its own buffer. 
3845 3846 CHECK(mBuffer->range_length() >= mNALLengthSize); 3847 3848 const uint8_t *src = 3849 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 3850 3851 size_t nal_size = parseNALSize(src); 3852 if (mBuffer->range_length() < mNALLengthSize + nal_size) { 3853 ALOGE("incomplete NAL unit."); 3854 3855 mBuffer->release(); 3856 mBuffer = NULL; 3857 3858 return ERROR_MALFORMED; 3859 } 3860 3861 MediaBuffer *clone = mBuffer->clone(); 3862 CHECK(clone != NULL); 3863 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 3864 3865 CHECK(mBuffer != NULL); 3866 mBuffer->set_range( 3867 mBuffer->range_offset() + mNALLengthSize + nal_size, 3868 mBuffer->range_length() - mNALLengthSize - nal_size); 3869 3870 if (mBuffer->range_length() == 0) { 3871 mBuffer->release(); 3872 mBuffer = NULL; 3873 } 3874 3875 *out = clone; 3876 3877 return OK; 3878 } else { 3879 // Whole NAL units are returned but each fragment is prefixed by 3880 // the start code (0x00 00 00 01). 3881 ssize_t num_bytes_read = 0; 3882 int32_t drm = 0; 3883 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 3884 if (usesDRM) { 3885 num_bytes_read = 3886 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 3887 } else { 3888 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 3889 } 3890 3891 if (num_bytes_read < (ssize_t)size) { 3892 mBuffer->release(); 3893 mBuffer = NULL; 3894 3895 return ERROR_IO; 3896 } 3897 3898 if (usesDRM) { 3899 CHECK(mBuffer != NULL); 3900 mBuffer->set_range(0, size); 3901 3902 } else { 3903 uint8_t *dstData = (uint8_t *)mBuffer->data(); 3904 size_t srcOffset = 0; 3905 size_t dstOffset = 0; 3906 3907 while (srcOffset < size) { 3908 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 3909 size_t nalLength = 0; 3910 if (!isMalFormed) { 3911 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 3912 srcOffset += mNALLengthSize; 3913 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength); 3914 } 3915 
3916 if (isMalFormed) { 3917 ALOGE("Video is malformed"); 3918 mBuffer->release(); 3919 mBuffer = NULL; 3920 return ERROR_MALFORMED; 3921 } 3922 3923 if (nalLength == 0) { 3924 continue; 3925 } 3926 3927 CHECK(dstOffset + 4 <= mBuffer->size()); 3928 3929 dstData[dstOffset++] = 0; 3930 dstData[dstOffset++] = 0; 3931 dstData[dstOffset++] = 0; 3932 dstData[dstOffset++] = 1; 3933 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 3934 srcOffset += nalLength; 3935 dstOffset += nalLength; 3936 } 3937 CHECK_EQ(srcOffset, size); 3938 CHECK(mBuffer != NULL); 3939 mBuffer->set_range(0, dstOffset); 3940 } 3941 3942 mBuffer->meta_data()->clear(); 3943 mBuffer->meta_data()->setInt64( 3944 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3945 mBuffer->meta_data()->setInt64( 3946 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 3947 3948 if (targetSampleTimeUs >= 0) { 3949 mBuffer->meta_data()->setInt64( 3950 kKeyTargetTime, targetSampleTimeUs); 3951 } 3952 3953 if (isSyncSample) { 3954 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3955 } 3956 3957 ++mCurrentSampleIndex; 3958 3959 *out = mBuffer; 3960 mBuffer = NULL; 3961 3962 return OK; 3963 } 3964} 3965 3966status_t MPEG4Source::fragmentedRead( 3967 MediaBuffer **out, const ReadOptions *options) { 3968 3969 ALOGV("MPEG4Source::fragmentedRead"); 3970 3971 CHECK(mStarted); 3972 3973 *out = NULL; 3974 3975 int64_t targetSampleTimeUs = -1; 3976 3977 int64_t seekTimeUs; 3978 ReadOptions::SeekMode mode; 3979 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3980 3981 int numSidxEntries = mSegments.size(); 3982 if (numSidxEntries != 0) { 3983 int64_t totalTime = 0; 3984 off64_t totalOffset = mFirstMoofOffset; 3985 for (int i = 0; i < numSidxEntries; i++) { 3986 const SidxEntry *se = &mSegments[i]; 3987 if (totalTime + se->mDurationUs > seekTimeUs) { 3988 // The requested time is somewhere in this segment 3989 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) || 3990 (mode == 
ReadOptions::SEEK_CLOSEST_SYNC && 3991 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 3992 // requested next sync, or closest sync and it was closer to the end of 3993 // this segment 3994 totalTime += se->mDurationUs; 3995 totalOffset += se->mSize; 3996 } 3997 break; 3998 } 3999 totalTime += se->mDurationUs; 4000 totalOffset += se->mSize; 4001 } 4002 mCurrentMoofOffset = totalOffset; 4003 mCurrentSamples.clear(); 4004 mCurrentSampleIndex = 0; 4005 parseChunk(&totalOffset); 4006 mCurrentTime = totalTime * mTimescale / 1000000ll; 4007 } else { 4008 // without sidx boxes, we can only seek to 0 4009 mCurrentMoofOffset = mFirstMoofOffset; 4010 mCurrentSamples.clear(); 4011 mCurrentSampleIndex = 0; 4012 off64_t tmp = mCurrentMoofOffset; 4013 parseChunk(&tmp); 4014 mCurrentTime = 0; 4015 } 4016 4017 if (mBuffer != NULL) { 4018 mBuffer->release(); 4019 mBuffer = NULL; 4020 } 4021 4022 // fall through 4023 } 4024 4025 off64_t offset = 0; 4026 size_t size = 0; 4027 uint32_t cts = 0; 4028 bool isSyncSample = false; 4029 bool newBuffer = false; 4030 if (mBuffer == NULL) { 4031 newBuffer = true; 4032 4033 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4034 // move to next fragment if there is one 4035 if (mNextMoofOffset <= mCurrentMoofOffset) { 4036 return ERROR_END_OF_STREAM; 4037 } 4038 off64_t nextMoof = mNextMoofOffset; 4039 mCurrentMoofOffset = nextMoof; 4040 mCurrentSamples.clear(); 4041 mCurrentSampleIndex = 0; 4042 parseChunk(&nextMoof); 4043 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4044 return ERROR_END_OF_STREAM; 4045 } 4046 } 4047 4048 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4049 offset = smpl->offset; 4050 size = smpl->size; 4051 cts = mCurrentTime + smpl->compositionOffset; 4052 mCurrentTime += smpl->duration; 4053 isSyncSample = (mCurrentSampleIndex == 0); // XXX 4054 4055 status_t err = mGroup->acquire_buffer(&mBuffer); 4056 4057 if (err != OK) { 4058 CHECK(mBuffer == NULL); 4059 
ALOGV("acquire_buffer returned %d", err); 4060 return err; 4061 } 4062 } 4063 4064 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4065 const sp<MetaData> bufmeta = mBuffer->meta_data(); 4066 bufmeta->clear(); 4067 if (smpl->encryptedsizes.size()) { 4068 // store clear/encrypted lengths in metadata 4069 bufmeta->setData(kKeyPlainSizes, 0, 4070 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 4071 bufmeta->setData(kKeyEncryptedSizes, 0, 4072 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 4073 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size? 4074 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 4075 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode); 4076 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16); 4077 } 4078 4079 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) { 4080 if (newBuffer) { 4081 ssize_t num_bytes_read = 4082 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 4083 4084 if (num_bytes_read < (ssize_t)size) { 4085 mBuffer->release(); 4086 mBuffer = NULL; 4087 4088 ALOGV("i/o error"); 4089 return ERROR_IO; 4090 } 4091 4092 CHECK(mBuffer != NULL); 4093 mBuffer->set_range(0, size); 4094 mBuffer->meta_data()->setInt64( 4095 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4096 mBuffer->meta_data()->setInt64( 4097 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4098 4099 if (targetSampleTimeUs >= 0) { 4100 mBuffer->meta_data()->setInt64( 4101 kKeyTargetTime, targetSampleTimeUs); 4102 } 4103 4104 if (isSyncSample) { 4105 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4106 } 4107 4108 ++mCurrentSampleIndex; 4109 } 4110 4111 if (!mIsAVC && !mIsHEVC) { 4112 *out = mBuffer; 4113 mBuffer = NULL; 4114 4115 return OK; 4116 } 4117 4118 // Each NAL unit is split up into its constituent fragments and 4119 // each one of them returned in its own buffer. 
4120 4121 CHECK(mBuffer->range_length() >= mNALLengthSize); 4122 4123 const uint8_t *src = 4124 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 4125 4126 size_t nal_size = parseNALSize(src); 4127 if (mBuffer->range_length() < mNALLengthSize + nal_size) { 4128 ALOGE("incomplete NAL unit."); 4129 4130 mBuffer->release(); 4131 mBuffer = NULL; 4132 4133 return ERROR_MALFORMED; 4134 } 4135 4136 MediaBuffer *clone = mBuffer->clone(); 4137 CHECK(clone != NULL); 4138 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 4139 4140 CHECK(mBuffer != NULL); 4141 mBuffer->set_range( 4142 mBuffer->range_offset() + mNALLengthSize + nal_size, 4143 mBuffer->range_length() - mNALLengthSize - nal_size); 4144 4145 if (mBuffer->range_length() == 0) { 4146 mBuffer->release(); 4147 mBuffer = NULL; 4148 } 4149 4150 *out = clone; 4151 4152 return OK; 4153 } else { 4154 ALOGV("whole NAL"); 4155 // Whole NAL units are returned but each fragment is prefixed by 4156 // the start code (0x00 00 00 01). 
4157 ssize_t num_bytes_read = 0; 4158 int32_t drm = 0; 4159 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 4160 if (usesDRM) { 4161 num_bytes_read = 4162 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 4163 } else { 4164 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 4165 } 4166 4167 if (num_bytes_read < (ssize_t)size) { 4168 mBuffer->release(); 4169 mBuffer = NULL; 4170 4171 ALOGV("i/o error"); 4172 return ERROR_IO; 4173 } 4174 4175 if (usesDRM) { 4176 CHECK(mBuffer != NULL); 4177 mBuffer->set_range(0, size); 4178 4179 } else { 4180 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4181 size_t srcOffset = 0; 4182 size_t dstOffset = 0; 4183 4184 while (srcOffset < size) { 4185 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 4186 size_t nalLength = 0; 4187 if (!isMalFormed) { 4188 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4189 srcOffset += mNALLengthSize; 4190 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength); 4191 } 4192 4193 if (isMalFormed) { 4194 ALOGE("Video is malformed"); 4195 mBuffer->release(); 4196 mBuffer = NULL; 4197 return ERROR_MALFORMED; 4198 } 4199 4200 if (nalLength == 0) { 4201 continue; 4202 } 4203 4204 CHECK(dstOffset + 4 <= mBuffer->size()); 4205 4206 dstData[dstOffset++] = 0; 4207 dstData[dstOffset++] = 0; 4208 dstData[dstOffset++] = 0; 4209 dstData[dstOffset++] = 1; 4210 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4211 srcOffset += nalLength; 4212 dstOffset += nalLength; 4213 } 4214 CHECK_EQ(srcOffset, size); 4215 CHECK(mBuffer != NULL); 4216 mBuffer->set_range(0, dstOffset); 4217 } 4218 4219 mBuffer->meta_data()->setInt64( 4220 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4221 mBuffer->meta_data()->setInt64( 4222 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4223 4224 if (targetSampleTimeUs >= 0) { 4225 mBuffer->meta_data()->setInt64( 4226 kKeyTargetTime, targetSampleTimeUs); 4227 } 4228 4229 if 
(isSyncSample) { 4230 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4231 } 4232 4233 ++mCurrentSampleIndex; 4234 4235 *out = mBuffer; 4236 mBuffer = NULL; 4237 4238 return OK; 4239 } 4240} 4241 4242MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 4243 const char *mimePrefix) { 4244 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 4245 const char *mime; 4246 if (track->meta != NULL 4247 && track->meta->findCString(kKeyMIMEType, &mime) 4248 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 4249 return track; 4250 } 4251 } 4252 4253 return NULL; 4254} 4255 4256static bool LegacySniffMPEG4( 4257 const sp<DataSource> &source, String8 *mimeType, float *confidence) { 4258 uint8_t header[8]; 4259 4260 ssize_t n = source->readAt(4, header, sizeof(header)); 4261 if (n < (ssize_t)sizeof(header)) { 4262 return false; 4263 } 4264 4265 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 4266 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) 4267 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 4268 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 4269 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 4270 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) { 4271 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 4272 *confidence = 0.4; 4273 4274 return true; 4275 } 4276 4277 return false; 4278} 4279 4280static bool isCompatibleBrand(uint32_t fourcc) { 4281 static const uint32_t kCompatibleBrands[] = { 4282 FOURCC('i', 's', 'o', 'm'), 4283 FOURCC('i', 's', 'o', '2'), 4284 FOURCC('a', 'v', 'c', '1'), 4285 FOURCC('h', 'v', 'c', '1'), 4286 FOURCC('h', 'e', 'v', '1'), 4287 FOURCC('3', 'g', 'p', '4'), 4288 FOURCC('m', 'p', '4', '1'), 4289 FOURCC('m', 'p', '4', '2'), 4290 4291 // Won't promise that the following file types can be played. 4292 // Just give these file types a chance. 
4293 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 4294 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 4295 4296 FOURCC('3', 'g', '2', 'a'), // 3GPP2 4297 FOURCC('3', 'g', '2', 'b'), 4298 }; 4299 4300 for (size_t i = 0; 4301 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 4302 ++i) { 4303 if (kCompatibleBrands[i] == fourcc) { 4304 return true; 4305 } 4306 } 4307 4308 return false; 4309} 4310 4311// Attempt to actually parse the 'ftyp' atom and determine if a suitable 4312// compatible brand is present. 4313// Also try to identify where this file's metadata ends 4314// (end of the 'moov' atom) and report it to the caller as part of 4315// the metadata. 4316static bool BetterSniffMPEG4( 4317 const sp<DataSource> &source, String8 *mimeType, float *confidence, 4318 sp<AMessage> *meta) { 4319 // We scan up to 128 bytes to identify this file as an MP4. 4320 static const off64_t kMaxScanOffset = 128ll; 4321 4322 off64_t offset = 0ll; 4323 bool foundGoodFileType = false; 4324 off64_t moovAtomEndOffset = -1ll; 4325 bool done = false; 4326 4327 while (!done && offset < kMaxScanOffset) { 4328 uint32_t hdr[2]; 4329 if (source->readAt(offset, hdr, 8) < 8) { 4330 return false; 4331 } 4332 4333 uint64_t chunkSize = ntohl(hdr[0]); 4334 uint32_t chunkType = ntohl(hdr[1]); 4335 off64_t chunkDataOffset = offset + 8; 4336 4337 if (chunkSize == 1) { 4338 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 4339 return false; 4340 } 4341 4342 chunkSize = ntoh64(chunkSize); 4343 chunkDataOffset += 8; 4344 4345 if (chunkSize < 16) { 4346 // The smallest valid chunk is 16 bytes long in this case. 4347 return false; 4348 } 4349 } else if (chunkSize < 8) { 4350 // The smallest valid chunk is 8 bytes long. 
4351 return false; 4352 } 4353 4354 off64_t chunkDataSize = offset + chunkSize - chunkDataOffset; 4355 4356 char chunkstring[5]; 4357 MakeFourCCString(chunkType, chunkstring); 4358 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, offset); 4359 switch (chunkType) { 4360 case FOURCC('f', 't', 'y', 'p'): 4361 { 4362 if (chunkDataSize < 8) { 4363 return false; 4364 } 4365 4366 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 4367 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 4368 if (i == 1) { 4369 // Skip this index, it refers to the minorVersion, 4370 // not a brand. 4371 continue; 4372 } 4373 4374 uint32_t brand; 4375 if (source->readAt( 4376 chunkDataOffset + 4 * i, &brand, 4) < 4) { 4377 return false; 4378 } 4379 4380 brand = ntohl(brand); 4381 4382 if (isCompatibleBrand(brand)) { 4383 foundGoodFileType = true; 4384 break; 4385 } 4386 } 4387 4388 if (!foundGoodFileType) { 4389 return false; 4390 } 4391 4392 break; 4393 } 4394 4395 case FOURCC('m', 'o', 'o', 'v'): 4396 { 4397 moovAtomEndOffset = offset + chunkSize; 4398 4399 done = true; 4400 break; 4401 } 4402 4403 default: 4404 break; 4405 } 4406 4407 offset += chunkSize; 4408 } 4409 4410 if (!foundGoodFileType) { 4411 return false; 4412 } 4413 4414 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 4415 *confidence = 0.4f; 4416 4417 if (moovAtomEndOffset >= 0) { 4418 *meta = new AMessage; 4419 (*meta)->setInt64("meta-data-size", moovAtomEndOffset); 4420 4421 ALOGV("found metadata size: %lld", moovAtomEndOffset); 4422 } 4423 4424 return true; 4425} 4426 4427bool SniffMPEG4( 4428 const sp<DataSource> &source, String8 *mimeType, float *confidence, 4429 sp<AMessage> *meta) { 4430 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) { 4431 return true; 4432 } 4433 4434 if (LegacySniffMPEG4(source, mimeType, confidence)) { 4435 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 4436 return true; 4437 } 4438 4439 return false; 4440} 4441 4442} // namespace android 
4443