MPEG4Extractor.cpp revision 8da8b2e80ccdb10ff2445f503829f803d3a6ab9f
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//#define LOG_NDEBUG 0
#define LOG_TAG "MPEG4Extractor"
#include <utils/Log.h>

#include "include/MPEG4Extractor.h"
#include "include/SampleTable.h"
#include "include/ESDS.h"

#include <ctype.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <media/stagefright/foundation/ABitReader.h>
#include <media/stagefright/foundation/ABuffer.h>
#include <media/stagefright/foundation/ADebug.h>
#include <media/stagefright/foundation/AMessage.h>
#include <media/stagefright/MediaBuffer.h>
#include <media/stagefright/MediaBufferGroup.h>
#include <media/stagefright/MediaDefs.h>
#include <media/stagefright/MediaSource.h>
#include <media/stagefright/MetaData.h>
#include <utils/String8.h>

#include <byteswap.h>
#include "include/ID3.h"

namespace android {

// MediaSource implementation declared for this extractor. Only the
// declaration is visible here; the member functions are defined elsewhere in
// the file.
// NOTE(review): presumably one instance serves one track of the file -
// confirm against the code that constructs it.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                off64_t firstMoofOffset);

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    // Protected destructor: instances cannot be destroyed directly by
    // arbitrary callers.
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Reference member, not a copy.
    // NOTE(review): presumably bound to the constructor's "sidx" argument, in
    // which case that Vector must outlive this object - confirm in the
    // constructor definition.
    Vector<SidxEntry> &mSegments;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // Values held for a track fragment header.
    // NOTE(review): the Flags bit values look like the tf_flags of the 'tfhd'
    // box in ISO/IEC 14496-12 - confirm against parseTrackFragmentHeader().
    struct TrackFragmentHeaderInfo {
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record: location, size and duration, plus a 16-byte IV and
    // two parallel lists of clear/encrypted byte counts.
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Copy constructor and copy assignment are declared private, so code
    // outside the class cannot copy an MPEG4Source.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};

// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache while forwarding
// all remaining requests to the wrapped datasource.
// This is used to cache the full sampletable metadata for a single track,
// possibly wrapping multiple times to cover all tracks, i.e.
// Each MPEG4DataSource caches the sampletable metadata for a single track.
160 161struct MPEG4DataSource : public DataSource { 162 MPEG4DataSource(const sp<DataSource> &source); 163 164 virtual status_t initCheck() const; 165 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 166 virtual status_t getSize(off64_t *size); 167 virtual uint32_t flags(); 168 169 status_t setCachedRange(off64_t offset, size_t size); 170 171protected: 172 virtual ~MPEG4DataSource(); 173 174private: 175 Mutex mLock; 176 177 sp<DataSource> mSource; 178 off64_t mCachedOffset; 179 size_t mCachedSize; 180 uint8_t *mCache; 181 182 void clearCache(); 183 184 MPEG4DataSource(const MPEG4DataSource &); 185 MPEG4DataSource &operator=(const MPEG4DataSource &); 186}; 187 188MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source) 189 : mSource(source), 190 mCachedOffset(0), 191 mCachedSize(0), 192 mCache(NULL) { 193} 194 195MPEG4DataSource::~MPEG4DataSource() { 196 clearCache(); 197} 198 199void MPEG4DataSource::clearCache() { 200 if (mCache) { 201 free(mCache); 202 mCache = NULL; 203 } 204 205 mCachedOffset = 0; 206 mCachedSize = 0; 207} 208 209status_t MPEG4DataSource::initCheck() const { 210 return mSource->initCheck(); 211} 212 213ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { 214 Mutex::Autolock autoLock(mLock); 215 216 if (offset >= mCachedOffset 217 && offset + size <= mCachedOffset + mCachedSize) { 218 memcpy(data, &mCache[offset - mCachedOffset], size); 219 return size; 220 } 221 222 return mSource->readAt(offset, data, size); 223} 224 225status_t MPEG4DataSource::getSize(off64_t *size) { 226 return mSource->getSize(size); 227} 228 229uint32_t MPEG4DataSource::flags() { 230 return mSource->flags(); 231} 232 233status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { 234 Mutex::Autolock autoLock(mLock); 235 236 clearCache(); 237 238 mCache = (uint8_t *)malloc(size); 239 240 if (mCache == NULL) { 241 return -ENOMEM; 242 } 243 244 mCachedOffset = offset; 245 mCachedSize = size; 246 247 ssize_t err = 
mSource->readAt(mCachedOffset, mCache, mCachedSize); 248 249 if (err < (ssize_t)size) { 250 clearCache(); 251 252 return ERROR_IO; 253 } 254 255 return OK; 256} 257 258//////////////////////////////////////////////////////////////////////////////// 259 260static void hexdump(const void *_data, size_t size) { 261 const uint8_t *data = (const uint8_t *)_data; 262 size_t offset = 0; 263 while (offset < size) { 264 printf("0x%04zx ", offset); 265 266 size_t n = size - offset; 267 if (n > 16) { 268 n = 16; 269 } 270 271 for (size_t i = 0; i < 16; ++i) { 272 if (i == 8) { 273 printf(" "); 274 } 275 276 if (offset + i < size) { 277 printf("%02x ", data[offset + i]); 278 } else { 279 printf(" "); 280 } 281 } 282 283 printf(" "); 284 285 for (size_t i = 0; i < n; ++i) { 286 if (isprint(data[offset + i])) { 287 printf("%c", data[offset + i]); 288 } else { 289 printf("."); 290 } 291 } 292 293 printf("\n"); 294 295 offset += 16; 296 } 297} 298 299static const char *FourCC2MIME(uint32_t fourcc) { 300 switch (fourcc) { 301 case FOURCC('m', 'p', '4', 'a'): 302 return MEDIA_MIMETYPE_AUDIO_AAC; 303 304 case FOURCC('s', 'a', 'm', 'r'): 305 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 306 307 case FOURCC('s', 'a', 'w', 'b'): 308 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 309 310 case FOURCC('m', 'p', '4', 'v'): 311 return MEDIA_MIMETYPE_VIDEO_MPEG4; 312 313 case FOURCC('s', '2', '6', '3'): 314 case FOURCC('h', '2', '6', '3'): 315 case FOURCC('H', '2', '6', '3'): 316 return MEDIA_MIMETYPE_VIDEO_H263; 317 318 case FOURCC('a', 'v', 'c', '1'): 319 return MEDIA_MIMETYPE_VIDEO_AVC; 320 321 case FOURCC('h', 'v', 'c', '1'): 322 case FOURCC('h', 'e', 'v', '1'): 323 return MEDIA_MIMETYPE_VIDEO_HEVC; 324 default: 325 CHECK(!"should not be here."); 326 return NULL; 327 } 328} 329 330static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 331 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 332 // AMR NB audio is always mono, 8kHz 333 *channels = 1; 334 *rate 
= 8000; 335 return true; 336 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 337 // AMR WB audio is always mono, 16kHz 338 *channels = 1; 339 *rate = 16000; 340 return true; 341 } 342 return false; 343} 344 345MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source) 346 : mSidxDuration(0), 347 mMoofOffset(0), 348 mDataSource(source), 349 mInitCheck(NO_INIT), 350 mHasVideo(false), 351 mHeaderTimescale(0), 352 mFirstTrack(NULL), 353 mLastTrack(NULL), 354 mFileMetaData(new MetaData), 355 mFirstSINF(NULL), 356 mIsDrm(false) { 357} 358 359MPEG4Extractor::~MPEG4Extractor() { 360 Track *track = mFirstTrack; 361 while (track) { 362 Track *next = track->next; 363 364 delete track; 365 track = next; 366 } 367 mFirstTrack = mLastTrack = NULL; 368 369 SINF *sinf = mFirstSINF; 370 while (sinf) { 371 SINF *next = sinf->next; 372 delete[] sinf->IPMPData; 373 delete sinf; 374 sinf = next; 375 } 376 mFirstSINF = NULL; 377 378 for (size_t i = 0; i < mPssh.size(); i++) { 379 delete [] mPssh[i].data; 380 } 381} 382 383uint32_t MPEG4Extractor::flags() const { 384 return CAN_PAUSE | 385 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 
386 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 387} 388 389sp<MetaData> MPEG4Extractor::getMetaData() { 390 status_t err; 391 if ((err = readMetaData()) != OK) { 392 return new MetaData; 393 } 394 395 return mFileMetaData; 396} 397 398size_t MPEG4Extractor::countTracks() { 399 status_t err; 400 if ((err = readMetaData()) != OK) { 401 ALOGV("MPEG4Extractor::countTracks: no tracks"); 402 return 0; 403 } 404 405 size_t n = 0; 406 Track *track = mFirstTrack; 407 while (track) { 408 ++n; 409 track = track->next; 410 } 411 412 ALOGV("MPEG4Extractor::countTracks: %d tracks", n); 413 return n; 414} 415 416sp<MetaData> MPEG4Extractor::getTrackMetaData( 417 size_t index, uint32_t flags) { 418 status_t err; 419 if ((err = readMetaData()) != OK) { 420 return NULL; 421 } 422 423 Track *track = mFirstTrack; 424 while (index > 0) { 425 if (track == NULL) { 426 return NULL; 427 } 428 429 track = track->next; 430 --index; 431 } 432 433 if (track == NULL) { 434 return NULL; 435 } 436 437 if ((flags & kIncludeExtensiveMetaData) 438 && !track->includes_expensive_metadata) { 439 track->includes_expensive_metadata = true; 440 441 const char *mime; 442 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 443 if (!strncasecmp("video/", mime, 6)) { 444 if (mMoofOffset > 0) { 445 int64_t duration; 446 if (track->meta->findInt64(kKeyDuration, &duration)) { 447 // nothing fancy, just pick a frame near 1/4th of the duration 448 track->meta->setInt64( 449 kKeyThumbnailTime, duration / 4); 450 } 451 } else { 452 uint32_t sampleIndex; 453 uint32_t sampleTime; 454 if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK 455 && track->sampleTable->getMetaDataForSample( 456 sampleIndex, NULL /* offset */, NULL /* size */, 457 &sampleTime) == OK) { 458 track->meta->setInt64( 459 kKeyThumbnailTime, 460 ((int64_t)sampleTime * 1000000) / track->timescale); 461 } 462 } 463 } 464 } 465 466 return track->meta; 467} 468 469static void MakeFourCCString(uint32_t x, char *s) { 470 s[0] = x 
>> 24; 471 s[1] = (x >> 16) & 0xff; 472 s[2] = (x >> 8) & 0xff; 473 s[3] = x & 0xff; 474 s[4] = '\0'; 475} 476 477status_t MPEG4Extractor::readMetaData() { 478 if (mInitCheck != NO_INIT) { 479 return mInitCheck; 480 } 481 482 off64_t offset = 0; 483 status_t err; 484 while (true) { 485 off64_t orig_offset = offset; 486 err = parseChunk(&offset, 0); 487 488 if (offset <= orig_offset) { 489 // only continue parsing if the offset was advanced, 490 // otherwise we might end up in an infinite loop 491 ALOGE("did not advance: 0x%lld->0x%lld", orig_offset, offset); 492 err = ERROR_MALFORMED; 493 break; 494 } else if (err == OK) { 495 continue; 496 } else if (err != UNKNOWN_ERROR) { 497 break; 498 } 499 uint32_t hdr[2]; 500 if (mDataSource->readAt(offset, hdr, 8) < 8) { 501 break; 502 } 503 uint32_t chunk_type = ntohl(hdr[1]); 504 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 505 // store the offset of the first segment 506 mMoofOffset = offset; 507 } else if (chunk_type != FOURCC('m', 'd', 'a', 't')) { 508 // keep parsing until we get to the data 509 continue; 510 } 511 break; 512 } 513 514 if (mInitCheck == OK) { 515 if (mHasVideo) { 516 mFileMetaData->setCString( 517 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 518 } else { 519 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); 520 } 521 } else { 522 mInitCheck = err; 523 } 524 525 CHECK_NE(err, (status_t)NO_INIT); 526 527 // copy pssh data into file metadata 528 int psshsize = 0; 529 for (size_t i = 0; i < mPssh.size(); i++) { 530 psshsize += 20 + mPssh[i].datalen; 531 } 532 if (psshsize) { 533 char *buf = (char*)malloc(psshsize); 534 char *ptr = buf; 535 for (size_t i = 0; i < mPssh.size(); i++) { 536 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 537 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 538 ptr += (20 + mPssh[i].datalen); 539 } 540 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize); 541 free(buf); 542 } 543 return mInitCheck; 544} 545 546char* MPEG4Extractor::getDrmTrackInfo(size_t 
trackID, int *len) { 547 if (mFirstSINF == NULL) { 548 return NULL; 549 } 550 551 SINF *sinf = mFirstSINF; 552 while (sinf && (trackID != sinf->trackID)) { 553 sinf = sinf->next; 554 } 555 556 if (sinf == NULL) { 557 return NULL; 558 } 559 560 *len = sinf->len; 561 return sinf->IPMPData; 562} 563 564// Reads an encoded integer 7 bits at a time until it encounters the high bit clear. 565static int32_t readSize(off64_t offset, 566 const sp<DataSource> DataSource, uint8_t *numOfBytes) { 567 uint32_t size = 0; 568 uint8_t data; 569 bool moreData = true; 570 *numOfBytes = 0; 571 572 while (moreData) { 573 if (DataSource->readAt(offset, &data, 1) < 1) { 574 return -1; 575 } 576 offset ++; 577 moreData = (data >= 128) ? true : false; 578 size = (size << 7) | (data & 0x7f); // Take last 7 bits 579 (*numOfBytes) ++; 580 } 581 582 return size; 583} 584 585status_t MPEG4Extractor::parseDrmSINF( 586 off64_t * /* offset */, off64_t data_offset) { 587 uint8_t updateIdTag; 588 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 589 return ERROR_IO; 590 } 591 data_offset ++; 592 593 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 594 return ERROR_MALFORMED; 595 } 596 597 uint8_t numOfBytes; 598 int32_t size = readSize(data_offset, mDataSource, &numOfBytes); 599 if (size < 0) { 600 return ERROR_IO; 601 } 602 int32_t classSize = size; 603 data_offset += numOfBytes; 604 605 while(size >= 11 ) { 606 uint8_t descriptorTag; 607 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { 608 return ERROR_IO; 609 } 610 data_offset ++; 611 612 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { 613 return ERROR_MALFORMED; 614 } 615 616 uint8_t buffer[8]; 617 //ObjectDescriptorID and ObjectDescriptor url flag 618 if (mDataSource->readAt(data_offset, buffer, 2) < 2) { 619 return ERROR_IO; 620 } 621 data_offset += 2; 622 623 if ((buffer[1] >> 5) & 0x0001) { //url flag is set 624 return ERROR_MALFORMED; 625 } 626 627 if (mDataSource->readAt(data_offset, buffer, 
8) < 8) { 628 return ERROR_IO; 629 } 630 data_offset += 8; 631 632 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) 633 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { 634 return ERROR_MALFORMED; 635 } 636 637 SINF *sinf = new SINF; 638 sinf->trackID = U16_AT(&buffer[3]); 639 sinf->IPMPDescriptorID = buffer[7]; 640 sinf->next = mFirstSINF; 641 mFirstSINF = sinf; 642 643 size -= (8 + 2 + 1); 644 } 645 646 if (size != 0) { 647 return ERROR_MALFORMED; 648 } 649 650 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 651 return ERROR_IO; 652 } 653 data_offset ++; 654 655 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 656 return ERROR_MALFORMED; 657 } 658 659 size = readSize(data_offset, mDataSource, &numOfBytes); 660 if (size < 0) { 661 return ERROR_IO; 662 } 663 classSize = size; 664 data_offset += numOfBytes; 665 666 while (size > 0) { 667 uint8_t tag; 668 int32_t dataLen; 669 if (mDataSource->readAt(data_offset, &tag, 1) < 1) { 670 return ERROR_IO; 671 } 672 data_offset ++; 673 674 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { 675 uint8_t id; 676 dataLen = readSize(data_offset, mDataSource, &numOfBytes); 677 if (dataLen < 0) { 678 return ERROR_IO; 679 } else if (dataLen < 4) { 680 return ERROR_MALFORMED; 681 } 682 data_offset += numOfBytes; 683 684 if (mDataSource->readAt(data_offset, &id, 1) < 1) { 685 return ERROR_IO; 686 } 687 data_offset ++; 688 689 SINF *sinf = mFirstSINF; 690 while (sinf && (sinf->IPMPDescriptorID != id)) { 691 sinf = sinf->next; 692 } 693 if (sinf == NULL) { 694 return ERROR_MALFORMED; 695 } 696 sinf->len = dataLen - 3; 697 sinf->IPMPData = new (std::nothrow) char[sinf->len]; 698 if (sinf->IPMPData == NULL) { 699 return ERROR_MALFORMED; 700 } 701 data_offset += 2; 702 703 if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) { 704 return ERROR_IO; 705 } 706 data_offset += sinf->len; 707 708 size -= (dataLen + numOfBytes + 1); 709 } 710 } 711 712 if (size != 0) { 713 return ERROR_MALFORMED; 714 } 
715 716 return UNKNOWN_ERROR; // Return a dummy error. 717} 718 719struct PathAdder { 720 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 721 : mPath(path) { 722 mPath->push(chunkType); 723 } 724 725 ~PathAdder() { 726 mPath->pop(); 727 } 728 729private: 730 Vector<uint32_t> *mPath; 731 732 PathAdder(const PathAdder &); 733 PathAdder &operator=(const PathAdder &); 734}; 735 736static bool underMetaDataPath(const Vector<uint32_t> &path) { 737 return path.size() >= 5 738 && path[0] == FOURCC('m', 'o', 'o', 'v') 739 && path[1] == FOURCC('u', 'd', 't', 'a') 740 && path[2] == FOURCC('m', 'e', 't', 'a') 741 && path[3] == FOURCC('i', 'l', 's', 't'); 742} 743 744// Given a time in seconds since Jan 1 1904, produce a human-readable string. 745static void convertTimeToDate(int64_t time_1904, String8 *s) { 746 time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600); 747 748 char tmp[32]; 749 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970)); 750 751 s->setTo(tmp); 752} 753 754status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 755 ALOGV("entering parseChunk %lld/%d", *offset, depth); 756 uint32_t hdr[2]; 757 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 758 return ERROR_IO; 759 } 760 uint64_t chunk_size = ntohl(hdr[0]); 761 uint32_t chunk_type = ntohl(hdr[1]); 762 off64_t data_offset = *offset + 8; 763 764 if (chunk_size == 1) { 765 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 766 return ERROR_IO; 767 } 768 chunk_size = ntoh64(chunk_size); 769 data_offset += 8; 770 771 if (chunk_size < 16) { 772 // The smallest valid chunk is 16 bytes long in this case. 773 return ERROR_MALFORMED; 774 } 775 } else if (chunk_size == 0) { 776 if (depth == 0) { 777 // atom extends to end of file 778 off64_t sourceSize; 779 if (mDataSource->getSize(&sourceSize) == OK) { 780 chunk_size = (sourceSize - *offset); 781 } else { 782 // XXX could we just pick a "sufficiently large" value here? 
783 ALOGE("atom size is 0, and data source has no size"); 784 return ERROR_MALFORMED; 785 } 786 } else { 787 // not allowed for non-toplevel atoms, skip it 788 *offset += 4; 789 return OK; 790 } 791 } else if (chunk_size < 8) { 792 // The smallest valid chunk is 8 bytes long. 793 ALOGE("invalid chunk size: %d", int(chunk_size)); 794 return ERROR_MALFORMED; 795 } 796 797 char chunk[5]; 798 MakeFourCCString(chunk_type, chunk); 799 ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth); 800 801#if 0 802 static const char kWhitespace[] = " "; 803 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 804 printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size); 805 806 char buffer[256]; 807 size_t n = chunk_size; 808 if (n > sizeof(buffer)) { 809 n = sizeof(buffer); 810 } 811 if (mDataSource->readAt(*offset, buffer, n) 812 < (ssize_t)n) { 813 return ERROR_IO; 814 } 815 816 hexdump(buffer, n); 817#endif 818 819 PathAdder autoAdder(&mPath, chunk_type); 820 821 off64_t chunk_data_size = *offset + chunk_size - data_offset; 822 823 if (chunk_type != FOURCC('c', 'p', 'r', 't') 824 && chunk_type != FOURCC('c', 'o', 'v', 'r') 825 && mPath.size() == 5 && underMetaDataPath(mPath)) { 826 off64_t stop_offset = *offset + chunk_size; 827 *offset = data_offset; 828 while (*offset < stop_offset) { 829 status_t err = parseChunk(offset, depth + 1); 830 if (err != OK) { 831 return err; 832 } 833 } 834 835 if (*offset != stop_offset) { 836 return ERROR_MALFORMED; 837 } 838 839 return OK; 840 } 841 842 switch(chunk_type) { 843 case FOURCC('m', 'o', 'o', 'v'): 844 case FOURCC('t', 'r', 'a', 'k'): 845 case FOURCC('m', 'd', 'i', 'a'): 846 case FOURCC('m', 'i', 'n', 'f'): 847 case FOURCC('d', 'i', 'n', 'f'): 848 case FOURCC('s', 't', 'b', 'l'): 849 case FOURCC('m', 'v', 'e', 'x'): 850 case FOURCC('m', 'o', 'o', 'f'): 851 case FOURCC('t', 'r', 'a', 'f'): 852 case FOURCC('m', 'f', 'r', 'a'): 853 case FOURCC('u', 'd', 't', 'a'): 854 case FOURCC('i', 'l', 's', 't'): 
855 case FOURCC('s', 'i', 'n', 'f'): 856 case FOURCC('s', 'c', 'h', 'i'): 857 case FOURCC('e', 'd', 't', 's'): 858 { 859 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 860 ALOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size); 861 862 if (mDataSource->flags() 863 & (DataSource::kWantsPrefetching 864 | DataSource::kIsCachingDataSource)) { 865 sp<MPEG4DataSource> cachedSource = 866 new MPEG4DataSource(mDataSource); 867 868 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { 869 mDataSource = cachedSource; 870 } 871 } 872 873 mLastTrack->sampleTable = new SampleTable(mDataSource); 874 } 875 876 bool isTrack = false; 877 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 878 isTrack = true; 879 880 Track *track = new Track; 881 track->next = NULL; 882 if (mLastTrack) { 883 mLastTrack->next = track; 884 } else { 885 mFirstTrack = track; 886 } 887 mLastTrack = track; 888 889 track->meta = new MetaData; 890 track->includes_expensive_metadata = false; 891 track->skipTrack = false; 892 track->timescale = 0; 893 track->meta->setCString(kKeyMIMEType, "application/octet-stream"); 894 } 895 896 off64_t stop_offset = *offset + chunk_size; 897 *offset = data_offset; 898 while (*offset < stop_offset) { 899 status_t err = parseChunk(offset, depth + 1); 900 if (err != OK) { 901 return err; 902 } 903 } 904 905 if (*offset != stop_offset) { 906 return ERROR_MALFORMED; 907 } 908 909 if (isTrack) { 910 if (mLastTrack->skipTrack) { 911 Track *cur = mFirstTrack; 912 913 if (cur == mLastTrack) { 914 delete cur; 915 mFirstTrack = mLastTrack = NULL; 916 } else { 917 while (cur && cur->next != mLastTrack) { 918 cur = cur->next; 919 } 920 cur->next = NULL; 921 delete mLastTrack; 922 mLastTrack = cur; 923 } 924 925 return OK; 926 } 927 928 status_t err = verifyTrack(mLastTrack); 929 930 if (err != OK) { 931 return err; 932 } 933 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 934 mInitCheck = OK; 935 936 if (!mIsDrm) { 937 return UNKNOWN_ERROR; // Return a dummy 
error. 938 } else { 939 return OK; 940 } 941 } 942 break; 943 } 944 945 case FOURCC('e', 'l', 's', 't'): 946 { 947 *offset += chunk_size; 948 949 // See 14496-12 8.6.6 950 uint8_t version; 951 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 952 return ERROR_IO; 953 } 954 955 uint32_t entry_count; 956 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 957 return ERROR_IO; 958 } 959 960 if (entry_count != 1) { 961 // we only support a single entry at the moment, for gapless playback 962 ALOGW("ignoring edit list with %d entries", entry_count); 963 } else if (mHeaderTimescale == 0) { 964 ALOGW("ignoring edit list because timescale is 0"); 965 } else { 966 off64_t entriesoffset = data_offset + 8; 967 uint64_t segment_duration; 968 int64_t media_time; 969 970 if (version == 1) { 971 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 972 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 973 return ERROR_IO; 974 } 975 } else if (version == 0) { 976 uint32_t sd; 977 int32_t mt; 978 if (!mDataSource->getUInt32(entriesoffset, &sd) || 979 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 980 return ERROR_IO; 981 } 982 segment_duration = sd; 983 media_time = mt; 984 } else { 985 return ERROR_IO; 986 } 987 988 uint64_t halfscale = mHeaderTimescale / 2; 989 segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale; 990 media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale; 991 992 int64_t duration; 993 int32_t samplerate; 994 if (mLastTrack->meta->findInt64(kKeyDuration, &duration) && 995 mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) { 996 997 int64_t delay = (media_time * samplerate + 500000) / 1000000; 998 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 999 1000 int64_t paddingus = duration - (segment_duration + media_time); 1001 if (paddingus < 0) { 1002 // track duration from media header (which is what kKeyDuration is) might 1003 // be slightly shorter than 
the segment duration, which would make the 1004 // padding negative. Clamp to zero. 1005 paddingus = 0; 1006 } 1007 int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000; 1008 mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples); 1009 } 1010 } 1011 break; 1012 } 1013 1014 case FOURCC('f', 'r', 'm', 'a'): 1015 { 1016 *offset += chunk_size; 1017 1018 uint32_t original_fourcc; 1019 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 1020 return ERROR_IO; 1021 } 1022 original_fourcc = ntohl(original_fourcc); 1023 ALOGV("read original format: %d", original_fourcc); 1024 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 1025 uint32_t num_channels = 0; 1026 uint32_t sample_rate = 0; 1027 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 1028 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1029 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1030 } 1031 break; 1032 } 1033 1034 case FOURCC('t', 'e', 'n', 'c'): 1035 { 1036 *offset += chunk_size; 1037 1038 if (chunk_size < 32) { 1039 return ERROR_MALFORMED; 1040 } 1041 1042 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1043 // default IV size, 16 bytes default KeyID 1044 // (ISO 23001-7) 1045 char buf[4]; 1046 memset(buf, 0, 4); 1047 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1048 return ERROR_IO; 1049 } 1050 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1051 if (defaultAlgorithmId > 1) { 1052 // only 0 (clear) and 1 (AES-128) are valid 1053 return ERROR_MALFORMED; 1054 } 1055 1056 memset(buf, 0, 4); 1057 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1058 return ERROR_IO; 1059 } 1060 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 1061 1062 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 1063 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 1064 // only unencrypted data must have 0 IV size 1065 return ERROR_MALFORMED; 1066 } 
else if (defaultIVSize != 0 && 1067 defaultIVSize != 8 && 1068 defaultIVSize != 16) { 1069 // only supported sizes are 0, 8 and 16 1070 return ERROR_MALFORMED; 1071 } 1072 1073 uint8_t defaultKeyId[16]; 1074 1075 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1076 return ERROR_IO; 1077 } 1078 1079 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId); 1080 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1081 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1082 break; 1083 } 1084 1085 case FOURCC('t', 'k', 'h', 'd'): 1086 { 1087 *offset += chunk_size; 1088 1089 status_t err; 1090 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1091 return err; 1092 } 1093 1094 break; 1095 } 1096 1097 case FOURCC('p', 's', 's', 'h'): 1098 { 1099 *offset += chunk_size; 1100 1101 PsshInfo pssh; 1102 1103 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1104 return ERROR_IO; 1105 } 1106 1107 uint32_t psshdatalen = 0; 1108 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1109 return ERROR_IO; 1110 } 1111 pssh.datalen = ntohl(psshdatalen); 1112 ALOGV("pssh data size: %d", pssh.datalen); 1113 if (pssh.datalen + 20 > chunk_size) { 1114 // pssh data length exceeds size of containing box 1115 return ERROR_MALFORMED; 1116 } 1117 1118 pssh.data = new (std::nothrow) uint8_t[pssh.datalen]; 1119 if (pssh.data == NULL) { 1120 return ERROR_MALFORMED; 1121 } 1122 ALOGV("allocated pssh @ %p", pssh.data); 1123 ssize_t requested = (ssize_t) pssh.datalen; 1124 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1125 return ERROR_IO; 1126 } 1127 mPssh.push_back(pssh); 1128 1129 break; 1130 } 1131 1132 case FOURCC('m', 'd', 'h', 'd'): 1133 { 1134 *offset += chunk_size; 1135 1136 if (chunk_data_size < 4) { 1137 return ERROR_MALFORMED; 1138 } 1139 1140 uint8_t version; 1141 if (mDataSource->readAt( 1142 data_offset, &version, sizeof(version)) 1143 < 
(ssize_t)sizeof(version)) { 1144 return ERROR_IO; 1145 } 1146 1147 off64_t timescale_offset; 1148 1149 if (version == 1) { 1150 timescale_offset = data_offset + 4 + 16; 1151 } else if (version == 0) { 1152 timescale_offset = data_offset + 4 + 8; 1153 } else { 1154 return ERROR_IO; 1155 } 1156 1157 uint32_t timescale; 1158 if (mDataSource->readAt( 1159 timescale_offset, ×cale, sizeof(timescale)) 1160 < (ssize_t)sizeof(timescale)) { 1161 return ERROR_IO; 1162 } 1163 1164 mLastTrack->timescale = ntohl(timescale); 1165 1166 int64_t duration = 0; 1167 if (version == 1) { 1168 if (mDataSource->readAt( 1169 timescale_offset + 4, &duration, sizeof(duration)) 1170 < (ssize_t)sizeof(duration)) { 1171 return ERROR_IO; 1172 } 1173 duration = ntoh64(duration); 1174 } else { 1175 uint32_t duration32; 1176 if (mDataSource->readAt( 1177 timescale_offset + 4, &duration32, sizeof(duration32)) 1178 < (ssize_t)sizeof(duration32)) { 1179 return ERROR_IO; 1180 } 1181 // ffmpeg sets duration to -1, which is incorrect. 1182 if (duration32 != 0xffffffff) { 1183 duration = ntohl(duration32); 1184 } 1185 } 1186 mLastTrack->meta->setInt64( 1187 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1188 1189 uint8_t lang[2]; 1190 off64_t lang_offset; 1191 if (version == 1) { 1192 lang_offset = timescale_offset + 4 + 8; 1193 } else if (version == 0) { 1194 lang_offset = timescale_offset + 4 + 4; 1195 } else { 1196 return ERROR_IO; 1197 } 1198 1199 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1200 < (ssize_t)sizeof(lang)) { 1201 return ERROR_IO; 1202 } 1203 1204 // To get the ISO-639-2/T three character language code 1205 // 1 bit pad followed by 3 5-bits characters. Each character 1206 // is packed as the difference between its ASCII value and 0x60. 
1207 char lang_code[4]; 1208 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1209 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1210 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1211 lang_code[3] = '\0'; 1212 1213 mLastTrack->meta->setCString( 1214 kKeyMediaLanguage, lang_code); 1215 1216 break; 1217 } 1218 1219 case FOURCC('s', 't', 's', 'd'): 1220 { 1221 if (chunk_data_size < 8) { 1222 return ERROR_MALFORMED; 1223 } 1224 1225 uint8_t buffer[8]; 1226 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1227 return ERROR_MALFORMED; 1228 } 1229 1230 if (mDataSource->readAt( 1231 data_offset, buffer, 8) < 8) { 1232 return ERROR_IO; 1233 } 1234 1235 if (U32_AT(buffer) != 0) { 1236 // Should be version 0, flags 0. 1237 return ERROR_MALFORMED; 1238 } 1239 1240 uint32_t entry_count = U32_AT(&buffer[4]); 1241 1242 if (entry_count > 1) { 1243 // For 3GPP timed text, there could be multiple tx3g boxes contain 1244 // multiple text display formats. These formats will be used to 1245 // display the timed text. 1246 // For encrypted files, there may also be more than one entry. 1247 const char *mime; 1248 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1249 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1250 strcasecmp(mime, "application/octet-stream")) { 1251 // For now we only support a single type of media per track. 
1252 mLastTrack->skipTrack = true; 1253 *offset += chunk_size; 1254 break; 1255 } 1256 } 1257 off64_t stop_offset = *offset + chunk_size; 1258 *offset = data_offset + 8; 1259 for (uint32_t i = 0; i < entry_count; ++i) { 1260 status_t err = parseChunk(offset, depth + 1); 1261 if (err != OK) { 1262 return err; 1263 } 1264 } 1265 1266 if (*offset != stop_offset) { 1267 return ERROR_MALFORMED; 1268 } 1269 break; 1270 } 1271 1272 case FOURCC('m', 'p', '4', 'a'): 1273 case FOURCC('e', 'n', 'c', 'a'): 1274 case FOURCC('s', 'a', 'm', 'r'): 1275 case FOURCC('s', 'a', 'w', 'b'): 1276 { 1277 uint8_t buffer[8 + 20]; 1278 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1279 // Basic AudioSampleEntry size. 1280 return ERROR_MALFORMED; 1281 } 1282 1283 if (mDataSource->readAt( 1284 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1285 return ERROR_IO; 1286 } 1287 1288 uint16_t data_ref_index = U16_AT(&buffer[6]); 1289 uint32_t num_channels = U16_AT(&buffer[16]); 1290 1291 uint16_t sample_size = U16_AT(&buffer[18]); 1292 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1293 1294 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1295 // if the chunk type is enca, we'll get the type from the sinf/frma box later 1296 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1297 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1298 } 1299 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1300 chunk, num_channels, sample_size, sample_rate); 1301 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1302 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1303 1304 off64_t stop_offset = *offset + chunk_size; 1305 *offset = data_offset + sizeof(buffer); 1306 while (*offset < stop_offset) { 1307 status_t err = parseChunk(offset, depth + 1); 1308 if (err != OK) { 1309 return err; 1310 } 1311 } 1312 1313 if (*offset != stop_offset) { 1314 return ERROR_MALFORMED; 1315 } 1316 break; 1317 } 1318 1319 case FOURCC('m', 'p', '4', 'v'): 
1320 case FOURCC('e', 'n', 'c', 'v'): 1321 case FOURCC('s', '2', '6', '3'): 1322 case FOURCC('H', '2', '6', '3'): 1323 case FOURCC('h', '2', '6', '3'): 1324 case FOURCC('a', 'v', 'c', '1'): 1325 case FOURCC('h', 'v', 'c', '1'): 1326 case FOURCC('h', 'e', 'v', '1'): 1327 { 1328 mHasVideo = true; 1329 1330 uint8_t buffer[78]; 1331 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1332 // Basic VideoSampleEntry size. 1333 return ERROR_MALFORMED; 1334 } 1335 1336 if (mDataSource->readAt( 1337 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1338 return ERROR_IO; 1339 } 1340 1341 uint16_t data_ref_index = U16_AT(&buffer[6]); 1342 uint16_t width = U16_AT(&buffer[6 + 18]); 1343 uint16_t height = U16_AT(&buffer[6 + 20]); 1344 1345 // The video sample is not standard-compliant if it has invalid dimension. 1346 // Use some default width and height value, and 1347 // let the decoder figure out the actual width and height (and thus 1348 // be prepared for INFO_FOMRAT_CHANGED event). 
1349 if (width == 0) width = 352; 1350 if (height == 0) height = 288; 1351 1352 // printf("*** coding='%s' width=%d height=%d\n", 1353 // chunk, width, height); 1354 1355 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1356 // if the chunk type is encv, we'll get the type from the sinf/frma box later 1357 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1358 } 1359 mLastTrack->meta->setInt32(kKeyWidth, width); 1360 mLastTrack->meta->setInt32(kKeyHeight, height); 1361 1362 off64_t stop_offset = *offset + chunk_size; 1363 *offset = data_offset + sizeof(buffer); 1364 while (*offset < stop_offset) { 1365 status_t err = parseChunk(offset, depth + 1); 1366 if (err != OK) { 1367 return err; 1368 } 1369 } 1370 1371 if (*offset != stop_offset) { 1372 return ERROR_MALFORMED; 1373 } 1374 break; 1375 } 1376 1377 case FOURCC('s', 't', 'c', 'o'): 1378 case FOURCC('c', 'o', '6', '4'): 1379 { 1380 status_t err = 1381 mLastTrack->sampleTable->setChunkOffsetParams( 1382 chunk_type, data_offset, chunk_data_size); 1383 1384 *offset += chunk_size; 1385 1386 if (err != OK) { 1387 return err; 1388 } 1389 1390 break; 1391 } 1392 1393 case FOURCC('s', 't', 's', 'c'): 1394 { 1395 status_t err = 1396 mLastTrack->sampleTable->setSampleToChunkParams( 1397 data_offset, chunk_data_size); 1398 1399 *offset += chunk_size; 1400 1401 if (err != OK) { 1402 return err; 1403 } 1404 1405 break; 1406 } 1407 1408 case FOURCC('s', 't', 's', 'z'): 1409 case FOURCC('s', 't', 'z', '2'): 1410 { 1411 status_t err = 1412 mLastTrack->sampleTable->setSampleSizeParams( 1413 chunk_type, data_offset, chunk_data_size); 1414 1415 *offset += chunk_size; 1416 1417 if (err != OK) { 1418 return err; 1419 } 1420 1421 size_t max_size; 1422 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1423 1424 if (err != OK) { 1425 return err; 1426 } 1427 1428 if (max_size != 0) { 1429 // Assume that a given buffer only contains at most 10 chunks, 1430 // each chunk originally prefixed with a 2 byte length 
will 1431 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1432 // and thus will grow by 2 bytes per chunk. 1433 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1434 } else { 1435 // No size was specified. Pick a conservatively large size. 1436 int32_t width, height; 1437 if (!mLastTrack->meta->findInt32(kKeyWidth, &width) || 1438 !mLastTrack->meta->findInt32(kKeyHeight, &height)) { 1439 ALOGE("No width or height, assuming worst case 1080p"); 1440 width = 1920; 1441 height = 1080; 1442 } 1443 1444 const char *mime; 1445 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1446 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 1447 // AVC requires compression ratio of at least 2, and uses 1448 // macroblocks 1449 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1450 } else { 1451 // For all other formats there is no minimum compression 1452 // ratio. Use compression ratio of 1. 1453 max_size = width * height * 3 / 2; 1454 } 1455 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size); 1456 } 1457 1458 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1459 // mimetype) previously obtained, so don't cache them. 1460 const char *mime; 1461 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1462 // Calculate average frame rate. 
1463 if (!strncasecmp("video/", mime, 6)) { 1464 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1465 int64_t durationUs; 1466 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { 1467 if (durationUs > 0) { 1468 int32_t frameRate = (nSamples * 1000000LL + 1469 (durationUs >> 1)) / durationUs; 1470 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1471 } 1472 } 1473 } 1474 1475 break; 1476 } 1477 1478 case FOURCC('s', 't', 't', 's'): 1479 { 1480 *offset += chunk_size; 1481 1482 status_t err = 1483 mLastTrack->sampleTable->setTimeToSampleParams( 1484 data_offset, chunk_data_size); 1485 1486 if (err != OK) { 1487 return err; 1488 } 1489 1490 break; 1491 } 1492 1493 case FOURCC('c', 't', 't', 's'): 1494 { 1495 *offset += chunk_size; 1496 1497 status_t err = 1498 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1499 data_offset, chunk_data_size); 1500 1501 if (err != OK) { 1502 return err; 1503 } 1504 1505 break; 1506 } 1507 1508 case FOURCC('s', 't', 's', 's'): 1509 { 1510 *offset += chunk_size; 1511 1512 status_t err = 1513 mLastTrack->sampleTable->setSyncSampleParams( 1514 data_offset, chunk_data_size); 1515 1516 if (err != OK) { 1517 return err; 1518 } 1519 1520 break; 1521 } 1522 1523 // @xyz 1524 case FOURCC('\xA9', 'x', 'y', 'z'): 1525 { 1526 *offset += chunk_size; 1527 1528 // Best case the total data length inside "@xyz" box 1529 // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/", 1530 // where "\x00\x04" is the text string length with value = 4, 1531 // "\0x15\xc7" is the language code = en, and "0+0" is a 1532 // location (string) value with longitude = 0 and latitude = 0. 1533 if (chunk_data_size < 8) { 1534 return ERROR_MALFORMED; 1535 } 1536 1537 // Worst case the location string length would be 18, 1538 // for instance +90.0000-180.0000, without the trailing "/" and 1539 // the string length + language code. 
1540 char buffer[18]; 1541 1542 // Substracting 5 from the data size is because the text string length + 1543 // language code takes 4 bytes, and the trailing slash "/" takes 1 byte. 1544 off64_t location_length = chunk_data_size - 5; 1545 if (location_length >= (off64_t) sizeof(buffer)) { 1546 return ERROR_MALFORMED; 1547 } 1548 1549 if (mDataSource->readAt( 1550 data_offset + 4, buffer, location_length) < location_length) { 1551 return ERROR_IO; 1552 } 1553 1554 buffer[location_length] = '\0'; 1555 mFileMetaData->setCString(kKeyLocation, buffer); 1556 break; 1557 } 1558 1559 case FOURCC('e', 's', 'd', 's'): 1560 { 1561 *offset += chunk_size; 1562 1563 if (chunk_data_size < 4) { 1564 return ERROR_MALFORMED; 1565 } 1566 1567 uint8_t buffer[256]; 1568 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1569 return ERROR_BUFFER_TOO_SMALL; 1570 } 1571 1572 if (mDataSource->readAt( 1573 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1574 return ERROR_IO; 1575 } 1576 1577 if (U32_AT(buffer) != 0) { 1578 // Should be version 0, flags 0. 1579 return ERROR_MALFORMED; 1580 } 1581 1582 mLastTrack->meta->setData( 1583 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1584 1585 if (mPath.size() >= 2 1586 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1587 // Information from the ESDS must be relied on for proper 1588 // setup of sample rate and channel count for MPEG4 Audio. 1589 // The generic header appears to only contain generic 1590 // information... 
1591 1592 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1593 &buffer[4], chunk_data_size - 4); 1594 1595 if (err != OK) { 1596 return err; 1597 } 1598 } 1599 1600 break; 1601 } 1602 1603 case FOURCC('a', 'v', 'c', 'C'): 1604 { 1605 *offset += chunk_size; 1606 1607 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1608 1609 if (mDataSource->readAt( 1610 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1611 return ERROR_IO; 1612 } 1613 1614 mLastTrack->meta->setData( 1615 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size); 1616 1617 break; 1618 } 1619 case FOURCC('h', 'v', 'c', 'C'): 1620 { 1621 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1622 1623 if (mDataSource->readAt( 1624 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1625 return ERROR_IO; 1626 } 1627 1628 mLastTrack->meta->setData( 1629 kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size); 1630 1631 *offset += chunk_size; 1632 break; 1633 } 1634 1635 case FOURCC('d', '2', '6', '3'): 1636 { 1637 *offset += chunk_size; 1638 /* 1639 * d263 contains a fixed 7 bytes part: 1640 * vendor - 4 bytes 1641 * version - 1 byte 1642 * level - 1 byte 1643 * profile - 1 byte 1644 * optionally, "d263" box itself may contain a 16-byte 1645 * bit rate box (bitr) 1646 * average bit rate - 4 bytes 1647 * max bit rate - 4 bytes 1648 */ 1649 char buffer[23]; 1650 if (chunk_data_size != 7 && 1651 chunk_data_size != 23) { 1652 ALOGE("Incorrect D263 box size %lld", chunk_data_size); 1653 return ERROR_MALFORMED; 1654 } 1655 1656 if (mDataSource->readAt( 1657 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1658 return ERROR_IO; 1659 } 1660 1661 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 1662 1663 break; 1664 } 1665 1666 case FOURCC('m', 'e', 't', 'a'): 1667 { 1668 uint8_t buffer[4]; 1669 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1670 *offset += chunk_size; 1671 return ERROR_MALFORMED; 1672 } 1673 1674 if (mDataSource->readAt( 1675 
data_offset, buffer, 4) < 4) { 1676 *offset += chunk_size; 1677 return ERROR_IO; 1678 } 1679 1680 if (U32_AT(buffer) != 0) { 1681 // Should be version 0, flags 0. 1682 1683 // If it's not, let's assume this is one of those 1684 // apparently malformed chunks that don't have flags 1685 // and completely different semantics than what's 1686 // in the MPEG4 specs and skip it. 1687 *offset += chunk_size; 1688 return OK; 1689 } 1690 1691 off64_t stop_offset = *offset + chunk_size; 1692 *offset = data_offset + sizeof(buffer); 1693 while (*offset < stop_offset) { 1694 status_t err = parseChunk(offset, depth + 1); 1695 if (err != OK) { 1696 return err; 1697 } 1698 } 1699 1700 if (*offset != stop_offset) { 1701 return ERROR_MALFORMED; 1702 } 1703 break; 1704 } 1705 1706 case FOURCC('m', 'e', 'a', 'n'): 1707 case FOURCC('n', 'a', 'm', 'e'): 1708 case FOURCC('d', 'a', 't', 'a'): 1709 { 1710 *offset += chunk_size; 1711 1712 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 1713 status_t err = parseITunesMetaData(data_offset, chunk_data_size); 1714 1715 if (err != OK) { 1716 return err; 1717 } 1718 } 1719 1720 break; 1721 } 1722 1723 case FOURCC('m', 'v', 'h', 'd'): 1724 { 1725 *offset += chunk_size; 1726 1727 if (chunk_data_size < 24) { 1728 return ERROR_MALFORMED; 1729 } 1730 1731 uint8_t header[24]; 1732 if (mDataSource->readAt( 1733 data_offset, header, sizeof(header)) 1734 < (ssize_t)sizeof(header)) { 1735 return ERROR_IO; 1736 } 1737 1738 uint64_t creationTime; 1739 if (header[0] == 1) { 1740 creationTime = U64_AT(&header[4]); 1741 mHeaderTimescale = U32_AT(&header[20]); 1742 } else if (header[0] != 0) { 1743 return ERROR_MALFORMED; 1744 } else { 1745 creationTime = U32_AT(&header[4]); 1746 mHeaderTimescale = U32_AT(&header[12]); 1747 } 1748 1749 String8 s; 1750 convertTimeToDate(creationTime, &s); 1751 1752 mFileMetaData->setCString(kKeyDate, s.string()); 1753 1754 break; 1755 } 1756 1757 case FOURCC('m', 'd', 'a', 't'): 1758 { 1759 ALOGV("mdat chunk, drm: %d", 
mIsDrm); 1760 if (!mIsDrm) { 1761 *offset += chunk_size; 1762 break; 1763 } 1764 1765 if (chunk_size < 8) { 1766 return ERROR_MALFORMED; 1767 } 1768 1769 return parseDrmSINF(offset, data_offset); 1770 } 1771 1772 case FOURCC('h', 'd', 'l', 'r'): 1773 { 1774 *offset += chunk_size; 1775 1776 uint32_t buffer; 1777 if (mDataSource->readAt( 1778 data_offset + 8, &buffer, 4) < 4) { 1779 return ERROR_IO; 1780 } 1781 1782 uint32_t type = ntohl(buffer); 1783 // For the 3GPP file format, the handler-type within the 'hdlr' box 1784 // shall be 'text'. We also want to support 'sbtl' handler type 1785 // for a practical reason as various MPEG4 containers use it. 1786 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 1787 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 1788 } 1789 1790 break; 1791 } 1792 1793 case FOURCC('t', 'x', '3', 'g'): 1794 { 1795 uint32_t type; 1796 const void *data; 1797 size_t size = 0; 1798 if (!mLastTrack->meta->findData( 1799 kKeyTextFormatData, &type, &data, &size)) { 1800 size = 0; 1801 } 1802 1803 uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size]; 1804 if (buffer == NULL) { 1805 return ERROR_MALFORMED; 1806 } 1807 1808 if (size > 0) { 1809 memcpy(buffer, data, size); 1810 } 1811 1812 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 1813 < chunk_size) { 1814 delete[] buffer; 1815 buffer = NULL; 1816 1817 // advance read pointer so we don't end up reading this again 1818 *offset += chunk_size; 1819 return ERROR_IO; 1820 } 1821 1822 mLastTrack->meta->setData( 1823 kKeyTextFormatData, 0, buffer, size + chunk_size); 1824 1825 delete[] buffer; 1826 1827 *offset += chunk_size; 1828 break; 1829 } 1830 1831 case FOURCC('c', 'o', 'v', 'r'): 1832 { 1833 *offset += chunk_size; 1834 1835 if (mFileMetaData != NULL) { 1836 ALOGV("chunk_data_size = %lld and data_offset = %lld", 1837 chunk_data_size, data_offset); 1838 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1); 1839 
if (mDataSource->readAt( 1840 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) { 1841 return ERROR_IO; 1842 } 1843 const int kSkipBytesOfDataBox = 16; 1844 mFileMetaData->setData( 1845 kKeyAlbumArt, MetaData::TYPE_NONE, 1846 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 1847 } 1848 1849 break; 1850 } 1851 1852 case FOURCC('t', 'i', 't', 'l'): 1853 case FOURCC('p', 'e', 'r', 'f'): 1854 case FOURCC('a', 'u', 't', 'h'): 1855 case FOURCC('g', 'n', 'r', 'e'): 1856 case FOURCC('a', 'l', 'b', 'm'): 1857 case FOURCC('y', 'r', 'r', 'c'): 1858 { 1859 *offset += chunk_size; 1860 1861 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth); 1862 1863 if (err != OK) { 1864 return err; 1865 } 1866 1867 break; 1868 } 1869 1870 case FOURCC('I', 'D', '3', '2'): 1871 { 1872 *offset += chunk_size; 1873 1874 if (chunk_data_size < 6) { 1875 return ERROR_MALFORMED; 1876 } 1877 1878 parseID3v2MetaData(data_offset + 6); 1879 1880 break; 1881 } 1882 1883 case FOURCC('-', '-', '-', '-'): 1884 { 1885 mLastCommentMean.clear(); 1886 mLastCommentName.clear(); 1887 mLastCommentData.clear(); 1888 *offset += chunk_size; 1889 break; 1890 } 1891 1892 case FOURCC('s', 'i', 'd', 'x'): 1893 { 1894 parseSegmentIndex(data_offset, chunk_data_size); 1895 *offset += chunk_size; 1896 return UNKNOWN_ERROR; // stop parsing after sidx 1897 } 1898 1899 default: 1900 { 1901 *offset += chunk_size; 1902 break; 1903 } 1904 } 1905 1906 return OK; 1907} 1908 1909status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 1910 ALOGV("MPEG4Extractor::parseSegmentIndex"); 1911 1912 if (size < 12) { 1913 return -EINVAL; 1914 } 1915 1916 uint32_t flags; 1917 if (!mDataSource->getUInt32(offset, &flags)) { 1918 return ERROR_MALFORMED; 1919 } 1920 1921 uint32_t version = flags >> 24; 1922 flags &= 0xffffff; 1923 1924 ALOGV("sidx version %d", version); 1925 1926 uint32_t referenceId; 1927 if (!mDataSource->getUInt32(offset + 4, &referenceId)) 
{ 1928 return ERROR_MALFORMED; 1929 } 1930 1931 uint32_t timeScale; 1932 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 1933 return ERROR_MALFORMED; 1934 } 1935 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 1936 1937 uint64_t earliestPresentationTime; 1938 uint64_t firstOffset; 1939 1940 offset += 12; 1941 size -= 12; 1942 1943 if (version == 0) { 1944 if (size < 8) { 1945 return -EINVAL; 1946 } 1947 uint32_t tmp; 1948 if (!mDataSource->getUInt32(offset, &tmp)) { 1949 return ERROR_MALFORMED; 1950 } 1951 earliestPresentationTime = tmp; 1952 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 1953 return ERROR_MALFORMED; 1954 } 1955 firstOffset = tmp; 1956 offset += 8; 1957 size -= 8; 1958 } else { 1959 if (size < 16) { 1960 return -EINVAL; 1961 } 1962 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 1963 return ERROR_MALFORMED; 1964 } 1965 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 1966 return ERROR_MALFORMED; 1967 } 1968 offset += 16; 1969 size -= 16; 1970 } 1971 ALOGV("sidx pres/off: %Ld/%Ld", earliestPresentationTime, firstOffset); 1972 1973 if (size < 4) { 1974 return -EINVAL; 1975 } 1976 1977 uint16_t referenceCount; 1978 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 1979 return ERROR_MALFORMED; 1980 } 1981 offset += 4; 1982 size -= 4; 1983 ALOGV("refcount: %d", referenceCount); 1984 1985 if (size < referenceCount * 12) { 1986 return -EINVAL; 1987 } 1988 1989 uint64_t total_duration = 0; 1990 for (unsigned int i = 0; i < referenceCount; i++) { 1991 uint32_t d1, d2, d3; 1992 1993 if (!mDataSource->getUInt32(offset, &d1) || // size 1994 !mDataSource->getUInt32(offset + 4, &d2) || // duration 1995 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 1996 return ERROR_MALFORMED; 1997 } 1998 1999 if (d1 & 0x80000000) { 2000 ALOGW("sub-sidx boxes not supported yet"); 2001 } 2002 bool sap = d3 & 0x80000000; 2003 uint32_t saptype = (d3 >> 28) & 7; 2004 if (!sap || (saptype != 1 && saptype != 2)) { 2005 
// type 1 and 2 are sync samples 2006 ALOGW("not a stream access point, or unsupported type: %08x", d3); 2007 } 2008 total_duration += d2; 2009 offset += 12; 2010 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 2011 SidxEntry se; 2012 se.mSize = d1 & 0x7fffffff; 2013 se.mDurationUs = 1000000LL * d2 / timeScale; 2014 mSidxEntries.add(se); 2015 } 2016 2017 mSidxDuration = total_duration * 1000000 / timeScale; 2018 ALOGV("duration: %lld", mSidxDuration); 2019 2020 int64_t metaDuration; 2021 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 2022 mLastTrack->meta->setInt64(kKeyDuration, mSidxDuration); 2023 } 2024 return OK; 2025} 2026 2027 2028 2029status_t MPEG4Extractor::parseTrackHeader( 2030 off64_t data_offset, off64_t data_size) { 2031 if (data_size < 4) { 2032 return ERROR_MALFORMED; 2033 } 2034 2035 uint8_t version; 2036 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 2037 return ERROR_IO; 2038 } 2039 2040 size_t dynSize = (version == 1) ? 
36 : 24; 2041 2042 uint8_t buffer[36 + 60]; 2043 2044 if (data_size != (off64_t)dynSize + 60) { 2045 return ERROR_MALFORMED; 2046 } 2047 2048 if (mDataSource->readAt( 2049 data_offset, buffer, data_size) < (ssize_t)data_size) { 2050 return ERROR_IO; 2051 } 2052 2053 uint64_t ctime, mtime, duration; 2054 int32_t id; 2055 2056 if (version == 1) { 2057 ctime = U64_AT(&buffer[4]); 2058 mtime = U64_AT(&buffer[12]); 2059 id = U32_AT(&buffer[20]); 2060 duration = U64_AT(&buffer[28]); 2061 } else if (version == 0) { 2062 ctime = U32_AT(&buffer[4]); 2063 mtime = U32_AT(&buffer[8]); 2064 id = U32_AT(&buffer[12]); 2065 duration = U32_AT(&buffer[20]); 2066 } else { 2067 return ERROR_UNSUPPORTED; 2068 } 2069 2070 mLastTrack->meta->setInt32(kKeyTrackID, id); 2071 2072 size_t matrixOffset = dynSize + 16; 2073 int32_t a00 = U32_AT(&buffer[matrixOffset]); 2074 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 2075 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 2076 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 2077 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 2078 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 2079 2080#if 0 2081 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 2082 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 2083 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 2084 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 2085#endif 2086 2087 uint32_t rotationDegrees; 2088 2089 static const int32_t kFixedOne = 0x10000; 2090 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 2091 // Identity, no rotation 2092 rotationDegrees = 0; 2093 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 2094 rotationDegrees = 90; 2095 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 2096 rotationDegrees = 270; 2097 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { 2098 rotationDegrees = 180; 2099 } else { 2100 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 2101 rotationDegrees = 0; 
2102 } 2103 2104 if (rotationDegrees != 0) { 2105 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); 2106 } 2107 2108 // Handle presentation display size, which could be different 2109 // from the image size indicated by kKeyWidth and kKeyHeight. 2110 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2111 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2112 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); 2113 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); 2114 2115 return OK; 2116} 2117 2118status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) { 2119 if (size < 4) { 2120 return ERROR_MALFORMED; 2121 } 2122 2123 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2124 if (buffer == NULL) { 2125 return ERROR_MALFORMED; 2126 } 2127 if (mDataSource->readAt( 2128 offset, buffer, size) != (ssize_t)size) { 2129 delete[] buffer; 2130 buffer = NULL; 2131 2132 return ERROR_IO; 2133 } 2134 2135 uint32_t flags = U32_AT(buffer); 2136 2137 uint32_t metadataKey = 0; 2138 char chunk[5]; 2139 MakeFourCCString(mPath[4], chunk); 2140 ALOGV("meta: %s @ %lld", chunk, offset); 2141 switch (mPath[4]) { 2142 case FOURCC(0xa9, 'a', 'l', 'b'): 2143 { 2144 metadataKey = kKeyAlbum; 2145 break; 2146 } 2147 case FOURCC(0xa9, 'A', 'R', 'T'): 2148 { 2149 metadataKey = kKeyArtist; 2150 break; 2151 } 2152 case FOURCC('a', 'A', 'R', 'T'): 2153 { 2154 metadataKey = kKeyAlbumArtist; 2155 break; 2156 } 2157 case FOURCC(0xa9, 'd', 'a', 'y'): 2158 { 2159 metadataKey = kKeyYear; 2160 break; 2161 } 2162 case FOURCC(0xa9, 'n', 'a', 'm'): 2163 { 2164 metadataKey = kKeyTitle; 2165 break; 2166 } 2167 case FOURCC(0xa9, 'w', 'r', 't'): 2168 { 2169 metadataKey = kKeyWriter; 2170 break; 2171 } 2172 case FOURCC('c', 'o', 'v', 'r'): 2173 { 2174 metadataKey = kKeyAlbumArt; 2175 break; 2176 } 2177 case FOURCC('g', 'n', 'r', 'e'): 2178 { 2179 metadataKey = kKeyGenre; 2180 break; 2181 } 2182 case FOURCC(0xa9, 'g', 'e', 'n'): 2183 { 2184 metadataKey = kKeyGenre; 
2185 break; 2186 } 2187 case FOURCC('c', 'p', 'i', 'l'): 2188 { 2189 if (size == 9 && flags == 21) { 2190 char tmp[16]; 2191 sprintf(tmp, "%d", 2192 (int)buffer[size - 1]); 2193 2194 mFileMetaData->setCString(kKeyCompilation, tmp); 2195 } 2196 break; 2197 } 2198 case FOURCC('t', 'r', 'k', 'n'): 2199 { 2200 if (size == 16 && flags == 0) { 2201 char tmp[16]; 2202 uint16_t* pTrack = (uint16_t*)&buffer[10]; 2203 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 2204 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 2205 2206 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2207 } 2208 break; 2209 } 2210 case FOURCC('d', 'i', 's', 'k'): 2211 { 2212 if ((size == 14 || size == 16) && flags == 0) { 2213 char tmp[16]; 2214 uint16_t* pDisc = (uint16_t*)&buffer[10]; 2215 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 2216 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 2217 2218 mFileMetaData->setCString(kKeyDiscNumber, tmp); 2219 } 2220 break; 2221 } 2222 case FOURCC('-', '-', '-', '-'): 2223 { 2224 buffer[size] = '\0'; 2225 switch (mPath[5]) { 2226 case FOURCC('m', 'e', 'a', 'n'): 2227 mLastCommentMean.setTo((const char *)buffer + 4); 2228 break; 2229 case FOURCC('n', 'a', 'm', 'e'): 2230 mLastCommentName.setTo((const char *)buffer + 4); 2231 break; 2232 case FOURCC('d', 'a', 't', 'a'): 2233 mLastCommentData.setTo((const char *)buffer + 8); 2234 break; 2235 } 2236 2237 // Once we have a set of mean/name/data info, go ahead and process 2238 // it to see if its something we are interested in. Whether or not 2239 // were are interested in the specific tag, make sure to clear out 2240 // the set so we can be ready to process another tuple should one 2241 // show up later in the file. 
2242 if ((mLastCommentMean.length() != 0) && 2243 (mLastCommentName.length() != 0) && 2244 (mLastCommentData.length() != 0)) { 2245 2246 if (mLastCommentMean == "com.apple.iTunes" 2247 && mLastCommentName == "iTunSMPB") { 2248 int32_t delay, padding; 2249 if (sscanf(mLastCommentData, 2250 " %*x %x %x %*x", &delay, &padding) == 2) { 2251 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 2252 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); 2253 } 2254 } 2255 2256 mLastCommentMean.clear(); 2257 mLastCommentName.clear(); 2258 mLastCommentData.clear(); 2259 } 2260 break; 2261 } 2262 2263 default: 2264 break; 2265 } 2266 2267 if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) { 2268 if (metadataKey == kKeyAlbumArt) { 2269 mFileMetaData->setData( 2270 kKeyAlbumArt, MetaData::TYPE_NONE, 2271 buffer + 8, size - 8); 2272 } else if (metadataKey == kKeyGenre) { 2273 if (flags == 0) { 2274 // uint8_t genre code, iTunes genre codes are 2275 // the standard id3 codes, except they start 2276 // at 1 instead of 0 (e.g. Pop is 14, not 13) 2277 // We use standard id3 numbering, so subtract 1. 
2278 int genrecode = (int)buffer[size - 1]; 2279 genrecode--; 2280 if (genrecode < 0) { 2281 genrecode = 255; // reserved for 'unknown genre' 2282 } 2283 char genre[10]; 2284 sprintf(genre, "%d", genrecode); 2285 2286 mFileMetaData->setCString(metadataKey, genre); 2287 } else if (flags == 1) { 2288 // custom genre string 2289 buffer[size] = '\0'; 2290 2291 mFileMetaData->setCString( 2292 metadataKey, (const char *)buffer + 8); 2293 } 2294 } else { 2295 buffer[size] = '\0'; 2296 2297 mFileMetaData->setCString( 2298 metadataKey, (const char *)buffer + 8); 2299 } 2300 } 2301 2302 delete[] buffer; 2303 buffer = NULL; 2304 2305 return OK; 2306} 2307 2308status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) { 2309 if (size < 4) { 2310 return ERROR_MALFORMED; 2311 } 2312 2313 uint8_t *buffer = new (std::nothrow) uint8_t[size]; 2314 if (buffer == NULL) { 2315 return ERROR_MALFORMED; 2316 } 2317 if (mDataSource->readAt( 2318 offset, buffer, size) != (ssize_t)size) { 2319 delete[] buffer; 2320 buffer = NULL; 2321 2322 return ERROR_IO; 2323 } 2324 2325 uint32_t metadataKey = 0; 2326 switch (mPath[depth]) { 2327 case FOURCC('t', 'i', 't', 'l'): 2328 { 2329 metadataKey = kKeyTitle; 2330 break; 2331 } 2332 case FOURCC('p', 'e', 'r', 'f'): 2333 { 2334 metadataKey = kKeyArtist; 2335 break; 2336 } 2337 case FOURCC('a', 'u', 't', 'h'): 2338 { 2339 metadataKey = kKeyWriter; 2340 break; 2341 } 2342 case FOURCC('g', 'n', 'r', 'e'): 2343 { 2344 metadataKey = kKeyGenre; 2345 break; 2346 } 2347 case FOURCC('a', 'l', 'b', 'm'): 2348 { 2349 if (buffer[size - 1] != '\0') { 2350 char tmp[4]; 2351 sprintf(tmp, "%u", buffer[size - 1]); 2352 2353 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2354 } 2355 2356 metadataKey = kKeyAlbum; 2357 break; 2358 } 2359 case FOURCC('y', 'r', 'r', 'c'): 2360 { 2361 char tmp[5]; 2362 uint16_t year = U16_AT(&buffer[4]); 2363 2364 if (year < 10000) { 2365 sprintf(tmp, "%u", year); 2366 2367 mFileMetaData->setCString(kKeyYear, 
tmp); 2368 } 2369 break; 2370 } 2371 2372 default: 2373 break; 2374 } 2375 2376 if (metadataKey > 0) { 2377 bool isUTF8 = true; // Common case 2378 char16_t *framedata = NULL; 2379 int len16 = 0; // Number of UTF-16 characters 2380 2381 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00 2382 if (size - 6 >= 4) { 2383 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator 2384 framedata = (char16_t *)(buffer + 6); 2385 if (0xfffe == *framedata) { 2386 // endianness marker (BOM) doesn't match host endianness 2387 for (int i = 0; i < len16; i++) { 2388 framedata[i] = bswap_16(framedata[i]); 2389 } 2390 // BOM is now swapped to 0xfeff, we will execute next block too 2391 } 2392 2393 if (0xfeff == *framedata) { 2394 // Remove the BOM 2395 framedata++; 2396 len16--; 2397 isUTF8 = false; 2398 } 2399 // else normal non-zero-length UTF-8 string 2400 // we can't handle UTF-16 without BOM as there is no other 2401 // indication of encoding. 2402 } 2403 2404 if (isUTF8) { 2405 mFileMetaData->setCString(metadataKey, (const char *)buffer + 6); 2406 } else { 2407 // Convert from UTF-16 string to UTF-8 string. 
2408 String8 tmpUTF8str(framedata, len16); 2409 mFileMetaData->setCString(metadataKey, tmpUTF8str.string()); 2410 } 2411 } 2412 2413 delete[] buffer; 2414 buffer = NULL; 2415 2416 return OK; 2417} 2418 2419void MPEG4Extractor::parseID3v2MetaData(off64_t offset) { 2420 ID3 id3(mDataSource, true /* ignorev1 */, offset); 2421 2422 if (id3.isValid()) { 2423 struct Map { 2424 int key; 2425 const char *tag1; 2426 const char *tag2; 2427 }; 2428 static const Map kMap[] = { 2429 { kKeyAlbum, "TALB", "TAL" }, 2430 { kKeyArtist, "TPE1", "TP1" }, 2431 { kKeyAlbumArtist, "TPE2", "TP2" }, 2432 { kKeyComposer, "TCOM", "TCM" }, 2433 { kKeyGenre, "TCON", "TCO" }, 2434 { kKeyTitle, "TIT2", "TT2" }, 2435 { kKeyYear, "TYE", "TYER" }, 2436 { kKeyAuthor, "TXT", "TEXT" }, 2437 { kKeyCDTrackNumber, "TRK", "TRCK" }, 2438 { kKeyDiscNumber, "TPA", "TPOS" }, 2439 { kKeyCompilation, "TCP", "TCMP" }, 2440 }; 2441 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 2442 2443 for (size_t i = 0; i < kNumMapEntries; ++i) { 2444 if (!mFileMetaData->hasData(kMap[i].key)) { 2445 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 2446 if (it->done()) { 2447 delete it; 2448 it = new ID3::Iterator(id3, kMap[i].tag2); 2449 } 2450 2451 if (it->done()) { 2452 delete it; 2453 continue; 2454 } 2455 2456 String8 s; 2457 it->getString(&s); 2458 delete it; 2459 2460 mFileMetaData->setCString(kMap[i].key, s); 2461 } 2462 } 2463 2464 size_t dataSize; 2465 String8 mime; 2466 const void *data = id3.getAlbumArt(&dataSize, &mime); 2467 2468 if (data) { 2469 mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 2470 mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string()); 2471 } 2472 } 2473} 2474 2475sp<MediaSource> MPEG4Extractor::getTrack(size_t index) { 2476 status_t err; 2477 if ((err = readMetaData()) != OK) { 2478 return NULL; 2479 } 2480 2481 Track *track = mFirstTrack; 2482 while (index > 0) { 2483 if (track == NULL) { 2484 return NULL; 2485 } 2486 2487 track = 
track->next; 2488 --index; 2489 } 2490 2491 if (track == NULL) { 2492 return NULL; 2493 } 2494 2495 ALOGV("getTrack called, pssh: %d", mPssh.size()); 2496 2497 return new MPEG4Source( 2498 track->meta, mDataSource, track->timescale, track->sampleTable, 2499 mSidxEntries, mMoofOffset); 2500} 2501 2502// static 2503status_t MPEG4Extractor::verifyTrack(Track *track) { 2504 const char *mime; 2505 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 2506 2507 uint32_t type; 2508 const void *data; 2509 size_t size; 2510 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 2511 if (!track->meta->findData(kKeyAVCC, &type, &data, &size) 2512 || type != kTypeAVCC) { 2513 return ERROR_MALFORMED; 2514 } 2515 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 2516 if (!track->meta->findData(kKeyHVCC, &type, &data, &size) 2517 || type != kTypeHVCC) { 2518 return ERROR_MALFORMED; 2519 } 2520 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 2521 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 2522 if (!track->meta->findData(kKeyESDS, &type, &data, &size) 2523 || type != kTypeESDS) { 2524 return ERROR_MALFORMED; 2525 } 2526 } 2527 2528 if (track->sampleTable == NULL || !track->sampleTable->isValid()) { 2529 // Make sure we have all the metadata we need. 
2530 ALOGE("stbl atom missing/invalid."); 2531 return ERROR_MALFORMED; 2532 } 2533 2534 return OK; 2535} 2536 2537typedef enum { 2538 //AOT_NONE = -1, 2539 //AOT_NULL_OBJECT = 0, 2540 //AOT_AAC_MAIN = 1, /**< Main profile */ 2541 AOT_AAC_LC = 2, /**< Low Complexity object */ 2542 //AOT_AAC_SSR = 3, 2543 //AOT_AAC_LTP = 4, 2544 AOT_SBR = 5, 2545 //AOT_AAC_SCAL = 6, 2546 //AOT_TWIN_VQ = 7, 2547 //AOT_CELP = 8, 2548 //AOT_HVXC = 9, 2549 //AOT_RSVD_10 = 10, /**< (reserved) */ 2550 //AOT_RSVD_11 = 11, /**< (reserved) */ 2551 //AOT_TTSI = 12, /**< TTSI Object */ 2552 //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */ 2553 //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */ 2554 //AOT_GEN_MIDI = 15, /**< General MIDI object */ 2555 //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */ 2556 AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */ 2557 //AOT_RSVD_18 = 18, /**< (reserved) */ 2558 //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */ 2559 AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */ 2560 //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */ 2561 AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */ 2562 AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */ 2563 //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */ 2564 //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */ 2565 //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */ 2566 //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */ 2567 //AOT_RSVD_28 = 28, /**< might become SSC */ 2568 AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */ 2569 //AOT_MPEGS = 30, /**< MPEG Surround */ 2570 2571 AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */ 2572 2573 //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */ 2574 //AOT_MP3ONMP4_L2 = 33, /**< MPEG-Layer2 in mp4 */ 2575 //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */ 2576 //AOT_RSVD_35 = 35, /**< might 
become DST */ 2577 //AOT_RSVD_36 = 36, /**< might become ALS */ 2578 //AOT_AAC_SLS = 37, /**< AAC + SLS */ 2579 //AOT_SLS = 38, /**< SLS */ 2580 //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */ 2581 2582 //AOT_USAC = 42, /**< USAC */ 2583 //AOT_SAOC = 43, /**< SAOC */ 2584 //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */ 2585 2586 //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */ 2587} AUDIO_OBJECT_TYPE; 2588 2589status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 2590 const void *esds_data, size_t esds_size) { 2591 ESDS esds(esds_data, esds_size); 2592 2593 uint8_t objectTypeIndication; 2594 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 2595 return ERROR_MALFORMED; 2596 } 2597 2598 if (objectTypeIndication == 0xe1) { 2599 // This isn't MPEG4 audio at all, it's QCELP 14k... 2600 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 2601 return OK; 2602 } 2603 2604 if (objectTypeIndication == 0x6b) { 2605 // The media subtype is MP3 audio 2606 // Our software MP3 audio decoder may not be able to handle 2607 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 2608 ALOGE("MP3 track in MP4/3GPP file is not supported"); 2609 return ERROR_UNSUPPORTED; 2610 } 2611 2612 const uint8_t *csd; 2613 size_t csd_size; 2614 if (esds.getCodecSpecificInfo( 2615 (const void **)&csd, &csd_size) != OK) { 2616 return ERROR_MALFORMED; 2617 } 2618 2619#if 0 2620 printf("ESD of size %d\n", csd_size); 2621 hexdump(csd, csd_size); 2622#endif 2623 2624 if (csd_size == 0) { 2625 // There's no further information, i.e. no codec specific data 2626 // Let's assume that the information provided in the mpeg4 headers 2627 // is accurate and hope for the best. 
2628 2629 return OK; 2630 } 2631 2632 if (csd_size < 2) { 2633 return ERROR_MALFORMED; 2634 } 2635 2636 static uint32_t kSamplingRate[] = { 2637 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 2638 16000, 12000, 11025, 8000, 7350 2639 }; 2640 2641 ABitReader br(csd, csd_size); 2642 uint32_t objectType = br.getBits(5); 2643 2644 if (objectType == 31) { // AAC-ELD => additional 6 bits 2645 objectType = 32 + br.getBits(6); 2646 } 2647 2648 //keep AOT type 2649 mLastTrack->meta->setInt32(kKeyAACAOT, objectType); 2650 2651 uint32_t freqIndex = br.getBits(4); 2652 2653 int32_t sampleRate = 0; 2654 int32_t numChannels = 0; 2655 if (freqIndex == 15) { 2656 if (csd_size < 5) { 2657 return ERROR_MALFORMED; 2658 } 2659 sampleRate = br.getBits(24); 2660 numChannels = br.getBits(4); 2661 } else { 2662 numChannels = br.getBits(4); 2663 2664 if (freqIndex == 13 || freqIndex == 14) { 2665 return ERROR_MALFORMED; 2666 } 2667 2668 sampleRate = kSamplingRate[freqIndex]; 2669 } 2670 2671 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 2672 uint32_t extFreqIndex = br.getBits(4); 2673 int32_t extSampleRate; 2674 if (extFreqIndex == 15) { 2675 if (csd_size < 8) { 2676 return ERROR_MALFORMED; 2677 } 2678 extSampleRate = br.getBits(24); 2679 } else { 2680 if (extFreqIndex == 13 || extFreqIndex == 14) { 2681 return ERROR_MALFORMED; 2682 } 2683 extSampleRate = kSamplingRate[extFreqIndex]; 2684 } 2685 //TODO: save the extension sampling rate value in meta data => 2686 // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate); 2687 } 2688 2689 switch (numChannels) { 2690 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration 2691 case 0: 2692 case 1:// FC 2693 case 2:// FL FR 2694 case 3:// FC, FL FR 2695 case 4:// FC, FL FR, RC 2696 case 5:// FC, FL FR, SL SR 2697 case 6:// FC, FL FR, SL SR, LFE 2698 //numChannels already contains the right value 2699 break; 2700 case 11:// FC, FL FR, SL SR, RC, LFE 2701 
numChannels = 7; 2702 break; 2703 case 7: // FC, FCL FCR, FL FR, SL SR, LFE 2704 case 12:// FC, FL FR, SL SR, RL RR, LFE 2705 case 14:// FC, FL FR, SL SR, LFE, FHL FHR 2706 numChannels = 8; 2707 break; 2708 default: 2709 return ERROR_UNSUPPORTED; 2710 } 2711 2712 { 2713 if (objectType == AOT_SBR || objectType == AOT_PS) { 2714 const int32_t extensionSamplingFrequency = br.getBits(4); 2715 objectType = br.getBits(5); 2716 2717 if (objectType == AOT_ESCAPE) { 2718 objectType = 32 + br.getBits(6); 2719 } 2720 } 2721 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || 2722 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || 2723 objectType == AOT_ER_BSAC) { 2724 const int32_t frameLengthFlag = br.getBits(1); 2725 2726 const int32_t dependsOnCoreCoder = br.getBits(1); 2727 2728 if (dependsOnCoreCoder ) { 2729 const int32_t coreCoderDelay = br.getBits(14); 2730 } 2731 2732 const int32_t extensionFlag = br.getBits(1); 2733 2734 if (numChannels == 0 ) { 2735 int32_t channelsEffectiveNum = 0; 2736 int32_t channelsNum = 0; 2737 const int32_t ElementInstanceTag = br.getBits(4); 2738 const int32_t Profile = br.getBits(2); 2739 const int32_t SamplingFrequencyIndex = br.getBits(4); 2740 const int32_t NumFrontChannelElements = br.getBits(4); 2741 const int32_t NumSideChannelElements = br.getBits(4); 2742 const int32_t NumBackChannelElements = br.getBits(4); 2743 const int32_t NumLfeChannelElements = br.getBits(2); 2744 const int32_t NumAssocDataElements = br.getBits(3); 2745 const int32_t NumValidCcElements = br.getBits(4); 2746 2747 const int32_t MonoMixdownPresent = br.getBits(1); 2748 if (MonoMixdownPresent != 0) { 2749 const int32_t MonoMixdownElementNumber = br.getBits(4); 2750 } 2751 2752 const int32_t StereoMixdownPresent = br.getBits(1); 2753 if (StereoMixdownPresent != 0) { 2754 const int32_t StereoMixdownElementNumber = br.getBits(4); 2755 } 2756 2757 const int32_t MatrixMixdownIndexPresent = br.getBits(1); 2758 if (MatrixMixdownIndexPresent != 0) 
{ 2759 const int32_t MatrixMixdownIndex = br.getBits(2); 2760 const int32_t PseudoSurroundEnable = br.getBits(1); 2761 } 2762 2763 int i; 2764 for (i=0; i < NumFrontChannelElements; i++) { 2765 const int32_t FrontElementIsCpe = br.getBits(1); 2766 const int32_t FrontElementTagSelect = br.getBits(4); 2767 channelsNum += FrontElementIsCpe ? 2 : 1; 2768 } 2769 2770 for (i=0; i < NumSideChannelElements; i++) { 2771 const int32_t SideElementIsCpe = br.getBits(1); 2772 const int32_t SideElementTagSelect = br.getBits(4); 2773 channelsNum += SideElementIsCpe ? 2 : 1; 2774 } 2775 2776 for (i=0; i < NumBackChannelElements; i++) { 2777 const int32_t BackElementIsCpe = br.getBits(1); 2778 const int32_t BackElementTagSelect = br.getBits(4); 2779 channelsNum += BackElementIsCpe ? 2 : 1; 2780 } 2781 channelsEffectiveNum = channelsNum; 2782 2783 for (i=0; i < NumLfeChannelElements; i++) { 2784 const int32_t LfeElementTagSelect = br.getBits(4); 2785 channelsNum += 1; 2786 } 2787 ALOGV("mpeg4 audio channelsNum = %d", channelsNum); 2788 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); 2789 numChannels = channelsNum; 2790 } 2791 } 2792 } 2793 2794 if (numChannels == 0) { 2795 return ERROR_UNSUPPORTED; 2796 } 2797 2798 int32_t prevSampleRate; 2799 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); 2800 2801 if (prevSampleRate != sampleRate) { 2802 ALOGV("mpeg4 audio sample rate different from previous setting. " 2803 "was: %d, now: %d", prevSampleRate, sampleRate); 2804 } 2805 2806 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 2807 2808 int32_t prevChannelCount; 2809 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); 2810 2811 if (prevChannelCount != numChannels) { 2812 ALOGV("mpeg4 audio channel count different from previous setting. 
" 2813 "was: %d, now: %d", prevChannelCount, numChannels); 2814 } 2815 2816 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); 2817 2818 return OK; 2819} 2820 2821//////////////////////////////////////////////////////////////////////////////// 2822 2823MPEG4Source::MPEG4Source( 2824 const sp<MetaData> &format, 2825 const sp<DataSource> &dataSource, 2826 int32_t timeScale, 2827 const sp<SampleTable> &sampleTable, 2828 Vector<SidxEntry> &sidx, 2829 off64_t firstMoofOffset) 2830 : mFormat(format), 2831 mDataSource(dataSource), 2832 mTimescale(timeScale), 2833 mSampleTable(sampleTable), 2834 mCurrentSampleIndex(0), 2835 mCurrentFragmentIndex(0), 2836 mSegments(sidx), 2837 mFirstMoofOffset(firstMoofOffset), 2838 mCurrentMoofOffset(firstMoofOffset), 2839 mCurrentTime(0), 2840 mCurrentSampleInfoAllocSize(0), 2841 mCurrentSampleInfoSizes(NULL), 2842 mCurrentSampleInfoOffsetsAllocSize(0), 2843 mCurrentSampleInfoOffsets(NULL), 2844 mIsAVC(false), 2845 mIsHEVC(false), 2846 mNALLengthSize(0), 2847 mStarted(false), 2848 mGroup(NULL), 2849 mBuffer(NULL), 2850 mWantsNALFragments(false), 2851 mSrcBuffer(NULL) { 2852 2853 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode); 2854 mDefaultIVSize = 0; 2855 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 2856 uint32_t keytype; 2857 const void *key; 2858 size_t keysize; 2859 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 2860 CHECK(keysize <= 16); 2861 memset(mCryptoKey, 0, 16); 2862 memcpy(mCryptoKey, key, keysize); 2863 } 2864 2865 const char *mime; 2866 bool success = mFormat->findCString(kKeyMIMEType, &mime); 2867 CHECK(success); 2868 2869 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 2870 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC); 2871 2872 if (mIsAVC) { 2873 uint32_t type; 2874 const void *data; 2875 size_t size; 2876 CHECK(format->findData(kKeyAVCC, &type, &data, &size)); 2877 2878 const uint8_t *ptr = (const uint8_t *)data; 2879 2880 CHECK(size >= 7); 2881 
CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 2882 2883 // The number of bytes used to encode the length of a NAL unit. 2884 mNALLengthSize = 1 + (ptr[4] & 3); 2885 } else if (mIsHEVC) { 2886 uint32_t type; 2887 const void *data; 2888 size_t size; 2889 CHECK(format->findData(kKeyHVCC, &type, &data, &size)); 2890 2891 const uint8_t *ptr = (const uint8_t *)data; 2892 2893 CHECK(size >= 7); 2894 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 2895 2896 mNALLengthSize = 1 + (ptr[14 + 7] & 3); 2897 } 2898 2899 CHECK(format->findInt32(kKeyTrackID, &mTrackId)); 2900 2901 if (mFirstMoofOffset != 0) { 2902 off64_t offset = mFirstMoofOffset; 2903 parseChunk(&offset); 2904 } 2905} 2906 2907MPEG4Source::~MPEG4Source() { 2908 if (mStarted) { 2909 stop(); 2910 } 2911 free(mCurrentSampleInfoSizes); 2912 free(mCurrentSampleInfoOffsets); 2913} 2914 2915status_t MPEG4Source::start(MetaData *params) { 2916 Mutex::Autolock autoLock(mLock); 2917 2918 CHECK(!mStarted); 2919 2920 int32_t val; 2921 if (params && params->findInt32(kKeyWantsNALFragments, &val) 2922 && val != 0) { 2923 mWantsNALFragments = true; 2924 } else { 2925 mWantsNALFragments = false; 2926 } 2927 2928 mGroup = new MediaBufferGroup; 2929 2930 int32_t max_size; 2931 CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size)); 2932 2933 mGroup->add_buffer(new MediaBuffer(max_size)); 2934 2935 mSrcBuffer = new (std::nothrow) uint8_t[max_size]; 2936 if (mSrcBuffer == NULL) { 2937 // file probably specified a bad max size 2938 return ERROR_MALFORMED; 2939 } 2940 2941 mStarted = true; 2942 2943 return OK; 2944} 2945 2946status_t MPEG4Source::stop() { 2947 Mutex::Autolock autoLock(mLock); 2948 2949 CHECK(mStarted); 2950 2951 if (mBuffer != NULL) { 2952 mBuffer->release(); 2953 mBuffer = NULL; 2954 } 2955 2956 delete[] mSrcBuffer; 2957 mSrcBuffer = NULL; 2958 2959 delete mGroup; 2960 mGroup = NULL; 2961 2962 mStarted = false; 2963 mCurrentSampleIndex = 0; 2964 2965 return OK; 2966} 2967 2968status_t 
MPEG4Source::parseChunk(off64_t *offset) { 2969 uint32_t hdr[2]; 2970 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 2971 return ERROR_IO; 2972 } 2973 uint64_t chunk_size = ntohl(hdr[0]); 2974 uint32_t chunk_type = ntohl(hdr[1]); 2975 off64_t data_offset = *offset + 8; 2976 2977 if (chunk_size == 1) { 2978 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 2979 return ERROR_IO; 2980 } 2981 chunk_size = ntoh64(chunk_size); 2982 data_offset += 8; 2983 2984 if (chunk_size < 16) { 2985 // The smallest valid chunk is 16 bytes long in this case. 2986 return ERROR_MALFORMED; 2987 } 2988 } else if (chunk_size < 8) { 2989 // The smallest valid chunk is 8 bytes long. 2990 return ERROR_MALFORMED; 2991 } 2992 2993 char chunk[5]; 2994 MakeFourCCString(chunk_type, chunk); 2995 ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset); 2996 2997 off64_t chunk_data_size = *offset + chunk_size - data_offset; 2998 2999 switch(chunk_type) { 3000 3001 case FOURCC('t', 'r', 'a', 'f'): 3002 case FOURCC('m', 'o', 'o', 'f'): { 3003 off64_t stop_offset = *offset + chunk_size; 3004 *offset = data_offset; 3005 while (*offset < stop_offset) { 3006 status_t err = parseChunk(offset); 3007 if (err != OK) { 3008 return err; 3009 } 3010 } 3011 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3012 // *offset points to the box following this moof. Find the next moof from there. 
3013 3014 while (true) { 3015 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3016 return ERROR_END_OF_STREAM; 3017 } 3018 chunk_size = ntohl(hdr[0]); 3019 chunk_type = ntohl(hdr[1]); 3020 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3021 mNextMoofOffset = *offset; 3022 break; 3023 } 3024 *offset += chunk_size; 3025 } 3026 } 3027 break; 3028 } 3029 3030 case FOURCC('t', 'f', 'h', 'd'): { 3031 status_t err; 3032 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 3033 return err; 3034 } 3035 *offset += chunk_size; 3036 break; 3037 } 3038 3039 case FOURCC('t', 'r', 'u', 'n'): { 3040 status_t err; 3041 if (mLastParsedTrackId == mTrackId) { 3042 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 3043 return err; 3044 } 3045 } 3046 3047 *offset += chunk_size; 3048 break; 3049 } 3050 3051 case FOURCC('s', 'a', 'i', 'z'): { 3052 status_t err; 3053 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 3054 return err; 3055 } 3056 *offset += chunk_size; 3057 break; 3058 } 3059 case FOURCC('s', 'a', 'i', 'o'): { 3060 status_t err; 3061 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 3062 return err; 3063 } 3064 *offset += chunk_size; 3065 break; 3066 } 3067 3068 case FOURCC('m', 'd', 'a', 't'): { 3069 // parse DRM info if present 3070 ALOGV("MPEG4Source::parseChunk mdat"); 3071 // if saiz/saoi was previously observed, do something with the sampleinfos 3072 *offset += chunk_size; 3073 break; 3074 } 3075 3076 default: { 3077 *offset += chunk_size; 3078 break; 3079 } 3080 } 3081 return OK; 3082} 3083 3084status_t MPEG4Source::parseSampleAuxiliaryInformationSizes( 3085 off64_t offset, off64_t /* size */) { 3086 ALOGV("parseSampleAuxiliaryInformationSizes"); 3087 // 14496-12 8.7.12 3088 uint8_t version; 3089 if (mDataSource->readAt( 3090 offset, &version, sizeof(version)) 3091 < (ssize_t)sizeof(version)) { 3092 return ERROR_IO; 3093 } 3094 3095 if (version 
!= 0) { 3096 return ERROR_UNSUPPORTED; 3097 } 3098 offset++; 3099 3100 uint32_t flags; 3101 if (!mDataSource->getUInt24(offset, &flags)) { 3102 return ERROR_IO; 3103 } 3104 offset += 3; 3105 3106 if (flags & 1) { 3107 uint32_t tmp; 3108 if (!mDataSource->getUInt32(offset, &tmp)) { 3109 return ERROR_MALFORMED; 3110 } 3111 mCurrentAuxInfoType = tmp; 3112 offset += 4; 3113 if (!mDataSource->getUInt32(offset, &tmp)) { 3114 return ERROR_MALFORMED; 3115 } 3116 mCurrentAuxInfoTypeParameter = tmp; 3117 offset += 4; 3118 } 3119 3120 uint8_t defsize; 3121 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 3122 return ERROR_MALFORMED; 3123 } 3124 mCurrentDefaultSampleInfoSize = defsize; 3125 offset++; 3126 3127 uint32_t smplcnt; 3128 if (!mDataSource->getUInt32(offset, &smplcnt)) { 3129 return ERROR_MALFORMED; 3130 } 3131 mCurrentSampleInfoCount = smplcnt; 3132 offset += 4; 3133 3134 if (mCurrentDefaultSampleInfoSize != 0) { 3135 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 3136 return OK; 3137 } 3138 if (smplcnt > mCurrentSampleInfoAllocSize) { 3139 mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 3140 mCurrentSampleInfoAllocSize = smplcnt; 3141 } 3142 3143 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 3144 return OK; 3145} 3146 3147status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets( 3148 off64_t offset, off64_t /* size */) { 3149 ALOGV("parseSampleAuxiliaryInformationOffsets"); 3150 // 14496-12 8.7.13 3151 uint8_t version; 3152 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 3153 return ERROR_IO; 3154 } 3155 offset++; 3156 3157 uint32_t flags; 3158 if (!mDataSource->getUInt24(offset, &flags)) { 3159 return ERROR_IO; 3160 } 3161 offset += 3; 3162 3163 uint32_t entrycount; 3164 if (!mDataSource->getUInt32(offset, &entrycount)) { 3165 return ERROR_IO; 3166 } 3167 offset += 4; 3168 3169 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 3170 
mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8); 3171 mCurrentSampleInfoOffsetsAllocSize = entrycount; 3172 } 3173 mCurrentSampleInfoOffsetCount = entrycount; 3174 3175 for (size_t i = 0; i < entrycount; i++) { 3176 if (version == 0) { 3177 uint32_t tmp; 3178 if (!mDataSource->getUInt32(offset, &tmp)) { 3179 return ERROR_IO; 3180 } 3181 mCurrentSampleInfoOffsets[i] = tmp; 3182 offset += 4; 3183 } else { 3184 uint64_t tmp; 3185 if (!mDataSource->getUInt64(offset, &tmp)) { 3186 return ERROR_IO; 3187 } 3188 mCurrentSampleInfoOffsets[i] = tmp; 3189 offset += 8; 3190 } 3191 } 3192 3193 // parse clear/encrypted data 3194 3195 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 3196 3197 drmoffset += mCurrentMoofOffset; 3198 int ivlength; 3199 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 3200 3201 // read CencSampleAuxiliaryDataFormats 3202 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) { 3203 Sample *smpl = &mCurrentSamples.editItemAt(i); 3204 3205 memset(smpl->iv, 0, 16); 3206 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) { 3207 return ERROR_IO; 3208 } 3209 3210 drmoffset += ivlength; 3211 3212 int32_t smplinfosize = mCurrentDefaultSampleInfoSize; 3213 if (smplinfosize == 0) { 3214 smplinfosize = mCurrentSampleInfoSizes[i]; 3215 } 3216 if (smplinfosize > ivlength) { 3217 uint16_t numsubsamples; 3218 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) { 3219 return ERROR_IO; 3220 } 3221 drmoffset += 2; 3222 for (size_t j = 0; j < numsubsamples; j++) { 3223 uint16_t numclear; 3224 uint32_t numencrypted; 3225 if (!mDataSource->getUInt16(drmoffset, &numclear)) { 3226 return ERROR_IO; 3227 } 3228 drmoffset += 2; 3229 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) { 3230 return ERROR_IO; 3231 } 3232 drmoffset += 4; 3233 smpl->clearsizes.add(numclear); 3234 smpl->encryptedsizes.add(numencrypted); 3235 } 3236 } else { 3237 smpl->clearsizes.add(0); 3238 
smpl->encryptedsizes.add(smpl->size); 3239 } 3240 } 3241 3242 3243 return OK; 3244} 3245 3246status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 3247 3248 if (size < 8) { 3249 return -EINVAL; 3250 } 3251 3252 uint32_t flags; 3253 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 3254 return ERROR_MALFORMED; 3255 } 3256 3257 if (flags & 0xff000000) { 3258 return -EINVAL; 3259 } 3260 3261 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 3262 return ERROR_MALFORMED; 3263 } 3264 3265 if (mLastParsedTrackId != mTrackId) { 3266 // this is not the right track, skip it 3267 return OK; 3268 } 3269 3270 mTrackFragmentHeaderInfo.mFlags = flags; 3271 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 3272 offset += 8; 3273 size -= 8; 3274 3275 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 3276 3277 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 3278 if (size < 8) { 3279 return -EINVAL; 3280 } 3281 3282 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) { 3283 return ERROR_MALFORMED; 3284 } 3285 offset += 8; 3286 size -= 8; 3287 } 3288 3289 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 3290 if (size < 4) { 3291 return -EINVAL; 3292 } 3293 3294 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 3295 return ERROR_MALFORMED; 3296 } 3297 offset += 4; 3298 size -= 4; 3299 } 3300 3301 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 3302 if (size < 4) { 3303 return -EINVAL; 3304 } 3305 3306 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 3307 return ERROR_MALFORMED; 3308 } 3309 offset += 4; 3310 size -= 4; 3311 } 3312 3313 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 3314 if (size < 4) { 3315 return -EINVAL; 3316 } 3317 3318 if (!mDataSource->getUInt32(offset, 
&mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 3319 return ERROR_MALFORMED; 3320 } 3321 offset += 4; 3322 size -= 4; 3323 } 3324 3325 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 3326 if (size < 4) { 3327 return -EINVAL; 3328 } 3329 3330 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 3331 return ERROR_MALFORMED; 3332 } 3333 offset += 4; 3334 size -= 4; 3335 } 3336 3337 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 3338 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 3339 } 3340 3341 mTrackFragmentHeaderInfo.mDataOffset = 0; 3342 return OK; 3343} 3344 3345status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 3346 3347 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 3348 if (size < 8) { 3349 return -EINVAL; 3350 } 3351 3352 enum { 3353 kDataOffsetPresent = 0x01, 3354 kFirstSampleFlagsPresent = 0x04, 3355 kSampleDurationPresent = 0x100, 3356 kSampleSizePresent = 0x200, 3357 kSampleFlagsPresent = 0x400, 3358 kSampleCompositionTimeOffsetPresent = 0x800, 3359 }; 3360 3361 uint32_t flags; 3362 if (!mDataSource->getUInt32(offset, &flags)) { 3363 return ERROR_MALFORMED; 3364 } 3365 ALOGV("fragment run flags: %08x", flags); 3366 3367 if (flags & 0xff000000) { 3368 return -EINVAL; 3369 } 3370 3371 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 3372 // These two shall not be used together. 
3373 return -EINVAL; 3374 } 3375 3376 uint32_t sampleCount; 3377 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 3378 return ERROR_MALFORMED; 3379 } 3380 offset += 8; 3381 size -= 8; 3382 3383 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 3384 3385 uint32_t firstSampleFlags = 0; 3386 3387 if (flags & kDataOffsetPresent) { 3388 if (size < 4) { 3389 return -EINVAL; 3390 } 3391 3392 int32_t dataOffsetDelta; 3393 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 3394 return ERROR_MALFORMED; 3395 } 3396 3397 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 3398 3399 offset += 4; 3400 size -= 4; 3401 } 3402 3403 if (flags & kFirstSampleFlagsPresent) { 3404 if (size < 4) { 3405 return -EINVAL; 3406 } 3407 3408 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 3409 return ERROR_MALFORMED; 3410 } 3411 offset += 4; 3412 size -= 4; 3413 } 3414 3415 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 3416 sampleCtsOffset = 0; 3417 3418 size_t bytesPerSample = 0; 3419 if (flags & kSampleDurationPresent) { 3420 bytesPerSample += 4; 3421 } else if (mTrackFragmentHeaderInfo.mFlags 3422 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 3423 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 3424 } else { 3425 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 3426 } 3427 3428 if (flags & kSampleSizePresent) { 3429 bytesPerSample += 4; 3430 } else if (mTrackFragmentHeaderInfo.mFlags 3431 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 3432 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 3433 } else { 3434 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 3435 } 3436 3437 if (flags & kSampleFlagsPresent) { 3438 bytesPerSample += 4; 3439 } else if (mTrackFragmentHeaderInfo.mFlags 3440 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 3441 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 3442 } else { 3443 sampleFlags = 
mTrackFragmentHeaderInfo.mDefaultSampleFlags; 3444 } 3445 3446 if (flags & kSampleCompositionTimeOffsetPresent) { 3447 bytesPerSample += 4; 3448 } else { 3449 sampleCtsOffset = 0; 3450 } 3451 3452 if (size < sampleCount * bytesPerSample) { 3453 return -EINVAL; 3454 } 3455 3456 Sample tmp; 3457 for (uint32_t i = 0; i < sampleCount; ++i) { 3458 if (flags & kSampleDurationPresent) { 3459 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 3460 return ERROR_MALFORMED; 3461 } 3462 offset += 4; 3463 } 3464 3465 if (flags & kSampleSizePresent) { 3466 if (!mDataSource->getUInt32(offset, &sampleSize)) { 3467 return ERROR_MALFORMED; 3468 } 3469 offset += 4; 3470 } 3471 3472 if (flags & kSampleFlagsPresent) { 3473 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 3474 return ERROR_MALFORMED; 3475 } 3476 offset += 4; 3477 } 3478 3479 if (flags & kSampleCompositionTimeOffsetPresent) { 3480 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 3481 return ERROR_MALFORMED; 3482 } 3483 offset += 4; 3484 } 3485 3486 ALOGV("adding sample %d at offset 0x%08llx, size %u, duration %u, " 3487 " flags 0x%08x", i + 1, 3488 dataOffset, sampleSize, sampleDuration, 3489 (flags & kFirstSampleFlagsPresent) && i == 0 3490 ? 
firstSampleFlags : sampleFlags); 3491 tmp.offset = dataOffset; 3492 tmp.size = sampleSize; 3493 tmp.duration = sampleDuration; 3494 mCurrentSamples.add(tmp); 3495 3496 dataOffset += sampleSize; 3497 } 3498 3499 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 3500 3501 return OK; 3502} 3503 3504sp<MetaData> MPEG4Source::getFormat() { 3505 Mutex::Autolock autoLock(mLock); 3506 3507 return mFormat; 3508} 3509 3510size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 3511 switch (mNALLengthSize) { 3512 case 1: 3513 return *data; 3514 case 2: 3515 return U16_AT(data); 3516 case 3: 3517 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 3518 case 4: 3519 return U32_AT(data); 3520 } 3521 3522 // This cannot happen, mNALLengthSize springs to life by adding 1 to 3523 // a 2-bit integer. 3524 CHECK(!"Should not be here."); 3525 3526 return 0; 3527} 3528 3529status_t MPEG4Source::read( 3530 MediaBuffer **out, const ReadOptions *options) { 3531 Mutex::Autolock autoLock(mLock); 3532 3533 CHECK(mStarted); 3534 3535 if (mFirstMoofOffset > 0) { 3536 return fragmentedRead(out, options); 3537 } 3538 3539 *out = NULL; 3540 3541 int64_t targetSampleTimeUs = -1; 3542 3543 int64_t seekTimeUs; 3544 ReadOptions::SeekMode mode; 3545 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3546 uint32_t findFlags = 0; 3547 switch (mode) { 3548 case ReadOptions::SEEK_PREVIOUS_SYNC: 3549 findFlags = SampleTable::kFlagBefore; 3550 break; 3551 case ReadOptions::SEEK_NEXT_SYNC: 3552 findFlags = SampleTable::kFlagAfter; 3553 break; 3554 case ReadOptions::SEEK_CLOSEST_SYNC: 3555 case ReadOptions::SEEK_CLOSEST: 3556 findFlags = SampleTable::kFlagClosest; 3557 break; 3558 default: 3559 CHECK(!"Should not be here."); 3560 break; 3561 } 3562 3563 uint32_t sampleIndex; 3564 status_t err = mSampleTable->findSampleAtTime( 3565 seekTimeUs * mTimescale / 1000000, 3566 &sampleIndex, findFlags); 3567 3568 if (mode == ReadOptions::SEEK_CLOSEST) { 3569 // We found the closest sample already, now we 
want the sync 3570 // sample preceding it (or the sample itself of course), even 3571 // if the subsequent sync sample is closer. 3572 findFlags = SampleTable::kFlagBefore; 3573 } 3574 3575 uint32_t syncSampleIndex; 3576 if (err == OK) { 3577 err = mSampleTable->findSyncSampleNear( 3578 sampleIndex, &syncSampleIndex, findFlags); 3579 } 3580 3581 uint32_t sampleTime; 3582 if (err == OK) { 3583 err = mSampleTable->getMetaDataForSample( 3584 sampleIndex, NULL, NULL, &sampleTime); 3585 } 3586 3587 if (err != OK) { 3588 if (err == ERROR_OUT_OF_RANGE) { 3589 // An attempt to seek past the end of the stream would 3590 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 3591 // this all the way to the MediaPlayer would cause abnormal 3592 // termination. Legacy behaviour appears to be to behave as if 3593 // we had seeked to the end of stream, ending normally. 3594 err = ERROR_END_OF_STREAM; 3595 } 3596 ALOGV("end of stream"); 3597 return err; 3598 } 3599 3600 if (mode == ReadOptions::SEEK_CLOSEST) { 3601 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 3602 } 3603 3604#if 0 3605 uint32_t syncSampleTime; 3606 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 3607 syncSampleIndex, NULL, NULL, &syncSampleTime)); 3608 3609 ALOGI("seek to time %lld us => sample at time %lld us, " 3610 "sync sample at time %lld us", 3611 seekTimeUs, 3612 sampleTime * 1000000ll / mTimescale, 3613 syncSampleTime * 1000000ll / mTimescale); 3614#endif 3615 3616 mCurrentSampleIndex = syncSampleIndex; 3617 if (mBuffer != NULL) { 3618 mBuffer->release(); 3619 mBuffer = NULL; 3620 } 3621 3622 // fall through 3623 } 3624 3625 off64_t offset; 3626 size_t size; 3627 uint32_t cts, stts; 3628 bool isSyncSample; 3629 bool newBuffer = false; 3630 if (mBuffer == NULL) { 3631 newBuffer = true; 3632 3633 status_t err = 3634 mSampleTable->getMetaDataForSample( 3635 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts); 3636 3637 if (err != OK) { 3638 return err; 3639 } 3640 3641 err 
= mGroup->acquire_buffer(&mBuffer); 3642 3643 if (err != OK) { 3644 CHECK(mBuffer == NULL); 3645 return err; 3646 } 3647 } 3648 3649 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) { 3650 if (newBuffer) { 3651 ssize_t num_bytes_read = 3652 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 3653 3654 if (num_bytes_read < (ssize_t)size) { 3655 mBuffer->release(); 3656 mBuffer = NULL; 3657 3658 return ERROR_IO; 3659 } 3660 3661 CHECK(mBuffer != NULL); 3662 mBuffer->set_range(0, size); 3663 mBuffer->meta_data()->clear(); 3664 mBuffer->meta_data()->setInt64( 3665 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3666 mBuffer->meta_data()->setInt64( 3667 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 3668 3669 if (targetSampleTimeUs >= 0) { 3670 mBuffer->meta_data()->setInt64( 3671 kKeyTargetTime, targetSampleTimeUs); 3672 } 3673 3674 if (isSyncSample) { 3675 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3676 } 3677 3678 ++mCurrentSampleIndex; 3679 } 3680 3681 if (!mIsAVC && !mIsHEVC) { 3682 *out = mBuffer; 3683 mBuffer = NULL; 3684 3685 return OK; 3686 } 3687 3688 // Each NAL unit is split up into its constituent fragments and 3689 // each one of them returned in its own buffer. 
3690 3691 CHECK(mBuffer->range_length() >= mNALLengthSize); 3692 3693 const uint8_t *src = 3694 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 3695 3696 size_t nal_size = parseNALSize(src); 3697 if (mBuffer->range_length() < mNALLengthSize + nal_size) { 3698 ALOGE("incomplete NAL unit."); 3699 3700 mBuffer->release(); 3701 mBuffer = NULL; 3702 3703 return ERROR_MALFORMED; 3704 } 3705 3706 MediaBuffer *clone = mBuffer->clone(); 3707 CHECK(clone != NULL); 3708 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 3709 3710 CHECK(mBuffer != NULL); 3711 mBuffer->set_range( 3712 mBuffer->range_offset() + mNALLengthSize + nal_size, 3713 mBuffer->range_length() - mNALLengthSize - nal_size); 3714 3715 if (mBuffer->range_length() == 0) { 3716 mBuffer->release(); 3717 mBuffer = NULL; 3718 } 3719 3720 *out = clone; 3721 3722 return OK; 3723 } else { 3724 // Whole NAL units are returned but each fragment is prefixed by 3725 // the start code (0x00 00 00 01). 3726 ssize_t num_bytes_read = 0; 3727 int32_t drm = 0; 3728 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 3729 if (usesDRM) { 3730 num_bytes_read = 3731 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 3732 } else { 3733 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 3734 } 3735 3736 if (num_bytes_read < (ssize_t)size) { 3737 mBuffer->release(); 3738 mBuffer = NULL; 3739 3740 return ERROR_IO; 3741 } 3742 3743 if (usesDRM) { 3744 CHECK(mBuffer != NULL); 3745 mBuffer->set_range(0, size); 3746 3747 } else { 3748 uint8_t *dstData = (uint8_t *)mBuffer->data(); 3749 size_t srcOffset = 0; 3750 size_t dstOffset = 0; 3751 3752 while (srcOffset < size) { 3753 bool isMalFormed = (srcOffset + mNALLengthSize > size); 3754 size_t nalLength = 0; 3755 if (!isMalFormed) { 3756 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 3757 srcOffset += mNALLengthSize; 3758 isMalFormed = srcOffset + nalLength > size; 3759 } 3760 3761 if (isMalFormed) { 3762 ALOGE("Video 
is malformed"); 3763 mBuffer->release(); 3764 mBuffer = NULL; 3765 return ERROR_MALFORMED; 3766 } 3767 3768 if (nalLength == 0) { 3769 continue; 3770 } 3771 3772 CHECK(dstOffset + 4 <= mBuffer->size()); 3773 3774 dstData[dstOffset++] = 0; 3775 dstData[dstOffset++] = 0; 3776 dstData[dstOffset++] = 0; 3777 dstData[dstOffset++] = 1; 3778 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 3779 srcOffset += nalLength; 3780 dstOffset += nalLength; 3781 } 3782 CHECK_EQ(srcOffset, size); 3783 CHECK(mBuffer != NULL); 3784 mBuffer->set_range(0, dstOffset); 3785 } 3786 3787 mBuffer->meta_data()->clear(); 3788 mBuffer->meta_data()->setInt64( 3789 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3790 mBuffer->meta_data()->setInt64( 3791 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 3792 3793 if (targetSampleTimeUs >= 0) { 3794 mBuffer->meta_data()->setInt64( 3795 kKeyTargetTime, targetSampleTimeUs); 3796 } 3797 3798 if (isSyncSample) { 3799 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3800 } 3801 3802 ++mCurrentSampleIndex; 3803 3804 *out = mBuffer; 3805 mBuffer = NULL; 3806 3807 return OK; 3808 } 3809} 3810 3811status_t MPEG4Source::fragmentedRead( 3812 MediaBuffer **out, const ReadOptions *options) { 3813 3814 ALOGV("MPEG4Source::fragmentedRead"); 3815 3816 CHECK(mStarted); 3817 3818 *out = NULL; 3819 3820 int64_t targetSampleTimeUs = -1; 3821 3822 int64_t seekTimeUs; 3823 ReadOptions::SeekMode mode; 3824 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3825 3826 int numSidxEntries = mSegments.size(); 3827 if (numSidxEntries != 0) { 3828 int64_t totalTime = 0; 3829 off64_t totalOffset = mFirstMoofOffset; 3830 for (int i = 0; i < numSidxEntries; i++) { 3831 const SidxEntry *se = &mSegments[i]; 3832 if (totalTime + se->mDurationUs > seekTimeUs) { 3833 // The requested time is somewhere in this segment 3834 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) || 3835 (mode == ReadOptions::SEEK_CLOSEST_SYNC && 3836 (seekTimeUs 
- totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 3837 // requested next sync, or closest sync and it was closer to the end of 3838 // this segment 3839 totalTime += se->mDurationUs; 3840 totalOffset += se->mSize; 3841 } 3842 break; 3843 } 3844 totalTime += se->mDurationUs; 3845 totalOffset += se->mSize; 3846 } 3847 mCurrentMoofOffset = totalOffset; 3848 mCurrentSamples.clear(); 3849 mCurrentSampleIndex = 0; 3850 parseChunk(&totalOffset); 3851 mCurrentTime = totalTime * mTimescale / 1000000ll; 3852 } else { 3853 // without sidx boxes, we can only seek to 0 3854 mCurrentMoofOffset = mFirstMoofOffset; 3855 mCurrentSamples.clear(); 3856 mCurrentSampleIndex = 0; 3857 off64_t tmp = mCurrentMoofOffset; 3858 parseChunk(&tmp); 3859 mCurrentTime = 0; 3860 } 3861 3862 if (mBuffer != NULL) { 3863 mBuffer->release(); 3864 mBuffer = NULL; 3865 } 3866 3867 // fall through 3868 } 3869 3870 off64_t offset = 0; 3871 size_t size = 0; 3872 uint32_t cts = 0; 3873 bool isSyncSample = false; 3874 bool newBuffer = false; 3875 if (mBuffer == NULL) { 3876 newBuffer = true; 3877 3878 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 3879 // move to next fragment if there is one 3880 if (mNextMoofOffset <= mCurrentMoofOffset) { 3881 return ERROR_END_OF_STREAM; 3882 } 3883 off64_t nextMoof = mNextMoofOffset; 3884 mCurrentMoofOffset = nextMoof; 3885 mCurrentSamples.clear(); 3886 mCurrentSampleIndex = 0; 3887 parseChunk(&nextMoof); 3888 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 3889 return ERROR_END_OF_STREAM; 3890 } 3891 } 3892 3893 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 3894 offset = smpl->offset; 3895 size = smpl->size; 3896 cts = mCurrentTime; 3897 mCurrentTime += smpl->duration; 3898 isSyncSample = (mCurrentSampleIndex == 0); // XXX 3899 3900 status_t err = mGroup->acquire_buffer(&mBuffer); 3901 3902 if (err != OK) { 3903 CHECK(mBuffer == NULL); 3904 ALOGV("acquire_buffer returned %d", err); 3905 return err; 3906 } 3907 } 3908 3909 const 
Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 3910 const sp<MetaData> bufmeta = mBuffer->meta_data(); 3911 bufmeta->clear(); 3912 if (smpl->encryptedsizes.size()) { 3913 // store clear/encrypted lengths in metadata 3914 bufmeta->setData(kKeyPlainSizes, 0, 3915 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 3916 bufmeta->setData(kKeyEncryptedSizes, 0, 3917 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 3918 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size? 3919 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 3920 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode); 3921 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16); 3922 } 3923 3924 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) { 3925 if (newBuffer) { 3926 ssize_t num_bytes_read = 3927 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 3928 3929 if (num_bytes_read < (ssize_t)size) { 3930 mBuffer->release(); 3931 mBuffer = NULL; 3932 3933 ALOGV("i/o error"); 3934 return ERROR_IO; 3935 } 3936 3937 CHECK(mBuffer != NULL); 3938 mBuffer->set_range(0, size); 3939 mBuffer->meta_data()->setInt64( 3940 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3941 mBuffer->meta_data()->setInt64( 3942 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 3943 3944 if (targetSampleTimeUs >= 0) { 3945 mBuffer->meta_data()->setInt64( 3946 kKeyTargetTime, targetSampleTimeUs); 3947 } 3948 3949 if (isSyncSample) { 3950 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3951 } 3952 3953 ++mCurrentSampleIndex; 3954 } 3955 3956 if (!mIsAVC && !mIsHEVC) { 3957 *out = mBuffer; 3958 mBuffer = NULL; 3959 3960 return OK; 3961 } 3962 3963 // Each NAL unit is split up into its constituent fragments and 3964 // each one of them returned in its own buffer. 
3965 3966 CHECK(mBuffer->range_length() >= mNALLengthSize); 3967 3968 const uint8_t *src = 3969 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 3970 3971 size_t nal_size = parseNALSize(src); 3972 if (mBuffer->range_length() < mNALLengthSize + nal_size) { 3973 ALOGE("incomplete NAL unit."); 3974 3975 mBuffer->release(); 3976 mBuffer = NULL; 3977 3978 return ERROR_MALFORMED; 3979 } 3980 3981 MediaBuffer *clone = mBuffer->clone(); 3982 CHECK(clone != NULL); 3983 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 3984 3985 CHECK(mBuffer != NULL); 3986 mBuffer->set_range( 3987 mBuffer->range_offset() + mNALLengthSize + nal_size, 3988 mBuffer->range_length() - mNALLengthSize - nal_size); 3989 3990 if (mBuffer->range_length() == 0) { 3991 mBuffer->release(); 3992 mBuffer = NULL; 3993 } 3994 3995 *out = clone; 3996 3997 return OK; 3998 } else { 3999 ALOGV("whole NAL"); 4000 // Whole NAL units are returned but each fragment is prefixed by 4001 // the start code (0x00 00 00 01). 
4002 ssize_t num_bytes_read = 0; 4003 int32_t drm = 0; 4004 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 4005 if (usesDRM) { 4006 num_bytes_read = 4007 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 4008 } else { 4009 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 4010 } 4011 4012 if (num_bytes_read < (ssize_t)size) { 4013 mBuffer->release(); 4014 mBuffer = NULL; 4015 4016 ALOGV("i/o error"); 4017 return ERROR_IO; 4018 } 4019 4020 if (usesDRM) { 4021 CHECK(mBuffer != NULL); 4022 mBuffer->set_range(0, size); 4023 4024 } else { 4025 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4026 size_t srcOffset = 0; 4027 size_t dstOffset = 0; 4028 4029 while (srcOffset < size) { 4030 bool isMalFormed = (srcOffset + mNALLengthSize > size); 4031 size_t nalLength = 0; 4032 if (!isMalFormed) { 4033 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4034 srcOffset += mNALLengthSize; 4035 isMalFormed = srcOffset + nalLength > size; 4036 } 4037 4038 if (isMalFormed) { 4039 ALOGE("Video is malformed"); 4040 mBuffer->release(); 4041 mBuffer = NULL; 4042 return ERROR_MALFORMED; 4043 } 4044 4045 if (nalLength == 0) { 4046 continue; 4047 } 4048 4049 CHECK(dstOffset + 4 <= mBuffer->size()); 4050 4051 dstData[dstOffset++] = 0; 4052 dstData[dstOffset++] = 0; 4053 dstData[dstOffset++] = 0; 4054 dstData[dstOffset++] = 1; 4055 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4056 srcOffset += nalLength; 4057 dstOffset += nalLength; 4058 } 4059 CHECK_EQ(srcOffset, size); 4060 CHECK(mBuffer != NULL); 4061 mBuffer->set_range(0, dstOffset); 4062 } 4063 4064 mBuffer->meta_data()->setInt64( 4065 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4066 mBuffer->meta_data()->setInt64( 4067 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4068 4069 if (targetSampleTimeUs >= 0) { 4070 mBuffer->meta_data()->setInt64( 4071 kKeyTargetTime, targetSampleTimeUs); 4072 } 4073 4074 if (isSyncSample) { 4075 
mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4076 } 4077 4078 ++mCurrentSampleIndex; 4079 4080 *out = mBuffer; 4081 mBuffer = NULL; 4082 4083 return OK; 4084 } 4085} 4086 4087MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 4088 const char *mimePrefix) { 4089 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 4090 const char *mime; 4091 if (track->meta != NULL 4092 && track->meta->findCString(kKeyMIMEType, &mime) 4093 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 4094 return track; 4095 } 4096 } 4097 4098 return NULL; 4099} 4100 4101static bool LegacySniffMPEG4( 4102 const sp<DataSource> &source, String8 *mimeType, float *confidence) { 4103 uint8_t header[8]; 4104 4105 ssize_t n = source->readAt(4, header, sizeof(header)); 4106 if (n < (ssize_t)sizeof(header)) { 4107 return false; 4108 } 4109 4110 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 4111 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) 4112 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 4113 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 4114 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 4115 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) { 4116 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 4117 *confidence = 0.4; 4118 4119 return true; 4120 } 4121 4122 return false; 4123} 4124 4125static bool isCompatibleBrand(uint32_t fourcc) { 4126 static const uint32_t kCompatibleBrands[] = { 4127 FOURCC('i', 's', 'o', 'm'), 4128 FOURCC('i', 's', 'o', '2'), 4129 FOURCC('a', 'v', 'c', '1'), 4130 FOURCC('h', 'v', 'c', '1'), 4131 FOURCC('h', 'e', 'v', '1'), 4132 FOURCC('3', 'g', 'p', '4'), 4133 FOURCC('m', 'p', '4', '1'), 4134 FOURCC('m', 'p', '4', '2'), 4135 4136 // Won't promise that the following file types can be played. 4137 // Just give these file types a chance. 
4138 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 4139 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 4140 4141 FOURCC('3', 'g', '2', 'a'), // 3GPP2 4142 FOURCC('3', 'g', '2', 'b'), 4143 }; 4144 4145 for (size_t i = 0; 4146 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 4147 ++i) { 4148 if (kCompatibleBrands[i] == fourcc) { 4149 return true; 4150 } 4151 } 4152 4153 return false; 4154} 4155 4156// Attempt to actually parse the 'ftyp' atom and determine if a suitable 4157// compatible brand is present. 4158// Also try to identify where this file's metadata ends 4159// (end of the 'moov' atom) and report it to the caller as part of 4160// the metadata. 4161static bool BetterSniffMPEG4( 4162 const sp<DataSource> &source, String8 *mimeType, float *confidence, 4163 sp<AMessage> *meta) { 4164 // We scan up to 128 bytes to identify this file as an MP4. 4165 static const off64_t kMaxScanOffset = 128ll; 4166 4167 off64_t offset = 0ll; 4168 bool foundGoodFileType = false; 4169 off64_t moovAtomEndOffset = -1ll; 4170 bool done = false; 4171 4172 while (!done && offset < kMaxScanOffset) { 4173 uint32_t hdr[2]; 4174 if (source->readAt(offset, hdr, 8) < 8) { 4175 return false; 4176 } 4177 4178 uint64_t chunkSize = ntohl(hdr[0]); 4179 uint32_t chunkType = ntohl(hdr[1]); 4180 off64_t chunkDataOffset = offset + 8; 4181 4182 if (chunkSize == 1) { 4183 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 4184 return false; 4185 } 4186 4187 chunkSize = ntoh64(chunkSize); 4188 chunkDataOffset += 8; 4189 4190 if (chunkSize < 16) { 4191 // The smallest valid chunk is 16 bytes long in this case. 4192 return false; 4193 } 4194 } else if (chunkSize < 8) { 4195 // The smallest valid chunk is 8 bytes long. 
4196 return false; 4197 } 4198 4199 off64_t chunkDataSize = offset + chunkSize - chunkDataOffset; 4200 4201 char chunkstring[5]; 4202 MakeFourCCString(chunkType, chunkstring); 4203 ALOGV("saw chunk type %s, size %lld @ %lld", chunkstring, chunkSize, offset); 4204 switch (chunkType) { 4205 case FOURCC('f', 't', 'y', 'p'): 4206 { 4207 if (chunkDataSize < 8) { 4208 return false; 4209 } 4210 4211 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 4212 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 4213 if (i == 1) { 4214 // Skip this index, it refers to the minorVersion, 4215 // not a brand. 4216 continue; 4217 } 4218 4219 uint32_t brand; 4220 if (source->readAt( 4221 chunkDataOffset + 4 * i, &brand, 4) < 4) { 4222 return false; 4223 } 4224 4225 brand = ntohl(brand); 4226 4227 if (isCompatibleBrand(brand)) { 4228 foundGoodFileType = true; 4229 break; 4230 } 4231 } 4232 4233 if (!foundGoodFileType) { 4234 return false; 4235 } 4236 4237 break; 4238 } 4239 4240 case FOURCC('m', 'o', 'o', 'v'): 4241 { 4242 moovAtomEndOffset = offset + chunkSize; 4243 4244 done = true; 4245 break; 4246 } 4247 4248 default: 4249 break; 4250 } 4251 4252 offset += chunkSize; 4253 } 4254 4255 if (!foundGoodFileType) { 4256 return false; 4257 } 4258 4259 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 4260 *confidence = 0.4f; 4261 4262 if (moovAtomEndOffset >= 0) { 4263 *meta = new AMessage; 4264 (*meta)->setInt64("meta-data-size", moovAtomEndOffset); 4265 4266 ALOGV("found metadata size: %lld", moovAtomEndOffset); 4267 } 4268 4269 return true; 4270} 4271 4272bool SniffMPEG4( 4273 const sp<DataSource> &source, String8 *mimeType, float *confidence, 4274 sp<AMessage> *meta) { 4275 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) { 4276 return true; 4277 } 4278 4279 if (LegacySniffMPEG4(source, mimeType, confidence)) { 4280 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 4281 return true; 4282 } 4283 4284 return false; 4285} 4286 4287} // namespace android 4288